/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 * http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

static void f2fs_read_end_io(struct bio *bio, int err)
{
        struct bio_vec *bvec;
        int i;

        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;

                if (!err) {
                        SetPageUptodate(page);
                } else {
                        ClearPageUptodate(page);
                        SetPageError(page);
                }
                unlock_page(page);
        }
        bio_put(bio);
}

static void f2fs_write_end_io(struct bio *bio, int err)
{
        struct f2fs_sb_info *sbi =
                F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb);
        struct bio_vec *bvec;
        int i;

        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;

                if (unlikely(err)) {
                        SetPageError(page);
                        set_bit(AS_EIO, &page->mapping->flags);
                        set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
                        sbi->sb->s_flags |= MS_RDONLY;
                }
                end_page_writeback(page);
                dec_page_count(sbi, F2FS_WRITEBACK);
        }

        if (bio->bi_private)
                complete(bio->bi_private);

        if (!get_pages(sbi, F2FS_WRITEBACK) &&
                        !list_empty(&sbi->cp_wait.task_list))
                wake_up(&sbi->cp_wait);

        bio_put(bio);
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
                                int npages, bool is_read)
{
        struct bio *bio;

        /* No failure on bio allocation */
        bio = bio_alloc(GFP_NOIO, npages);

        bio->bi_bdev = sbi->sb->s_bdev;
        bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
        bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;

        return bio;
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
        struct f2fs_io_info *fio = &io->fio;
        int rw;

        if (!io->bio)
                return;

        rw = fio->rw;

        if (is_read_io(rw)) {
                trace_f2fs_submit_read_bio(io->sbi->sb, rw,
                                                fio->type, io->bio);
                submit_bio(rw, io->bio);
        } else {
                trace_f2fs_submit_write_bio(io->sbi->sb, rw,
                                                fio->type, io->bio);
                /*
                 * META_FLUSH is only issued by the checkpoint procedure, and
                 * we should wait for this metadata bio for FS consistency.
                 */
                if (fio->type == META_FLUSH) {
                        DECLARE_COMPLETION_ONSTACK(wait);
                        io->bio->bi_private = &wait;
                        submit_bio(rw, io->bio);
                        wait_for_completion(&wait);
                } else {
                        submit_bio(rw, io->bio);
                }
        }

        io->bio = NULL;
}

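/*
 * Submit any bio merged so far for the given page type.  In the checkpoint
 * procedure, META is upgraded to META_FLUSH and issued with flush/FUA.
 */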
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
                                enum page_type type, int rw)
{
        enum page_type btype = PAGE_TYPE_OF_BIO(type);
        struct f2fs_bio_info *io;

        io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

        mutex_lock(&io->io_mutex);

        /* change META to META_FLUSH in the checkpoint procedure */
        if (type >= META_FLUSH) {
                io->fio.type = META_FLUSH;
                io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
        }
        __submit_merged_bio(io);
        mutex_unlock(&io->io_mutex);
}

/*
 * Fill the locked page with data located in the block address.
 * Return unlocked page.
 */
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
                                        block_t blk_addr, int rw)
{
        struct bio *bio;

        trace_f2fs_submit_page_bio(page, blk_addr, rw);

        /* Allocate a new bio */
        bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));

        if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
                bio_put(bio);
                f2fs_put_page(page, 1);
                return -EFAULT;
        }

        submit_bio(rw, bio);
        return 0;
}

void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
                        block_t blk_addr, struct f2fs_io_info *fio)
{
        enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
        struct f2fs_bio_info *io;
        bool is_read = is_read_io(fio->rw);

        io = is_read ? &sbi->read_io : &sbi->write_io[btype];

        verify_block_addr(sbi, blk_addr);

        mutex_lock(&io->io_mutex);

        if (!is_read)
                inc_page_count(sbi, F2FS_WRITEBACK);

        if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
                                                io->fio.rw != fio->rw))
                __submit_merged_bio(io);
alloc_new:
        if (io->bio == NULL) {
                int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));

                io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
                io->fio = *fio;
        }

        if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
                                                        PAGE_CACHE_SIZE) {
                __submit_merged_bio(io);
                goto alloc_new;
        }

        io->last_block_in_bio = blk_addr;

        mutex_unlock(&io->io_mutex);
        trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
{
        struct f2fs_node *rn;
        __le32 *addr_array;
        struct page *node_page = dn->node_page;
        unsigned int ofs_in_node = dn->ofs_in_node;

        f2fs_wait_on_page_writeback(node_page, NODE);

        rn = F2FS_NODE(node_page);

        /* Get physical address of data block */
        addr_array = blkaddr_in_node(rn);
        addr_array[ofs_in_node] = cpu_to_le32(new_addr);
        set_page_dirty(node_page);
}

int reserve_new_block(struct dnode_of_data *dn)
{
        struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);

        if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
                return -EPERM;
        if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
                return -ENOSPC;

        trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

        __set_data_blkaddr(dn, NEW_ADDR);
        dn->data_blkaddr = NEW_ADDR;
        mark_inode_dirty(dn->inode);
        sync_inode_page(dn);
        return 0;
}

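/*
 * Reserve a block for @index if one has not been allocated yet: look up (or
 * allocate) the dnode covering @index and mark the slot NEW_ADDR.  The dnode
 * is released here unless the caller passed in an inode page.
 */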
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
        bool need_put = dn->inode_page ? false : true;
        int err;

        /* if inode_page exists, index should be zero */
        f2fs_bug_on(!need_put && index);

        err = get_dnode_of_data(dn, index, ALLOC_NODE);
        if (err)
                return err;

        if (dn->data_blkaddr == NULL_ADDR)
                err = reserve_new_block(dn);
        if (err || need_put)
                f2fs_put_dnode(dn);
        return err;
}

static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
                                        struct buffer_head *bh_result)
{
        struct f2fs_inode_info *fi = F2FS_I(inode);
        pgoff_t start_fofs, end_fofs;
        block_t start_blkaddr;

        if (is_inode_flag_set(fi, FI_NO_EXTENT))
                return 0;

        read_lock(&fi->ext.ext_lock);
        if (fi->ext.len == 0) {
                read_unlock(&fi->ext.ext_lock);
                return 0;
        }

        stat_inc_total_hit(inode->i_sb);

        start_fofs = fi->ext.fofs;
        end_fofs = fi->ext.fofs + fi->ext.len - 1;
        start_blkaddr = fi->ext.blk_addr;

        if (pgofs >= start_fofs && pgofs <= end_fofs) {
                unsigned int blkbits = inode->i_sb->s_blocksize_bits;
                size_t count;

                clear_buffer_new(bh_result);
                map_bh(bh_result, inode->i_sb,
                                start_blkaddr + pgofs - start_fofs);
                count = end_fofs - pgofs + 1;
                if (count < (UINT_MAX >> blkbits))
                        bh_result->b_size = (count << blkbits);
                else
                        bh_result->b_size = UINT_MAX;

                stat_inc_read_hit(inode->i_sb);
                read_unlock(&fi->ext.ext_lock);
                return 1;
        }
        read_unlock(&fi->ext.ext_lock);
        return 0;
}

void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
{
        struct f2fs_inode_info *fi = F2FS_I(dn->inode);
        pgoff_t fofs, start_fofs, end_fofs;
        block_t start_blkaddr, end_blkaddr;
        int need_update = true;

        f2fs_bug_on(blk_addr == NEW_ADDR);
        fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
                                                        dn->ofs_in_node;

        /* Update the page address in the parent node */
        __set_data_blkaddr(dn, blk_addr);

        if (is_inode_flag_set(fi, FI_NO_EXTENT))
                return;

        write_lock(&fi->ext.ext_lock);

        start_fofs = fi->ext.fofs;
        end_fofs = fi->ext.fofs + fi->ext.len - 1;
        start_blkaddr = fi->ext.blk_addr;
        end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;

        /* Drop and initialize the matched extent */
        if (fi->ext.len == 1 && fofs == start_fofs)
                fi->ext.len = 0;

        /* Initial extent */
        if (fi->ext.len == 0) {
                if (blk_addr != NULL_ADDR) {
                        fi->ext.fofs = fofs;
                        fi->ext.blk_addr = blk_addr;
                        fi->ext.len = 1;
                }
                goto end_update;
        }

        /* Front merge */
        if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
                fi->ext.fofs--;
                fi->ext.blk_addr--;
                fi->ext.len++;
                goto end_update;
        }

        /* Back merge */
        if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
                fi->ext.len++;
                goto end_update;
        }

        /* Split the existing extent */
        if (fi->ext.len > 1 &&
                fofs >= start_fofs && fofs <= end_fofs) {
                if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
                        fi->ext.len = fofs - start_fofs;
                } else {
                        fi->ext.fofs = fofs + 1;
                        fi->ext.blk_addr = start_blkaddr +
                                        fofs - start_fofs + 1;
                        fi->ext.len -= fofs - start_fofs + 1;
                }
        } else {
                need_update = false;
        }

        /* Finally, if the extent is very fragmented, let's drop the cache. */
        if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
                fi->ext.len = 0;
                set_inode_flag(fi, FI_NO_EXTENT);
                need_update = true;
        }
end_update:
        write_unlock(&fi->ext.ext_lock);
        if (need_update)
                sync_inode_page(dn);
        return;
}

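/*
 * Look up the data page for @index in the page cache and return it if it is
 * uptodate; otherwise read it from the block address found in the dnode.
 * With @sync the read is READ_SYNC and waited for, else it is posted as
 * readahead (READA).  Returns an ERR_PTR on a hole or on failure.
 */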
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct address_space *mapping = inode->i_mapping;
        struct dnode_of_data dn;
        struct page *page;
        int err;

        page = find_get_page(mapping, index);
        if (page && PageUptodate(page))
                return page;
        f2fs_put_page(page, 0);

        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
        if (err)
                return ERR_PTR(err);
        f2fs_put_dnode(&dn);

        if (dn.data_blkaddr == NULL_ADDR)
                return ERR_PTR(-ENOENT);

        /* By fallocate(), there is no cached page, but with NEW_ADDR */
        if (unlikely(dn.data_blkaddr == NEW_ADDR))
                return ERR_PTR(-EINVAL);

        page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
        if (!page)
                return ERR_PTR(-ENOMEM);

        if (PageUptodate(page)) {
                unlock_page(page);
                return page;
        }

        err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
                                        sync ? READ_SYNC : READA);
        if (err)
                return ERR_PTR(err);

        if (sync) {
                wait_on_page_locked(page);
                if (unlikely(!PageUptodate(page))) {
                        f2fs_put_page(page, 0);
                        return ERR_PTR(-EIO);
                }
        }
        return page;
}

/*
 * If it tries to access a hole, return an error, because the callers in
 * dir.c and GC need to know whether this page exists or not.
 */
struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct address_space *mapping = inode->i_mapping;
        struct dnode_of_data dn;
        struct page *page;
        int err;

repeat:
        page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
        if (!page)
                return ERR_PTR(-ENOMEM);

        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
        if (err) {
                f2fs_put_page(page, 1);
                return ERR_PTR(err);
        }
        f2fs_put_dnode(&dn);

        if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
                f2fs_put_page(page, 1);
                return ERR_PTR(-ENOENT);
        }

        if (PageUptodate(page))
                return page;

        /*
         * A new dentry page is allocated but not able to be written, since
         * its new inode page couldn't be allocated due to -ENOSPC.
         * In such a case, its blkaddr remains NEW_ADDR.
         * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
         */
        if (dn.data_blkaddr == NEW_ADDR) {
                zero_user_segment(page, 0, PAGE_CACHE_SIZE);
                SetPageUptodate(page);
                return page;
        }

        err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
        if (err)
                return ERR_PTR(err);

        lock_page(page);
        if (unlikely(!PageUptodate(page))) {
                f2fs_put_page(page, 1);
                return ERR_PTR(-EIO);
        }
        if (unlikely(page->mapping != mapping)) {
                f2fs_put_page(page, 1);
                goto repeat;
        }
        return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that ipage is set only by make_empty_dir.
 */
struct page *get_new_data_page(struct inode *inode,
                struct page *ipage, pgoff_t index, bool new_i_size)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        struct dnode_of_data dn;
        int err;

        set_new_dnode(&dn, inode, ipage, NULL, 0);
        err = f2fs_reserve_block(&dn, index);
        if (err)
                return ERR_PTR(err);
repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
                err = -ENOMEM;
                goto put_err;
        }

        if (PageUptodate(page))
                return page;

        if (dn.data_blkaddr == NEW_ADDR) {
                zero_user_segment(page, 0, PAGE_CACHE_SIZE);
                SetPageUptodate(page);
        } else {
                err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
                                                READ_SYNC);
                if (err)
                        goto put_err;

                lock_page(page);
                if (unlikely(!PageUptodate(page))) {
                        f2fs_put_page(page, 1);
                        err = -EIO;
                        goto put_err;
                }
                if (unlikely(page->mapping != mapping)) {
                        f2fs_put_page(page, 1);
                        goto repeat;
                }
        }

        if (new_i_size &&
                i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
                i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
                /* Only the directory inode sets new_i_size */
                set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
        }
        return page;

put_err:
        f2fs_put_dnode(&dn);
        return ERR_PTR(err);
}

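/*
 * Allocate an on-disk block for the slot @dn points to: charge one valid
 * block, pick a new address from the warm data log and record it in the
 * node page.  This is used by get_data_block() when direct I/O creates
 * blocks, so the extent cache is bypassed while the address is updated.
 */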
static int __allocate_data_block(struct dnode_of_data *dn)
{
        struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
        struct f2fs_summary sum;
        block_t new_blkaddr;
        struct node_info ni;
        int type;

        if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
                return -EPERM;
        if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
                return -ENOSPC;

        __set_data_blkaddr(dn, NEW_ADDR);
        dn->data_blkaddr = NEW_ADDR;

        get_node_info(sbi, dn->nid, &ni);
        set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

        type = CURSEG_WARM_DATA;

        allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);

        /* direct IO doesn't use extent cache to maximize the performance */
        set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
        update_extent_cache(new_blkaddr, dn);
        clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);

        dn->data_blkaddr = new_blkaddr;
        return 0;
}

/*
 * get_data_block() now supports readahead/bmap/rw direct_IO with a mapped bh.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
static int get_data_block(struct inode *inode, sector_t iblock,
                        struct buffer_head *bh_result, int create)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        unsigned int blkbits = inode->i_sb->s_blocksize_bits;
        unsigned maxblocks = bh_result->b_size >> blkbits;
        struct dnode_of_data dn;
        int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
        pgoff_t pgofs, end_offset;
        int err = 0, ofs = 1;
        bool allocated = false;

        /* Get the page offset from the block offset(iblock) */
        pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));

        if (check_extent_cache(inode, pgofs, bh_result))
                goto out;

        if (create)
                f2fs_lock_op(sbi);

        /* When reading holes, we need its node page */
        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, pgofs, mode);
        if (err) {
                if (err == -ENOENT)
                        err = 0;
                goto unlock_out;
        }
        if (dn.data_blkaddr == NEW_ADDR)
                goto put_out;

        if (dn.data_blkaddr != NULL_ADDR) {
                map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
        } else if (create) {
                err = __allocate_data_block(&dn);
                if (err)
                        goto put_out;
                allocated = true;
                map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
        } else {
                goto put_out;
        }

        end_offset = IS_INODE(dn.node_page) ?
                        ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
        bh_result->b_size = (((size_t)1) << blkbits);
        dn.ofs_in_node++;
        pgofs++;

get_next:
        if (dn.ofs_in_node >= end_offset) {
                if (allocated)
                        sync_inode_page(&dn);
                allocated = false;
                f2fs_put_dnode(&dn);

                set_new_dnode(&dn, inode, NULL, NULL, 0);
                err = get_dnode_of_data(&dn, pgofs, mode);
                if (err) {
                        if (err == -ENOENT)
                                err = 0;
                        goto unlock_out;
                }
                if (dn.data_blkaddr == NEW_ADDR)
                        goto put_out;

                end_offset = IS_INODE(dn.node_page) ?
                        ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
        }

        if (maxblocks > (bh_result->b_size >> blkbits)) {
                block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
                if (blkaddr == NULL_ADDR && create) {
                        err = __allocate_data_block(&dn);
                        if (err)
                                goto sync_out;
                        allocated = true;
                        blkaddr = dn.data_blkaddr;
                }
                /* Give more consecutive addresses for the readahead */
                if (blkaddr == (bh_result->b_blocknr + ofs)) {
                        ofs++;
                        dn.ofs_in_node++;
                        pgofs++;
                        bh_result->b_size += (((size_t)1) << blkbits);
                        goto get_next;
                }
        }
sync_out:
        if (allocated)
                sync_inode_page(&dn);
put_out:
        f2fs_put_dnode(&dn);
unlock_out:
        if (create)
                f2fs_unlock_op(sbi);
out:
        trace_f2fs_get_data_block(inode, iblock, bh_result, err);
        return err;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
        struct inode *inode = page->mapping->host;
        int ret;

        /* If the file has inline data, try to read it directly */
        if (f2fs_has_inline_data(inode))
                ret = f2fs_read_inline_data(inode, page);
        else
                ret = mpage_readpage(page, get_data_block);

        return ret;
}

static int f2fs_read_data_pages(struct file *file,
                        struct address_space *mapping,
                        struct list_head *pages, unsigned nr_pages)
{
        struct inode *inode = file->f_mapping->host;

        /* If the file has inline data, skip readpages */
        if (f2fs_has_inline_data(inode))
                return 0;

        return mpage_readpages(mapping, pages, nr_pages, get_data_block);
}

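/*
 * Write one dirty data page: look up its current block address and either
 * rewrite it in place (when SSR is needed and the data is not cold) or
 * write it to a new location and update the extent cache.
 */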
int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
{
        struct inode *inode = page->mapping->host;
        block_t old_blkaddr, new_blkaddr;
        struct dnode_of_data dn;
        int err = 0;

        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
        if (err)
                return err;

        old_blkaddr = dn.data_blkaddr;

        /* This page is already truncated */
        if (old_blkaddr == NULL_ADDR)
                goto out_writepage;

        set_page_writeback(page);

        /*
         * If the current allocation needs SSR, it is better to do in-place
         * writes for the updated data.
         */
        if (unlikely(old_blkaddr != NEW_ADDR &&
                        !is_cold_data(page) &&
                        need_inplace_update(inode))) {
                rewrite_data_page(page, old_blkaddr, fio);
        } else {
                write_data_page(page, &dn, &new_blkaddr, fio);
                update_extent_cache(new_blkaddr, &dn);
        }
out_writepage:
        f2fs_put_dnode(&dn);
        return err;
}

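/*
 * Write back a single data page for the writeback path.  Pages fully beyond
 * i_size are dropped, dentry pages are written under checkpoint control,
 * and other pages are written under f2fs_lock_op(); inline data is handled
 * separately.  On failure the page is redirtied.
 */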
static int f2fs_write_data_page(struct page *page,
                                        struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        loff_t i_size = i_size_read(inode);
        const pgoff_t end_index = ((unsigned long long) i_size)
                                                        >> PAGE_CACHE_SHIFT;
        unsigned offset = 0;
        bool need_balance_fs = false;
        int err = 0;
        struct f2fs_io_info fio = {
                .type = DATA,
                .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
        };

        if (page->index < end_index)
                goto write;

        /*
         * If the offset is out of range of the file size,
         * this page does not have to be written to disk.
         */
        offset = i_size & (PAGE_CACHE_SIZE - 1);
        if ((page->index >= end_index + 1) || !offset) {
                if (S_ISDIR(inode->i_mode)) {
                        dec_page_count(sbi, F2FS_DIRTY_DENTS);
                        inode_dec_dirty_dents(inode);
                }
                goto out;
        }

        zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
        if (unlikely(sbi->por_doing)) {
                err = AOP_WRITEPAGE_ACTIVATE;
                goto redirty_out;
        }

        /* Dentry blocks are controlled by checkpoint */
        if (S_ISDIR(inode->i_mode)) {
                dec_page_count(sbi, F2FS_DIRTY_DENTS);
                inode_dec_dirty_dents(inode);
                err = do_write_data_page(page, &fio);
        } else {
                f2fs_lock_op(sbi);

                if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) {
                        err = f2fs_write_inline_data(inode, page, offset);
                        f2fs_unlock_op(sbi);
                        goto out;
                } else {
                        err = do_write_data_page(page, &fio);
                }

                f2fs_unlock_op(sbi);
                need_balance_fs = true;
        }
        if (err == -ENOENT)
                goto out;
        else if (err)
                goto redirty_out;

        if (wbc->for_reclaim) {
                f2fs_submit_merged_bio(sbi, DATA, WRITE);
                need_balance_fs = false;
        }

        clear_cold_data(page);
out:
        unlock_page(page);
        if (need_balance_fs)
                f2fs_balance_fs(sbi);
        return 0;

redirty_out:
        wbc->pages_skipped++;
        set_page_dirty(page);
        return err;
}

#define MAX_DESIRED_PAGES_WP	4096

static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
                        void *data)
{
        struct address_space *mapping = data;
        int ret = mapping->a_ops->writepage(page, wbc);
        mapping_set_error(mapping, ret);
        return ret;
}

static int f2fs_write_data_pages(struct address_space *mapping,
                            struct writeback_control *wbc)
{
        struct inode *inode = mapping->host;
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        bool locked = false;
        int ret;
        long excess_nrtw = 0, desired_nrtw;

        /* deal with chardevs and other special files */
        if (!mapping->a_ops->writepage)
                return 0;

        if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
                desired_nrtw = MAX_DESIRED_PAGES_WP;
                excess_nrtw = desired_nrtw - wbc->nr_to_write;
                wbc->nr_to_write = desired_nrtw;
        }

        if (!S_ISDIR(inode->i_mode)) {
                mutex_lock(&sbi->writepages);
                locked = true;
        }
        ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
        if (locked)
                mutex_unlock(&sbi->writepages);

        f2fs_submit_merged_bio(sbi, DATA, WRITE);

        remove_dirty_dir_inode(inode);

        wbc->nr_to_write -= excess_nrtw;
        return ret;
}

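/*
 * Prepare a page for a buffered write: convert inline data when the write no
 * longer fits inline, reserve the block under f2fs_lock_op(), and bring the
 * page uptodate by zeroing it (new or beyond-EOF blocks) or reading the old
 * contents.
 */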
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
                loff_t pos, unsigned len, unsigned flags,
                struct page **pagep, void **fsdata)
{
        struct inode *inode = mapping->host;
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct page *page;
        pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
        struct dnode_of_data dn;
        int err = 0;

        f2fs_balance_fs(sbi);
repeat:
        err = f2fs_convert_inline_data(inode, pos + len);
        if (err)
                return err;

        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
        *pagep = page;

        if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
                goto inline_data;

        f2fs_lock_op(sbi);
        set_new_dnode(&dn, inode, NULL, NULL, 0);
        err = f2fs_reserve_block(&dn, index);
        f2fs_unlock_op(sbi);

        if (err) {
                f2fs_put_page(page, 1);
                return err;
        }
inline_data:
        if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
                return 0;

        if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
                unsigned start = pos & (PAGE_CACHE_SIZE - 1);
                unsigned end = start + len;

                /* Reading beyond i_size is simple: memset to zero */
                zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
                goto out;
        }

        if (dn.data_blkaddr == NEW_ADDR) {
                zero_user_segment(page, 0, PAGE_CACHE_SIZE);
        } else {
                if (f2fs_has_inline_data(inode))
                        err = f2fs_read_inline_data(inode, page);
                else
                        err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
                                                        READ_SYNC);
                if (err)
                        return err;
                lock_page(page);
                if (unlikely(!PageUptodate(page))) {
                        f2fs_put_page(page, 1);
                        return -EIO;
                }
                if (unlikely(page->mapping != mapping)) {
                        f2fs_put_page(page, 1);
                        goto repeat;
                }
        }
out:
        SetPageUptodate(page);
        clear_cold_data(page);
        return 0;
}

static int f2fs_write_end(struct file *file,
                        struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
{
        struct inode *inode = page->mapping->host;

        SetPageUptodate(page);
        set_page_dirty(page);

        if (pos + copied > i_size_read(inode)) {
                i_size_write(inode, pos + copied);
                mark_inode_dirty(inode);
                update_inode_page(inode);
        }

        f2fs_put_page(page, 1);
        return copied;
}

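/*
 * Direct writes must be block aligned: both the file offset and every iovec
 * length have to be multiples of the block size.  Direct reads are not
 * restricted here.
 */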
static int check_direct_IO(struct inode *inode, int rw,
                const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
        unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
        int i;

        if (rw == READ)
                return 0;

        if (offset & blocksize_mask)
                return -EINVAL;

        for (i = 0; i < nr_segs; i++)
                if (iov[i].iov_len & blocksize_mask)
                        return -EINVAL;
        return 0;
}

static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
                const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;

        /* Let buffered I/O handle the inline data case. */
        if (f2fs_has_inline_data(inode))
                return 0;

        if (check_direct_IO(inode, rw, iov, offset, nr_segs))
                return 0;

        return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
                                                        get_data_block);
}

static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
                                      unsigned int length)
{
        struct inode *inode = page->mapping->host;
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

        if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
                dec_page_count(sbi, F2FS_DIRTY_DENTS);
                inode_dec_dirty_dents(inode);
        }
        ClearPagePrivate(page);
}

static int f2fs_release_data_page(struct page *page, gfp_t wait)
{
        ClearPagePrivate(page);
        return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;
        struct inode *inode = mapping->host;

        trace_f2fs_set_page_dirty(page, DATA);

        SetPageUptodate(page);
        mark_inode_dirty(inode);

        if (!PageDirty(page)) {
                __set_page_dirty_nobuffers(page);
                set_dirty_dir_page(inode, page);
                return 1;
        }
        return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
        return generic_block_bmap(mapping, block, get_data_block);
}

const struct address_space_operations f2fs_dblock_aops = {
        .readpage	= f2fs_read_data_page,
        .readpages	= f2fs_read_data_pages,
        .writepage	= f2fs_write_data_page,
        .writepages	= f2fs_write_data_pages,
        .write_begin	= f2fs_write_begin,
        .write_end	= f2fs_write_end,
        .set_page_dirty	= f2fs_set_data_page_dirty,
        .invalidatepage	= f2fs_invalidate_data_page,
        .releasepage	= f2fs_release_data_page,
        .direct_IO	= f2fs_direct_IO,
        .bmap		= f2fs_bmap,
};