/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;

static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino == F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
			is_cold_data(page))
		return true;
	return false;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct work_struct work;
	unsigned int cur_step;
	unsigned int enabled_steps;
};

static void __read_end_io(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i) {
		page = bv->bv_page;

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			SetPageError(page);
		} else {
			SetPageUptodate(page);
		}
		unlock_page(page);
	}
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(ctx->bio);

	bio_post_read_processing(ctx);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	switch (++ctx->cur_step) {
	case STEP_DECRYPT:
		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
			INIT_WORK(&ctx->work, decrypt_work);
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		ctx->cur_step++;
		/* fall-through */
	default:
		__read_end_io(ctx->bio);
	}
}

static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private && !bio->bi_status;
}

static void f2fs_read_end_io(struct bio *bio)
{
#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
		f2fs_show_injection_info(FAULT_IO);
		bio->bi_status = BLK_STS_IOERR;
	}
#endif

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		ctx->cur_step = STEP_INITIAL;
		bio_post_read_processing(ctx);
		return;
	}

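	/* no post-read step is enabled, or the bio already failed: finish the pages directly */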
	__read_end_io(bio);
}

static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_pullback_bio_page(&page, true);

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Return the block device that holds @blk_addr and, if a bio is given,
 * redirect the bio to that device.
 */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (FDEV(i).start_blk <= blk_addr &&
				FDEV(i).end_blk >= blk_addr) {
			blk_addr -= FDEV(i).start_blk;
			bdev = FDEV(i).bdev;
			break;
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}

/*
 * Low-level block read/write IO operations.
233 */ 234 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, 235 struct writeback_control *wbc, 236 int npages, bool is_read, 237 enum page_type type, enum temp_type temp) 238 { 239 struct bio *bio; 240 241 bio = f2fs_bio_alloc(sbi, npages, true); 242 243 f2fs_target_device(sbi, blk_addr, bio); 244 if (is_read) { 245 bio->bi_end_io = f2fs_read_end_io; 246 bio->bi_private = NULL; 247 } else { 248 bio->bi_end_io = f2fs_write_end_io; 249 bio->bi_private = sbi; 250 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp); 251 } 252 if (wbc) 253 wbc_init_bio(wbc, bio); 254 255 return bio; 256 } 257 258 static inline void __submit_bio(struct f2fs_sb_info *sbi, 259 struct bio *bio, enum page_type type) 260 { 261 if (!is_read_io(bio_op(bio))) { 262 unsigned int start; 263 264 if (type != DATA && type != NODE) 265 goto submit_io; 266 267 if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug) 268 blk_finish_plug(current->plug); 269 270 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; 271 start %= F2FS_IO_SIZE(sbi); 272 273 if (start == 0) 274 goto submit_io; 275 276 /* fill dummy pages */ 277 for (; start < F2FS_IO_SIZE(sbi); start++) { 278 struct page *page = 279 mempool_alloc(sbi->write_io_dummy, 280 GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL); 281 f2fs_bug_on(sbi, !page); 282 283 SetPagePrivate(page); 284 set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE); 285 lock_page(page); 286 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) 287 f2fs_bug_on(sbi, 1); 288 } 289 /* 290 * In the NODE case, we lose next block address chain. So, we 291 * need to do checkpoint in f2fs_sync_file. 292 */ 293 if (type == NODE) 294 set_sbi_flag(sbi, SBI_NEED_CP); 295 } 296 submit_io: 297 if (is_read_io(bio_op(bio))) 298 trace_f2fs_submit_read_bio(sbi->sb, type, bio); 299 else 300 trace_f2fs_submit_write_bio(sbi->sb, type, bio); 301 submit_bio(bio); 302 } 303 304 static void __submit_merged_bio(struct f2fs_bio_info *io) 305 { 306 struct f2fs_io_info *fio = &io->fio; 307 308 if (!io->bio) 309 return; 310 311 bio_set_op_attrs(io->bio, fio->op, fio->op_flags); 312 313 if (is_read_io(fio->op)) 314 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio); 315 else 316 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio); 317 318 __submit_bio(io->sbi, io->bio, fio->type); 319 io->bio = NULL; 320 } 321 322 static bool __has_merged_page(struct f2fs_bio_info *io, 323 struct inode *inode, nid_t ino, pgoff_t idx) 324 { 325 struct bio_vec *bvec; 326 struct page *target; 327 int i; 328 329 if (!io->bio) 330 return false; 331 332 if (!inode && !ino) 333 return true; 334 335 bio_for_each_segment_all(bvec, io->bio, i) { 336 337 if (bvec->bv_page->mapping) 338 target = bvec->bv_page; 339 else 340 target = fscrypt_control_page(bvec->bv_page); 341 342 if (idx != target->index) 343 continue; 344 345 if (inode && inode == target->mapping->host) 346 return true; 347 if (ino && ino == ino_of_node(target)) 348 return true; 349 } 350 351 return false; 352 } 353 354 static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode, 355 nid_t ino, pgoff_t idx, enum page_type type) 356 { 357 enum page_type btype = PAGE_TYPE_OF_BIO(type); 358 enum temp_type temp; 359 struct f2fs_bio_info *io; 360 bool ret = false; 361 362 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { 363 io = sbi->write_io[btype] + temp; 364 365 down_read(&io->io_rwsem); 366 ret = __has_merged_page(io, inode, ino, idx); 367 up_read(&io->io_rwsem); 368 369 /* TODO: use HOT temp only for meta pages now. 
*/ 370 if (ret || btype == META) 371 break; 372 } 373 return ret; 374 } 375 376 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, 377 enum page_type type, enum temp_type temp) 378 { 379 enum page_type btype = PAGE_TYPE_OF_BIO(type); 380 struct f2fs_bio_info *io = sbi->write_io[btype] + temp; 381 382 down_write(&io->io_rwsem); 383 384 /* change META to META_FLUSH in the checkpoint procedure */ 385 if (type >= META_FLUSH) { 386 io->fio.type = META_FLUSH; 387 io->fio.op = REQ_OP_WRITE; 388 io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC; 389 if (!test_opt(sbi, NOBARRIER)) 390 io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; 391 } 392 __submit_merged_bio(io); 393 up_write(&io->io_rwsem); 394 } 395 396 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, 397 struct inode *inode, nid_t ino, pgoff_t idx, 398 enum page_type type, bool force) 399 { 400 enum temp_type temp; 401 402 if (!force && !has_merged_page(sbi, inode, ino, idx, type)) 403 return; 404 405 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { 406 407 __f2fs_submit_merged_write(sbi, type, temp); 408 409 /* TODO: use HOT temp only for meta pages now. */ 410 if (type >= META) 411 break; 412 } 413 } 414 415 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) 416 { 417 __submit_merged_write_cond(sbi, NULL, 0, 0, type, true); 418 } 419 420 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, 421 struct inode *inode, nid_t ino, pgoff_t idx, 422 enum page_type type) 423 { 424 __submit_merged_write_cond(sbi, inode, ino, idx, type, false); 425 } 426 427 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) 428 { 429 f2fs_submit_merged_write(sbi, DATA); 430 f2fs_submit_merged_write(sbi, NODE); 431 f2fs_submit_merged_write(sbi, META); 432 } 433 434 /* 435 * Fill the locked page with data located in the block address. 436 * A caller needs to unlock the page on failure. 437 */ 438 int f2fs_submit_page_bio(struct f2fs_io_info *fio) 439 { 440 struct bio *bio; 441 struct page *page = fio->encrypted_page ? 442 fio->encrypted_page : fio->page; 443 444 verify_block_addr(fio, fio->new_blkaddr); 445 trace_f2fs_submit_page_bio(page, fio); 446 f2fs_trace_ios(fio, 0); 447 448 /* Allocate a new bio */ 449 bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, 450 1, is_read_io(fio->op), fio->type, fio->temp); 451 452 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { 453 bio_put(bio); 454 return -EFAULT; 455 } 456 bio_set_op_attrs(bio, fio->op, fio->op_flags); 457 458 __submit_bio(fio->sbi, bio, fio->type); 459 460 if (!is_read_io(fio->op)) 461 inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); 462 return 0; 463 } 464 465 void f2fs_submit_page_write(struct f2fs_io_info *fio) 466 { 467 struct f2fs_sb_info *sbi = fio->sbi; 468 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); 469 struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; 470 struct page *bio_page; 471 472 f2fs_bug_on(sbi, is_read_io(fio->op)); 473 474 down_write(&io->io_rwsem); 475 next: 476 if (fio->in_list) { 477 spin_lock(&io->io_lock); 478 if (list_empty(&io->io_list)) { 479 spin_unlock(&io->io_lock); 480 goto out; 481 } 482 fio = list_first_entry(&io->io_list, 483 struct f2fs_io_info, list); 484 list_del(&fio->list); 485 spin_unlock(&io->io_lock); 486 } 487 488 if (is_valid_blkaddr(fio->old_blkaddr)) 489 verify_block_addr(fio, fio->old_blkaddr); 490 verify_block_addr(fio, fio->new_blkaddr); 491 492 bio_page = fio->encrypted_page ? 
fio->encrypted_page : fio->page; 493 494 /* set submitted = true as a return value */ 495 fio->submitted = true; 496 497 inc_page_count(sbi, WB_DATA_TYPE(bio_page)); 498 499 if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 || 500 (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) || 501 !__same_bdev(sbi, fio->new_blkaddr, io->bio))) 502 __submit_merged_bio(io); 503 alloc_new: 504 if (io->bio == NULL) { 505 if ((fio->type == DATA || fio->type == NODE) && 506 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { 507 dec_page_count(sbi, WB_DATA_TYPE(bio_page)); 508 fio->retry = true; 509 goto skip; 510 } 511 io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc, 512 BIO_MAX_PAGES, false, 513 fio->type, fio->temp); 514 io->fio = *fio; 515 } 516 517 if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) { 518 __submit_merged_bio(io); 519 goto alloc_new; 520 } 521 522 if (fio->io_wbc) 523 wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE); 524 525 io->last_block_in_bio = fio->new_blkaddr; 526 f2fs_trace_ios(fio, 0); 527 528 trace_f2fs_submit_page_write(fio->page, fio); 529 skip: 530 if (fio->in_list) 531 goto next; 532 out: 533 up_write(&io->io_rwsem); 534 } 535 536 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, 537 unsigned nr_pages) 538 { 539 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 540 struct bio *bio; 541 struct bio_post_read_ctx *ctx; 542 unsigned int post_read_steps = 0; 543 544 bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); 545 if (!bio) 546 return ERR_PTR(-ENOMEM); 547 f2fs_target_device(sbi, blkaddr, bio); 548 bio->bi_end_io = f2fs_read_end_io; 549 bio_set_op_attrs(bio, REQ_OP_READ, 0); 550 551 if (f2fs_encrypted_file(inode)) 552 post_read_steps |= 1 << STEP_DECRYPT; 553 if (post_read_steps) { 554 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); 555 if (!ctx) { 556 bio_put(bio); 557 return ERR_PTR(-ENOMEM); 558 } 559 ctx->bio = bio; 560 ctx->enabled_steps = post_read_steps; 561 bio->bi_private = ctx; 562 563 /* wait the page to be moved by cleaning */ 564 f2fs_wait_on_block_writeback(sbi, blkaddr); 565 } 566 567 return bio; 568 } 569 570 /* This can handle encryption stuffs */ 571 static int f2fs_submit_page_read(struct inode *inode, struct page *page, 572 block_t blkaddr) 573 { 574 struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1); 575 576 if (IS_ERR(bio)) 577 return PTR_ERR(bio); 578 579 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { 580 bio_put(bio); 581 return -EFAULT; 582 } 583 __submit_bio(F2FS_I_SB(inode), bio, DATA); 584 return 0; 585 } 586 587 static void __set_data_blkaddr(struct dnode_of_data *dn) 588 { 589 struct f2fs_node *rn = F2FS_NODE(dn->node_page); 590 __le32 *addr_array; 591 int base = 0; 592 593 if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) 594 base = get_extra_isize(dn->inode); 595 596 /* Get physical address of data block */ 597 addr_array = blkaddr_in_node(rn); 598 addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 599 } 600 601 /* 602 * Lock ordering for the change of data block address: 603 * ->data_page 604 * ->node_page 605 * update block addresses in the node page 606 */ 607 void f2fs_set_data_blkaddr(struct dnode_of_data *dn) 608 { 609 f2fs_wait_on_page_writeback(dn->node_page, NODE, true); 610 __set_data_blkaddr(dn); 611 if (set_page_dirty(dn->node_page)) 612 dn->node_changed = true; 613 } 614 615 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) 616 { 617 dn->data_blkaddr = blkaddr; 618 
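	/* record the new address in the node page, then keep the extent cache in sync */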
f2fs_set_data_blkaddr(dn); 619 f2fs_update_extent_cache(dn); 620 } 621 622 /* dn->ofs_in_node will be returned with up-to-date last block pointer */ 623 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) 624 { 625 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 626 int err; 627 628 if (!count) 629 return 0; 630 631 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) 632 return -EPERM; 633 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) 634 return err; 635 636 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, 637 dn->ofs_in_node, count); 638 639 f2fs_wait_on_page_writeback(dn->node_page, NODE, true); 640 641 for (; count > 0; dn->ofs_in_node++) { 642 block_t blkaddr = datablock_addr(dn->inode, 643 dn->node_page, dn->ofs_in_node); 644 if (blkaddr == NULL_ADDR) { 645 dn->data_blkaddr = NEW_ADDR; 646 __set_data_blkaddr(dn); 647 count--; 648 } 649 } 650 651 if (set_page_dirty(dn->node_page)) 652 dn->node_changed = true; 653 return 0; 654 } 655 656 /* Should keep dn->ofs_in_node unchanged */ 657 int f2fs_reserve_new_block(struct dnode_of_data *dn) 658 { 659 unsigned int ofs_in_node = dn->ofs_in_node; 660 int ret; 661 662 ret = f2fs_reserve_new_blocks(dn, 1); 663 dn->ofs_in_node = ofs_in_node; 664 return ret; 665 } 666 667 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) 668 { 669 bool need_put = dn->inode_page ? false : true; 670 int err; 671 672 err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE); 673 if (err) 674 return err; 675 676 if (dn->data_blkaddr == NULL_ADDR) 677 err = f2fs_reserve_new_block(dn); 678 if (err || need_put) 679 f2fs_put_dnode(dn); 680 return err; 681 } 682 683 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) 684 { 685 struct extent_info ei = {0,0,0}; 686 struct inode *inode = dn->inode; 687 688 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 689 dn->data_blkaddr = ei.blk + index - ei.fofs; 690 return 0; 691 } 692 693 return f2fs_reserve_block(dn, index); 694 } 695 696 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, 697 int op_flags, bool for_write) 698 { 699 struct address_space *mapping = inode->i_mapping; 700 struct dnode_of_data dn; 701 struct page *page; 702 struct extent_info ei = {0,0,0}; 703 int err; 704 705 page = f2fs_grab_cache_page(mapping, index, for_write); 706 if (!page) 707 return ERR_PTR(-ENOMEM); 708 709 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 710 dn.data_blkaddr = ei.blk + index - ei.fofs; 711 goto got_it; 712 } 713 714 set_new_dnode(&dn, inode, NULL, NULL, 0); 715 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 716 if (err) 717 goto put_err; 718 f2fs_put_dnode(&dn); 719 720 if (unlikely(dn.data_blkaddr == NULL_ADDR)) { 721 err = -ENOENT; 722 goto put_err; 723 } 724 got_it: 725 if (PageUptodate(page)) { 726 unlock_page(page); 727 return page; 728 } 729 730 /* 731 * A new dentry page is allocated but not able to be written, since its 732 * new inode page couldn't be allocated due to -ENOSPC. 733 * In such the case, its blkaddr can be remained as NEW_ADDR. 734 * see, f2fs_add_link -> f2fs_get_new_data_page -> 735 * f2fs_init_inode_metadata. 
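	 * Such a page is filled with zeros below rather than being read from disk.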
736 */ 737 if (dn.data_blkaddr == NEW_ADDR) { 738 zero_user_segment(page, 0, PAGE_SIZE); 739 if (!PageUptodate(page)) 740 SetPageUptodate(page); 741 unlock_page(page); 742 return page; 743 } 744 745 err = f2fs_submit_page_read(inode, page, dn.data_blkaddr); 746 if (err) 747 goto put_err; 748 return page; 749 750 put_err: 751 f2fs_put_page(page, 1); 752 return ERR_PTR(err); 753 } 754 755 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) 756 { 757 struct address_space *mapping = inode->i_mapping; 758 struct page *page; 759 760 page = find_get_page(mapping, index); 761 if (page && PageUptodate(page)) 762 return page; 763 f2fs_put_page(page, 0); 764 765 page = f2fs_get_read_data_page(inode, index, 0, false); 766 if (IS_ERR(page)) 767 return page; 768 769 if (PageUptodate(page)) 770 return page; 771 772 wait_on_page_locked(page); 773 if (unlikely(!PageUptodate(page))) { 774 f2fs_put_page(page, 0); 775 return ERR_PTR(-EIO); 776 } 777 return page; 778 } 779 780 /* 781 * If it tries to access a hole, return an error. 782 * Because, the callers, functions in dir.c and GC, should be able to know 783 * whether this page exists or not. 784 */ 785 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, 786 bool for_write) 787 { 788 struct address_space *mapping = inode->i_mapping; 789 struct page *page; 790 repeat: 791 page = f2fs_get_read_data_page(inode, index, 0, for_write); 792 if (IS_ERR(page)) 793 return page; 794 795 /* wait for read completion */ 796 lock_page(page); 797 if (unlikely(page->mapping != mapping)) { 798 f2fs_put_page(page, 1); 799 goto repeat; 800 } 801 if (unlikely(!PageUptodate(page))) { 802 f2fs_put_page(page, 1); 803 return ERR_PTR(-EIO); 804 } 805 return page; 806 } 807 808 /* 809 * Caller ensures that this data page is never allocated. 810 * A new zero-filled data page is allocated in the page cache. 811 * 812 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and 813 * f2fs_unlock_op(). 814 * Note that, ipage is set only by make_empty_dir, and if any error occur, 815 * ipage should be released by this function. 816 */ 817 struct page *f2fs_get_new_data_page(struct inode *inode, 818 struct page *ipage, pgoff_t index, bool new_i_size) 819 { 820 struct address_space *mapping = inode->i_mapping; 821 struct page *page; 822 struct dnode_of_data dn; 823 int err; 824 825 page = f2fs_grab_cache_page(mapping, index, true); 826 if (!page) { 827 /* 828 * before exiting, we should make sure ipage will be released 829 * if any error occur. 
830 */ 831 f2fs_put_page(ipage, 1); 832 return ERR_PTR(-ENOMEM); 833 } 834 835 set_new_dnode(&dn, inode, ipage, NULL, 0); 836 err = f2fs_reserve_block(&dn, index); 837 if (err) { 838 f2fs_put_page(page, 1); 839 return ERR_PTR(err); 840 } 841 if (!ipage) 842 f2fs_put_dnode(&dn); 843 844 if (PageUptodate(page)) 845 goto got_it; 846 847 if (dn.data_blkaddr == NEW_ADDR) { 848 zero_user_segment(page, 0, PAGE_SIZE); 849 if (!PageUptodate(page)) 850 SetPageUptodate(page); 851 } else { 852 f2fs_put_page(page, 1); 853 854 /* if ipage exists, blkaddr should be NEW_ADDR */ 855 f2fs_bug_on(F2FS_I_SB(inode), ipage); 856 page = f2fs_get_lock_data_page(inode, index, true); 857 if (IS_ERR(page)) 858 return page; 859 } 860 got_it: 861 if (new_i_size && i_size_read(inode) < 862 ((loff_t)(index + 1) << PAGE_SHIFT)) 863 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); 864 return page; 865 } 866 867 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) 868 { 869 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 870 struct f2fs_summary sum; 871 struct node_info ni; 872 pgoff_t fofs; 873 blkcnt_t count = 1; 874 int err; 875 876 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) 877 return -EPERM; 878 879 dn->data_blkaddr = datablock_addr(dn->inode, 880 dn->node_page, dn->ofs_in_node); 881 if (dn->data_blkaddr == NEW_ADDR) 882 goto alloc; 883 884 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) 885 return err; 886 887 alloc: 888 f2fs_get_node_info(sbi, dn->nid, &ni); 889 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 890 891 f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr, 892 &sum, seg_type, NULL, false); 893 f2fs_set_data_blkaddr(dn); 894 895 /* update i_size */ 896 fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + 897 dn->ofs_in_node; 898 if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) 899 f2fs_i_size_write(dn->inode, 900 ((loff_t)(fofs + 1) << PAGE_SHIFT)); 901 return 0; 902 } 903 904 int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) 905 { 906 struct inode *inode = file_inode(iocb->ki_filp); 907 struct f2fs_map_blocks map; 908 int flag; 909 int err = 0; 910 bool direct_io = iocb->ki_flags & IOCB_DIRECT; 911 912 /* convert inline data for Direct I/O*/ 913 if (direct_io) { 914 err = f2fs_convert_inline_inode(inode); 915 if (err) 916 return err; 917 } 918 919 if (is_inode_flag_set(inode, FI_NO_PREALLOC)) 920 return 0; 921 922 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); 923 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); 924 if (map.m_len > map.m_lblk) 925 map.m_len -= map.m_lblk; 926 else 927 map.m_len = 0; 928 929 map.m_next_pgofs = NULL; 930 map.m_next_extent = NULL; 931 map.m_seg_type = NO_CHECK_TYPE; 932 933 if (direct_io) { 934 map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); 935 flag = f2fs_force_buffered_io(inode, WRITE) ? 
936 F2FS_GET_BLOCK_PRE_AIO : 937 F2FS_GET_BLOCK_PRE_DIO; 938 goto map_blocks; 939 } 940 if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { 941 err = f2fs_convert_inline_inode(inode); 942 if (err) 943 return err; 944 } 945 if (f2fs_has_inline_data(inode)) 946 return err; 947 948 flag = F2FS_GET_BLOCK_PRE_AIO; 949 950 map_blocks: 951 err = f2fs_map_blocks(inode, &map, 1, flag); 952 if (map.m_len > 0 && err == -ENOSPC) { 953 if (!direct_io) 954 set_inode_flag(inode, FI_NO_PREALLOC); 955 err = 0; 956 } 957 return err; 958 } 959 960 static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) 961 { 962 if (flag == F2FS_GET_BLOCK_PRE_AIO) { 963 if (lock) 964 down_read(&sbi->node_change); 965 else 966 up_read(&sbi->node_change); 967 } else { 968 if (lock) 969 f2fs_lock_op(sbi); 970 else 971 f2fs_unlock_op(sbi); 972 } 973 } 974 975 /* 976 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with 977 * f2fs_map_blocks structure. 978 * If original data blocks are allocated, then give them to blockdev. 979 * Otherwise, 980 * a. preallocate requested block addresses 981 * b. do not use extent cache for better performance 982 * c. give the block addresses to blockdev 983 */ 984 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, 985 int create, int flag) 986 { 987 unsigned int maxblocks = map->m_len; 988 struct dnode_of_data dn; 989 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 990 int mode = create ? ALLOC_NODE : LOOKUP_NODE; 991 pgoff_t pgofs, end_offset, end; 992 int err = 0, ofs = 1; 993 unsigned int ofs_in_node, last_ofs_in_node; 994 blkcnt_t prealloc; 995 struct extent_info ei = {0,0,0}; 996 block_t blkaddr; 997 unsigned int start_pgofs; 998 999 if (!maxblocks) 1000 return 0; 1001 1002 map->m_len = 0; 1003 map->m_flags = 0; 1004 1005 /* it only supports block size == page size */ 1006 pgofs = (pgoff_t)map->m_lblk; 1007 end = pgofs + maxblocks; 1008 1009 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { 1010 map->m_pblk = ei.blk + pgofs - ei.fofs; 1011 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); 1012 map->m_flags = F2FS_MAP_MAPPED; 1013 if (map->m_next_extent) 1014 *map->m_next_extent = pgofs + map->m_len; 1015 goto out; 1016 } 1017 1018 next_dnode: 1019 if (create) 1020 __do_map_lock(sbi, flag, true); 1021 1022 /* When reading holes, we need its node page */ 1023 set_new_dnode(&dn, inode, NULL, NULL, 0); 1024 err = f2fs_get_dnode_of_data(&dn, pgofs, mode); 1025 if (err) { 1026 if (flag == F2FS_GET_BLOCK_BMAP) 1027 map->m_pblk = 0; 1028 if (err == -ENOENT) { 1029 err = 0; 1030 if (map->m_next_pgofs) 1031 *map->m_next_pgofs = 1032 f2fs_get_next_page_offset(&dn, pgofs); 1033 if (map->m_next_extent) 1034 *map->m_next_extent = 1035 f2fs_get_next_page_offset(&dn, pgofs); 1036 } 1037 goto unlock_out; 1038 } 1039 1040 start_pgofs = pgofs; 1041 prealloc = 0; 1042 last_ofs_in_node = ofs_in_node = dn.ofs_in_node; 1043 end_offset = ADDRS_PER_PAGE(dn.node_page, inode); 1044 1045 next_block: 1046 blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node); 1047 1048 if (!is_valid_blkaddr(blkaddr)) { 1049 if (create) { 1050 if (unlikely(f2fs_cp_error(sbi))) { 1051 err = -EIO; 1052 goto sync_out; 1053 } 1054 if (flag == F2FS_GET_BLOCK_PRE_AIO) { 1055 if (blkaddr == NULL_ADDR) { 1056 prealloc++; 1057 last_ofs_in_node = dn.ofs_in_node; 1058 } 1059 } else { 1060 err = __allocate_data_block(&dn, 1061 map->m_seg_type); 1062 if (!err) 1063 set_inode_flag(inode, FI_APPEND_WRITE); 1064 } 1065 if (err) 1066 goto sync_out; 
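			/* the block was either allocated above or queued for batched preallocation */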
1067 map->m_flags |= F2FS_MAP_NEW; 1068 blkaddr = dn.data_blkaddr; 1069 } else { 1070 if (flag == F2FS_GET_BLOCK_BMAP) { 1071 map->m_pblk = 0; 1072 goto sync_out; 1073 } 1074 if (flag == F2FS_GET_BLOCK_PRECACHE) 1075 goto sync_out; 1076 if (flag == F2FS_GET_BLOCK_FIEMAP && 1077 blkaddr == NULL_ADDR) { 1078 if (map->m_next_pgofs) 1079 *map->m_next_pgofs = pgofs + 1; 1080 goto sync_out; 1081 } 1082 if (flag != F2FS_GET_BLOCK_FIEMAP) { 1083 /* for defragment case */ 1084 if (map->m_next_pgofs) 1085 *map->m_next_pgofs = pgofs + 1; 1086 goto sync_out; 1087 } 1088 } 1089 } 1090 1091 if (flag == F2FS_GET_BLOCK_PRE_AIO) 1092 goto skip; 1093 1094 if (map->m_len == 0) { 1095 /* preallocated unwritten block should be mapped for fiemap. */ 1096 if (blkaddr == NEW_ADDR) 1097 map->m_flags |= F2FS_MAP_UNWRITTEN; 1098 map->m_flags |= F2FS_MAP_MAPPED; 1099 1100 map->m_pblk = blkaddr; 1101 map->m_len = 1; 1102 } else if ((map->m_pblk != NEW_ADDR && 1103 blkaddr == (map->m_pblk + ofs)) || 1104 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || 1105 flag == F2FS_GET_BLOCK_PRE_DIO) { 1106 ofs++; 1107 map->m_len++; 1108 } else { 1109 goto sync_out; 1110 } 1111 1112 skip: 1113 dn.ofs_in_node++; 1114 pgofs++; 1115 1116 /* preallocate blocks in batch for one dnode page */ 1117 if (flag == F2FS_GET_BLOCK_PRE_AIO && 1118 (pgofs == end || dn.ofs_in_node == end_offset)) { 1119 1120 dn.ofs_in_node = ofs_in_node; 1121 err = f2fs_reserve_new_blocks(&dn, prealloc); 1122 if (err) 1123 goto sync_out; 1124 1125 map->m_len += dn.ofs_in_node - ofs_in_node; 1126 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { 1127 err = -ENOSPC; 1128 goto sync_out; 1129 } 1130 dn.ofs_in_node = end_offset; 1131 } 1132 1133 if (pgofs >= end) 1134 goto sync_out; 1135 else if (dn.ofs_in_node < end_offset) 1136 goto next_block; 1137 1138 if (flag == F2FS_GET_BLOCK_PRECACHE) { 1139 if (map->m_flags & F2FS_MAP_MAPPED) { 1140 unsigned int ofs = start_pgofs - map->m_lblk; 1141 1142 f2fs_update_extent_cache_range(&dn, 1143 start_pgofs, map->m_pblk + ofs, 1144 map->m_len - ofs); 1145 } 1146 } 1147 1148 f2fs_put_dnode(&dn); 1149 1150 if (create) { 1151 __do_map_lock(sbi, flag, false); 1152 f2fs_balance_fs(sbi, dn.node_changed); 1153 } 1154 goto next_dnode; 1155 1156 sync_out: 1157 if (flag == F2FS_GET_BLOCK_PRECACHE) { 1158 if (map->m_flags & F2FS_MAP_MAPPED) { 1159 unsigned int ofs = start_pgofs - map->m_lblk; 1160 1161 f2fs_update_extent_cache_range(&dn, 1162 start_pgofs, map->m_pblk + ofs, 1163 map->m_len - ofs); 1164 } 1165 if (map->m_next_extent) 1166 *map->m_next_extent = pgofs + 1; 1167 } 1168 f2fs_put_dnode(&dn); 1169 unlock_out: 1170 if (create) { 1171 __do_map_lock(sbi, flag, false); 1172 f2fs_balance_fs(sbi, dn.node_changed); 1173 } 1174 out: 1175 trace_f2fs_map_blocks(inode, map, err); 1176 return err; 1177 } 1178 1179 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) 1180 { 1181 struct f2fs_map_blocks map; 1182 block_t last_lblk; 1183 int err; 1184 1185 if (pos + len > i_size_read(inode)) 1186 return false; 1187 1188 map.m_lblk = F2FS_BYTES_TO_BLK(pos); 1189 map.m_next_pgofs = NULL; 1190 map.m_next_extent = NULL; 1191 map.m_seg_type = NO_CHECK_TYPE; 1192 last_lblk = F2FS_BLK_ALIGN(pos + len); 1193 1194 while (map.m_lblk < last_lblk) { 1195 map.m_len = last_lblk - map.m_lblk; 1196 err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); 1197 if (err || map.m_len == 0) 1198 return false; 1199 map.m_lblk += map.m_len; 1200 } 1201 return true; 1202 } 1203 1204 static int __get_data_block(struct inode *inode, 
sector_t iblock, 1205 struct buffer_head *bh, int create, int flag, 1206 pgoff_t *next_pgofs, int seg_type) 1207 { 1208 struct f2fs_map_blocks map; 1209 int err; 1210 1211 map.m_lblk = iblock; 1212 map.m_len = bh->b_size >> inode->i_blkbits; 1213 map.m_next_pgofs = next_pgofs; 1214 map.m_next_extent = NULL; 1215 map.m_seg_type = seg_type; 1216 1217 err = f2fs_map_blocks(inode, &map, create, flag); 1218 if (!err) { 1219 map_bh(bh, inode->i_sb, map.m_pblk); 1220 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; 1221 bh->b_size = (u64)map.m_len << inode->i_blkbits; 1222 } 1223 return err; 1224 } 1225 1226 static int get_data_block(struct inode *inode, sector_t iblock, 1227 struct buffer_head *bh_result, int create, int flag, 1228 pgoff_t *next_pgofs) 1229 { 1230 return __get_data_block(inode, iblock, bh_result, create, 1231 flag, next_pgofs, 1232 NO_CHECK_TYPE); 1233 } 1234 1235 static int get_data_block_dio(struct inode *inode, sector_t iblock, 1236 struct buffer_head *bh_result, int create) 1237 { 1238 return __get_data_block(inode, iblock, bh_result, create, 1239 F2FS_GET_BLOCK_DEFAULT, NULL, 1240 f2fs_rw_hint_to_seg_type( 1241 inode->i_write_hint)); 1242 } 1243 1244 static int get_data_block_bmap(struct inode *inode, sector_t iblock, 1245 struct buffer_head *bh_result, int create) 1246 { 1247 /* Block number less than F2FS MAX BLOCKS */ 1248 if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks)) 1249 return -EFBIG; 1250 1251 return __get_data_block(inode, iblock, bh_result, create, 1252 F2FS_GET_BLOCK_BMAP, NULL, 1253 NO_CHECK_TYPE); 1254 } 1255 1256 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) 1257 { 1258 return (offset >> inode->i_blkbits); 1259 } 1260 1261 static inline loff_t blk_to_logical(struct inode *inode, sector_t blk) 1262 { 1263 return (blk << inode->i_blkbits); 1264 } 1265 1266 static int f2fs_xattr_fiemap(struct inode *inode, 1267 struct fiemap_extent_info *fieinfo) 1268 { 1269 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1270 struct page *page; 1271 struct node_info ni; 1272 __u64 phys = 0, len; 1273 __u32 flags; 1274 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 1275 int err = 0; 1276 1277 if (f2fs_has_inline_xattr(inode)) { 1278 int offset; 1279 1280 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), 1281 inode->i_ino, false); 1282 if (!page) 1283 return -ENOMEM; 1284 1285 f2fs_get_node_info(sbi, inode->i_ino, &ni); 1286 1287 phys = (__u64)blk_to_logical(inode, ni.blk_addr); 1288 offset = offsetof(struct f2fs_inode, i_addr) + 1289 sizeof(__le32) * (DEF_ADDRS_PER_INODE - 1290 get_inline_xattr_addrs(inode)); 1291 1292 phys += offset; 1293 len = inline_xattr_size(inode); 1294 1295 f2fs_put_page(page, 1); 1296 1297 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; 1298 1299 if (!xnid) 1300 flags |= FIEMAP_EXTENT_LAST; 1301 1302 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); 1303 if (err || err == 1) 1304 return err; 1305 } 1306 1307 if (xnid) { 1308 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false); 1309 if (!page) 1310 return -ENOMEM; 1311 1312 f2fs_get_node_info(sbi, xnid, &ni); 1313 1314 phys = (__u64)blk_to_logical(inode, ni.blk_addr); 1315 len = inode->i_sb->s_blocksize; 1316 1317 f2fs_put_page(page, 1); 1318 1319 flags = FIEMAP_EXTENT_LAST; 1320 } 1321 1322 if (phys) 1323 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); 1324 1325 return (err < 0 ? 
err : 0); 1326 } 1327 1328 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1329 u64 start, u64 len) 1330 { 1331 struct buffer_head map_bh; 1332 sector_t start_blk, last_blk; 1333 pgoff_t next_pgofs; 1334 u64 logical = 0, phys = 0, size = 0; 1335 u32 flags = 0; 1336 int ret = 0; 1337 1338 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { 1339 ret = f2fs_precache_extents(inode); 1340 if (ret) 1341 return ret; 1342 } 1343 1344 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR); 1345 if (ret) 1346 return ret; 1347 1348 inode_lock(inode); 1349 1350 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 1351 ret = f2fs_xattr_fiemap(inode, fieinfo); 1352 goto out; 1353 } 1354 1355 if (f2fs_has_inline_data(inode)) { 1356 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); 1357 if (ret != -EAGAIN) 1358 goto out; 1359 } 1360 1361 if (logical_to_blk(inode, len) == 0) 1362 len = blk_to_logical(inode, 1); 1363 1364 start_blk = logical_to_blk(inode, start); 1365 last_blk = logical_to_blk(inode, start + len - 1); 1366 1367 next: 1368 memset(&map_bh, 0, sizeof(struct buffer_head)); 1369 map_bh.b_size = len; 1370 1371 ret = get_data_block(inode, start_blk, &map_bh, 0, 1372 F2FS_GET_BLOCK_FIEMAP, &next_pgofs); 1373 if (ret) 1374 goto out; 1375 1376 /* HOLE */ 1377 if (!buffer_mapped(&map_bh)) { 1378 start_blk = next_pgofs; 1379 1380 if (blk_to_logical(inode, start_blk) < blk_to_logical(inode, 1381 F2FS_I_SB(inode)->max_file_blocks)) 1382 goto prep_next; 1383 1384 flags |= FIEMAP_EXTENT_LAST; 1385 } 1386 1387 if (size) { 1388 if (f2fs_encrypted_inode(inode)) 1389 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; 1390 1391 ret = fiemap_fill_next_extent(fieinfo, logical, 1392 phys, size, flags); 1393 } 1394 1395 if (start_blk > last_blk || ret) 1396 goto out; 1397 1398 logical = blk_to_logical(inode, start_blk); 1399 phys = blk_to_logical(inode, map_bh.b_blocknr); 1400 size = map_bh.b_size; 1401 flags = 0; 1402 if (buffer_unwritten(&map_bh)) 1403 flags = FIEMAP_EXTENT_UNWRITTEN; 1404 1405 start_blk += logical_to_blk(inode, size); 1406 1407 prep_next: 1408 cond_resched(); 1409 if (fatal_signal_pending(current)) 1410 ret = -EINTR; 1411 else 1412 goto next; 1413 out: 1414 if (ret == 1) 1415 ret = 0; 1416 1417 inode_unlock(inode); 1418 return ret; 1419 } 1420 1421 /* 1422 * This function was originally taken from fs/mpage.c, and customized for f2fs. 1423 * Major change was from block_size == page_size in f2fs by default. 
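 * (f2fs always uses block_size == page_size, so each page maps to exactly one block.)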
1424 */ 1425 static int f2fs_mpage_readpages(struct address_space *mapping, 1426 struct list_head *pages, struct page *page, 1427 unsigned nr_pages) 1428 { 1429 struct bio *bio = NULL; 1430 sector_t last_block_in_bio = 0; 1431 struct inode *inode = mapping->host; 1432 const unsigned blkbits = inode->i_blkbits; 1433 const unsigned blocksize = 1 << blkbits; 1434 sector_t block_in_file; 1435 sector_t last_block; 1436 sector_t last_block_in_file; 1437 sector_t block_nr; 1438 struct f2fs_map_blocks map; 1439 1440 map.m_pblk = 0; 1441 map.m_lblk = 0; 1442 map.m_len = 0; 1443 map.m_flags = 0; 1444 map.m_next_pgofs = NULL; 1445 map.m_next_extent = NULL; 1446 map.m_seg_type = NO_CHECK_TYPE; 1447 1448 for (; nr_pages; nr_pages--) { 1449 if (pages) { 1450 page = list_last_entry(pages, struct page, lru); 1451 1452 prefetchw(&page->flags); 1453 list_del(&page->lru); 1454 if (add_to_page_cache_lru(page, mapping, 1455 page->index, 1456 readahead_gfp_mask(mapping))) 1457 goto next_page; 1458 } 1459 1460 block_in_file = (sector_t)page->index; 1461 last_block = block_in_file + nr_pages; 1462 last_block_in_file = (i_size_read(inode) + blocksize - 1) >> 1463 blkbits; 1464 if (last_block > last_block_in_file) 1465 last_block = last_block_in_file; 1466 1467 /* 1468 * Map blocks using the previous result first. 1469 */ 1470 if ((map.m_flags & F2FS_MAP_MAPPED) && 1471 block_in_file > map.m_lblk && 1472 block_in_file < (map.m_lblk + map.m_len)) 1473 goto got_it; 1474 1475 /* 1476 * Then do more f2fs_map_blocks() calls until we are 1477 * done with this page. 1478 */ 1479 map.m_flags = 0; 1480 1481 if (block_in_file < last_block) { 1482 map.m_lblk = block_in_file; 1483 map.m_len = last_block - block_in_file; 1484 1485 if (f2fs_map_blocks(inode, &map, 0, 1486 F2FS_GET_BLOCK_DEFAULT)) 1487 goto set_error_page; 1488 } 1489 got_it: 1490 if ((map.m_flags & F2FS_MAP_MAPPED)) { 1491 block_nr = map.m_pblk + block_in_file - map.m_lblk; 1492 SetPageMappedToDisk(page); 1493 1494 if (!PageUptodate(page) && !cleancache_get_page(page)) { 1495 SetPageUptodate(page); 1496 goto confused; 1497 } 1498 } else { 1499 zero_user_segment(page, 0, PAGE_SIZE); 1500 if (!PageUptodate(page)) 1501 SetPageUptodate(page); 1502 unlock_page(page); 1503 goto next_page; 1504 } 1505 1506 /* 1507 * This page will go to BIO. Do we need to send this 1508 * BIO off first? 
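		 * (Yes, if the next block is not contiguous with the last one in
		 * the bio, or sits on a different device.)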
1509 */ 1510 if (bio && (last_block_in_bio != block_nr - 1 || 1511 !__same_bdev(F2FS_I_SB(inode), block_nr, bio))) { 1512 submit_and_realloc: 1513 __submit_bio(F2FS_I_SB(inode), bio, DATA); 1514 bio = NULL; 1515 } 1516 if (bio == NULL) { 1517 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages); 1518 if (IS_ERR(bio)) { 1519 bio = NULL; 1520 goto set_error_page; 1521 } 1522 } 1523 1524 if (bio_add_page(bio, page, blocksize, 0) < blocksize) 1525 goto submit_and_realloc; 1526 1527 last_block_in_bio = block_nr; 1528 goto next_page; 1529 set_error_page: 1530 SetPageError(page); 1531 zero_user_segment(page, 0, PAGE_SIZE); 1532 unlock_page(page); 1533 goto next_page; 1534 confused: 1535 if (bio) { 1536 __submit_bio(F2FS_I_SB(inode), bio, DATA); 1537 bio = NULL; 1538 } 1539 unlock_page(page); 1540 next_page: 1541 if (pages) 1542 put_page(page); 1543 } 1544 BUG_ON(pages && !list_empty(pages)); 1545 if (bio) 1546 __submit_bio(F2FS_I_SB(inode), bio, DATA); 1547 return 0; 1548 } 1549 1550 static int f2fs_read_data_page(struct file *file, struct page *page) 1551 { 1552 struct inode *inode = page->mapping->host; 1553 int ret = -EAGAIN; 1554 1555 trace_f2fs_readpage(page, DATA); 1556 1557 /* If the file has inline data, try to read it directly */ 1558 if (f2fs_has_inline_data(inode)) 1559 ret = f2fs_read_inline_data(inode, page); 1560 if (ret == -EAGAIN) 1561 ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1); 1562 return ret; 1563 } 1564 1565 static int f2fs_read_data_pages(struct file *file, 1566 struct address_space *mapping, 1567 struct list_head *pages, unsigned nr_pages) 1568 { 1569 struct inode *inode = mapping->host; 1570 struct page *page = list_last_entry(pages, struct page, lru); 1571 1572 trace_f2fs_readpages(inode, page, nr_pages); 1573 1574 /* If the file has inline data, skip readpages */ 1575 if (f2fs_has_inline_data(inode)) 1576 return 0; 1577 1578 return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); 1579 } 1580 1581 static int encrypt_one_page(struct f2fs_io_info *fio) 1582 { 1583 struct inode *inode = fio->page->mapping->host; 1584 gfp_t gfp_flags = GFP_NOFS; 1585 1586 if (!f2fs_encrypted_file(inode)) 1587 return 0; 1588 1589 /* wait for GCed page writeback via META_MAPPING */ 1590 f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); 1591 1592 retry_encrypt: 1593 fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, 1594 PAGE_SIZE, 0, fio->page->index, gfp_flags); 1595 if (!IS_ERR(fio->encrypted_page)) 1596 return 0; 1597 1598 /* flush pending IOs and wait for a while in the ENOMEM case */ 1599 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { 1600 f2fs_flush_merged_writes(fio->sbi); 1601 congestion_wait(BLK_RW_ASYNC, HZ/50); 1602 gfp_flags |= __GFP_NOFAIL; 1603 goto retry_encrypt; 1604 } 1605 return PTR_ERR(fio->encrypted_page); 1606 } 1607 1608 static inline bool check_inplace_update_policy(struct inode *inode, 1609 struct f2fs_io_info *fio) 1610 { 1611 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1612 unsigned int policy = SM_I(sbi)->ipu_policy; 1613 1614 if (policy & (0x1 << F2FS_IPU_FORCE)) 1615 return true; 1616 if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) 1617 return true; 1618 if (policy & (0x1 << F2FS_IPU_UTIL) && 1619 utilization(sbi) > SM_I(sbi)->min_ipu_util) 1620 return true; 1621 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) && 1622 utilization(sbi) > SM_I(sbi)->min_ipu_util) 1623 return true; 1624 1625 /* 1626 * IPU for rewrite async pages 1627 */ 1628 if (policy & (0x1 << F2FS_IPU_ASYNC) && 1629 fio && fio->op == 
REQ_OP_WRITE && 1630 !(fio->op_flags & REQ_SYNC) && 1631 !f2fs_encrypted_inode(inode)) 1632 return true; 1633 1634 /* this is only set during fdatasync */ 1635 if (policy & (0x1 << F2FS_IPU_FSYNC) && 1636 is_inode_flag_set(inode, FI_NEED_IPU)) 1637 return true; 1638 1639 return false; 1640 } 1641 1642 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) 1643 { 1644 if (f2fs_is_pinned_file(inode)) 1645 return true; 1646 1647 /* if this is cold file, we should overwrite to avoid fragmentation */ 1648 if (file_is_cold(inode)) 1649 return true; 1650 1651 return check_inplace_update_policy(inode, fio); 1652 } 1653 1654 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) 1655 { 1656 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1657 1658 if (test_opt(sbi, LFS)) 1659 return true; 1660 if (S_ISDIR(inode->i_mode)) 1661 return true; 1662 if (f2fs_is_atomic_file(inode)) 1663 return true; 1664 if (fio) { 1665 if (is_cold_data(fio->page)) 1666 return true; 1667 if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) 1668 return true; 1669 } 1670 return false; 1671 } 1672 1673 static inline bool need_inplace_update(struct f2fs_io_info *fio) 1674 { 1675 struct inode *inode = fio->page->mapping->host; 1676 1677 if (f2fs_should_update_outplace(inode, fio)) 1678 return false; 1679 1680 return f2fs_should_update_inplace(inode, fio); 1681 } 1682 1683 int f2fs_do_write_data_page(struct f2fs_io_info *fio) 1684 { 1685 struct page *page = fio->page; 1686 struct inode *inode = page->mapping->host; 1687 struct dnode_of_data dn; 1688 struct extent_info ei = {0,0,0}; 1689 bool ipu_force = false; 1690 int err = 0; 1691 1692 set_new_dnode(&dn, inode, NULL, NULL, 0); 1693 if (need_inplace_update(fio) && 1694 f2fs_lookup_extent_cache(inode, page->index, &ei)) { 1695 fio->old_blkaddr = ei.blk + page->index - ei.fofs; 1696 1697 if (is_valid_blkaddr(fio->old_blkaddr)) { 1698 ipu_force = true; 1699 fio->need_lock = LOCK_DONE; 1700 goto got_it; 1701 } 1702 } 1703 1704 /* Deadlock due to between page->lock and f2fs_lock_op */ 1705 if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) 1706 return -EAGAIN; 1707 1708 err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); 1709 if (err) 1710 goto out; 1711 1712 fio->old_blkaddr = dn.data_blkaddr; 1713 1714 /* This page is already truncated */ 1715 if (fio->old_blkaddr == NULL_ADDR) { 1716 ClearPageUptodate(page); 1717 goto out_writepage; 1718 } 1719 got_it: 1720 /* 1721 * If current allocation needs SSR, 1722 * it had better in-place writes for updated data. 
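	 * The in-place update (IPU) path below reuses the existing block
	 * address instead of allocating a new one.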
 */
	if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		trace_f2fs_do_write_data_page(fio->page, IPU);
		set_inode_flag(inode, FI_UPDATE_WRITE);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}

static int __write_data_page(struct page *page, bool *submitted,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_SHIFT;
	loff_t psize = (page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to proceed the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
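	 * Pages entirely beyond i_size are skipped; for the last partial page,
	 * the tail beyond i_size is zeroed below before the page is written.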
 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write 0'th page having journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		down_write(&F2FS_I(inode)->i_sem);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		up_write(&F2FS_I(inode)->i_sem);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err)
		ClearPageUptodate(page);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}

	unlock_page(page);
	if (!S_ISDIR(inode->i_mode))
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see EIO error, which is critical
	 * to return value of fsync() followed by atomic_write failure to user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	return __write_data_page(page, NULL, wbc, FS_DATA_IO);
}

/*
 * This function was copied from write_cache_pages in mm/page-writeback.c.
 * The major change is making the write step of cold data pages separate from
 * warm/hot data pages.
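 * Pages are written through __write_data_page() and the merged bios are
 * flushed at the end via f2fs_submit_merged_write_cond().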
1928 */ 1929 static int f2fs_write_cache_pages(struct address_space *mapping, 1930 struct writeback_control *wbc, 1931 enum iostat_type io_type) 1932 { 1933 int ret = 0; 1934 int done = 0; 1935 struct pagevec pvec; 1936 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 1937 int nr_pages; 1938 pgoff_t uninitialized_var(writeback_index); 1939 pgoff_t index; 1940 pgoff_t end; /* Inclusive */ 1941 pgoff_t done_index; 1942 pgoff_t last_idx = ULONG_MAX; 1943 int cycled; 1944 int range_whole = 0; 1945 int tag; 1946 1947 pagevec_init(&pvec); 1948 1949 if (get_dirty_pages(mapping->host) <= 1950 SM_I(F2FS_M_SB(mapping))->min_hot_blocks) 1951 set_inode_flag(mapping->host, FI_HOT_DATA); 1952 else 1953 clear_inode_flag(mapping->host, FI_HOT_DATA); 1954 1955 if (wbc->range_cyclic) { 1956 writeback_index = mapping->writeback_index; /* prev offset */ 1957 index = writeback_index; 1958 if (index == 0) 1959 cycled = 1; 1960 else 1961 cycled = 0; 1962 end = -1; 1963 } else { 1964 index = wbc->range_start >> PAGE_SHIFT; 1965 end = wbc->range_end >> PAGE_SHIFT; 1966 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 1967 range_whole = 1; 1968 cycled = 1; /* ignore range_cyclic tests */ 1969 } 1970 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 1971 tag = PAGECACHE_TAG_TOWRITE; 1972 else 1973 tag = PAGECACHE_TAG_DIRTY; 1974 retry: 1975 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 1976 tag_pages_for_writeback(mapping, index, end); 1977 done_index = index; 1978 while (!done && (index <= end)) { 1979 int i; 1980 1981 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, 1982 tag); 1983 if (nr_pages == 0) 1984 break; 1985 1986 for (i = 0; i < nr_pages; i++) { 1987 struct page *page = pvec.pages[i]; 1988 bool submitted = false; 1989 1990 /* give a priority to WB_SYNC threads */ 1991 if (atomic_read(&sbi->wb_sync_req[DATA]) && 1992 wbc->sync_mode == WB_SYNC_NONE) { 1993 done = 1; 1994 break; 1995 } 1996 1997 done_index = page->index; 1998 retry_write: 1999 lock_page(page); 2000 2001 if (unlikely(page->mapping != mapping)) { 2002 continue_unlock: 2003 unlock_page(page); 2004 continue; 2005 } 2006 2007 if (!PageDirty(page)) { 2008 /* someone wrote it for us */ 2009 goto continue_unlock; 2010 } 2011 2012 if (PageWriteback(page)) { 2013 if (wbc->sync_mode != WB_SYNC_NONE) 2014 f2fs_wait_on_page_writeback(page, 2015 DATA, true); 2016 else 2017 goto continue_unlock; 2018 } 2019 2020 BUG_ON(PageWriteback(page)); 2021 if (!clear_page_dirty_for_io(page)) 2022 goto continue_unlock; 2023 2024 ret = __write_data_page(page, &submitted, wbc, io_type); 2025 if (unlikely(ret)) { 2026 /* 2027 * keep nr_to_write, since vfs uses this to 2028 * get # of written pages. 
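				 * AOP_WRITEPAGE_ACTIVATE and -EAGAIN are handled
				 * below without failing the scan; any other error
				 * stops it.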
2029 */ 2030 if (ret == AOP_WRITEPAGE_ACTIVATE) { 2031 unlock_page(page); 2032 ret = 0; 2033 continue; 2034 } else if (ret == -EAGAIN) { 2035 ret = 0; 2036 if (wbc->sync_mode == WB_SYNC_ALL) { 2037 cond_resched(); 2038 congestion_wait(BLK_RW_ASYNC, 2039 HZ/50); 2040 goto retry_write; 2041 } 2042 continue; 2043 } 2044 done_index = page->index + 1; 2045 done = 1; 2046 break; 2047 } else if (submitted) { 2048 last_idx = page->index; 2049 } 2050 2051 if (--wbc->nr_to_write <= 0 && 2052 wbc->sync_mode == WB_SYNC_NONE) { 2053 done = 1; 2054 break; 2055 } 2056 } 2057 pagevec_release(&pvec); 2058 cond_resched(); 2059 } 2060 2061 if (!cycled && !done) { 2062 cycled = 1; 2063 index = 0; 2064 end = writeback_index - 1; 2065 goto retry; 2066 } 2067 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 2068 mapping->writeback_index = done_index; 2069 2070 if (last_idx != ULONG_MAX) 2071 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, 2072 0, last_idx, DATA); 2073 2074 return ret; 2075 } 2076 2077 static int __f2fs_write_data_pages(struct address_space *mapping, 2078 struct writeback_control *wbc, 2079 enum iostat_type io_type) 2080 { 2081 struct inode *inode = mapping->host; 2082 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2083 struct blk_plug plug; 2084 int ret; 2085 2086 /* deal with chardevs and other special file */ 2087 if (!mapping->a_ops->writepage) 2088 return 0; 2089 2090 /* skip writing if there is no dirty page in this inode */ 2091 if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) 2092 return 0; 2093 2094 /* during POR, we don't need to trigger writepage at all. */ 2095 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 2096 goto skip_write; 2097 2098 if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && 2099 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && 2100 f2fs_available_free_memory(sbi, DIRTY_DENTS)) 2101 goto skip_write; 2102 2103 /* skip writing during file defragment */ 2104 if (is_inode_flag_set(inode, FI_DO_DEFRAG)) 2105 goto skip_write; 2106 2107 trace_f2fs_writepages(mapping->host, wbc, DATA); 2108 2109 /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ 2110 if (wbc->sync_mode == WB_SYNC_ALL) 2111 atomic_inc(&sbi->wb_sync_req[DATA]); 2112 else if (atomic_read(&sbi->wb_sync_req[DATA])) 2113 goto skip_write; 2114 2115 blk_start_plug(&plug); 2116 ret = f2fs_write_cache_pages(mapping, wbc, io_type); 2117 blk_finish_plug(&plug); 2118 2119 if (wbc->sync_mode == WB_SYNC_ALL) 2120 atomic_dec(&sbi->wb_sync_req[DATA]); 2121 /* 2122 * if some pages were truncated, we cannot guarantee its mapping->host 2123 * to detect pending bios. 2124 */ 2125 2126 f2fs_remove_dirty_inode(inode); 2127 return ret; 2128 2129 skip_write: 2130 wbc->pages_skipped += get_dirty_pages(inode); 2131 trace_f2fs_writepages(mapping->host, wbc, DATA); 2132 return 0; 2133 } 2134 2135 static int f2fs_write_data_pages(struct address_space *mapping, 2136 struct writeback_control *wbc) 2137 { 2138 struct inode *inode = mapping->host; 2139 2140 return __f2fs_write_data_pages(mapping, wbc, 2141 F2FS_I(inode)->cp_task == current ? 
					FS_CP_DATA_IO : FS_DATA_IO);
}

static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_mmap_sem);
		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);
		up_write(&F2FS_I(inode)->i_mmap_sem);
	}
}

static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0, 0, 0};
	int err = 0;

	/*
	 * We have already allocated all the blocks, so we don't need to get
	 * the block address when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
			!is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				locked = true;
				goto restart;
			}
		}
	}

	/* f2fs_convert_inline_page() can set node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
	return err;
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	if (f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on the inode
	 * page and page #0.
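	 * Converting the inline inode here, before the target data page is
	 * locked, keeps the locking order consistent with the rule below.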
	 * The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	/*
	 * Do not use grab_cache_page_write_begin(), to avoid a deadlock due
	 * to wait_for_stable_page(); we wait for writeback below under our
	 * own IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false);

	/* wait for GCed page writeback via META_MAPPING */
	if (f2fs_post_read_required(inode))
		f2fs_wait_on_block_writeback(sbi, blkaddr);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_read(inode, page, blkaddr);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * A non-uptodate page here should have come from len == PAGE_SIZE,
	 * and we expect copied to be PAGE_SIZE as well. Otherwise, treat it
	 * as zero bytes copied and let generic_perform_write() retry the
	 * copy by returning copied = 0.
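	 * (generic_perform_write() loops over ->write_begin(), a copy from
	 * the user buffer, and ->write_end() until the whole request has
	 * been written.)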
2351 */ 2352 if (!PageUptodate(page)) { 2353 if (unlikely(copied != len)) 2354 copied = 0; 2355 else 2356 SetPageUptodate(page); 2357 } 2358 if (!copied) 2359 goto unlock_out; 2360 2361 set_page_dirty(page); 2362 2363 if (pos + copied > i_size_read(inode)) 2364 f2fs_i_size_write(inode, pos + copied); 2365 unlock_out: 2366 f2fs_put_page(page, 1); 2367 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 2368 return copied; 2369 } 2370 2371 static int check_direct_IO(struct inode *inode, struct iov_iter *iter, 2372 loff_t offset) 2373 { 2374 unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; 2375 2376 if (offset & blocksize_mask) 2377 return -EINVAL; 2378 2379 if (iov_iter_alignment(iter) & blocksize_mask) 2380 return -EINVAL; 2381 2382 return 0; 2383 } 2384 2385 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 2386 { 2387 struct address_space *mapping = iocb->ki_filp->f_mapping; 2388 struct inode *inode = mapping->host; 2389 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2390 size_t count = iov_iter_count(iter); 2391 loff_t offset = iocb->ki_pos; 2392 int rw = iov_iter_rw(iter); 2393 int err; 2394 enum rw_hint hint = iocb->ki_hint; 2395 int whint_mode = F2FS_OPTION(sbi).whint_mode; 2396 2397 err = check_direct_IO(inode, iter, offset); 2398 if (err) 2399 return err; 2400 2401 if (f2fs_force_buffered_io(inode, rw)) 2402 return 0; 2403 2404 trace_f2fs_direct_IO_enter(inode, offset, count, rw); 2405 2406 if (rw == WRITE && whint_mode == WHINT_MODE_OFF) 2407 iocb->ki_hint = WRITE_LIFE_NOT_SET; 2408 2409 if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) { 2410 if (iocb->ki_flags & IOCB_NOWAIT) { 2411 iocb->ki_hint = hint; 2412 err = -EAGAIN; 2413 goto out; 2414 } 2415 down_read(&F2FS_I(inode)->i_gc_rwsem[rw]); 2416 } 2417 2418 err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); 2419 up_read(&F2FS_I(inode)->i_gc_rwsem[rw]); 2420 2421 if (rw == WRITE) { 2422 if (whint_mode == WHINT_MODE_OFF) 2423 iocb->ki_hint = hint; 2424 if (err > 0) { 2425 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, 2426 err); 2427 set_inode_flag(inode, FI_UPDATE_WRITE); 2428 } else if (err < 0) { 2429 f2fs_write_failed(mapping, offset + count); 2430 } 2431 } 2432 2433 out: 2434 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); 2435 2436 return err; 2437 } 2438 2439 void f2fs_invalidate_page(struct page *page, unsigned int offset, 2440 unsigned int length) 2441 { 2442 struct inode *inode = page->mapping->host; 2443 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2444 2445 if (inode->i_ino >= F2FS_ROOT_INO(sbi) && 2446 (offset % PAGE_SIZE || length != PAGE_SIZE)) 2447 return; 2448 2449 if (PageDirty(page)) { 2450 if (inode->i_ino == F2FS_META_INO(sbi)) { 2451 dec_page_count(sbi, F2FS_DIRTY_META); 2452 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) { 2453 dec_page_count(sbi, F2FS_DIRTY_NODES); 2454 } else { 2455 inode_dec_dirty_pages(inode); 2456 f2fs_remove_dirty_inode(inode); 2457 } 2458 } 2459 2460 /* This is atomic written page, keep Private */ 2461 if (IS_ATOMIC_WRITTEN_PAGE(page)) 2462 return f2fs_drop_inmem_page(inode, page); 2463 2464 set_page_private(page, 0); 2465 ClearPagePrivate(page); 2466 } 2467 2468 int f2fs_release_page(struct page *page, gfp_t wait) 2469 { 2470 /* If this is dirty page, keep PagePrivate */ 2471 if (PageDirty(page)) 2472 return 0; 2473 2474 /* This is atomic written page, keep Private */ 2475 if (IS_ATOMIC_WRITTEN_PAGE(page)) 2476 return 0; 2477 2478 set_page_private(page, 0); 2479 ClearPagePrivate(page); 2480 return 1; 2481 } 2482 2483 static int 
static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * This page has already been registered, so just return
		 * here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure all the blocks are allocated */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic-written page is safe with inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/*
	 * A reference is expected if PagePrivate is set when the mapping is
	 * moved; however, f2fs breaks this rule in order to maintain dirty
	 * page counts when truncating pages. So adjust 'extra_count' here to
	 * make it work.
	 */
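	/*
	 * For example, an atomic-written page has PagePrivate set but holds
	 * no extra reference, so extra_count is 1 - 1 = 0, while an ordinary
	 * dirty data page with PagePrivate set yields 0 - 1 = -1, cancelling
	 * the extra reference migrate_page_move_mapping() would otherwise
	 * expect for a private page.
	 */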
	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
	rc = migrate_page_move_mapping(mapping, newpage,
				page, NULL, mode, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;

		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page))
		SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
#ifdef CONFIG_MIGRATION
	.migratepage	= f2fs_migrate_page,
#endif
};

void f2fs_clear_radix_tree_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	radix_tree_tag_clear(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void __exit f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}
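
/*
 * Note: the mempool created in f2fs_init_post_read_processing() exists so
 * that allocating a bio_post_read_ctx can always make forward progress under
 * memory pressure; up to NUM_PREALLOC_POST_READ_CTXS contexts stay
 * preallocated for that purpose.
 */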