1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/f2fs/data.c 4 * 5 * Copyright (c) 2012 Samsung Electronics Co., Ltd. 6 * http://www.samsung.com/ 7 */ 8 #include <linux/fs.h> 9 #include <linux/f2fs_fs.h> 10 #include <linux/buffer_head.h> 11 #include <linux/sched/mm.h> 12 #include <linux/mpage.h> 13 #include <linux/writeback.h> 14 #include <linux/pagevec.h> 15 #include <linux/blkdev.h> 16 #include <linux/bio.h> 17 #include <linux/blk-crypto.h> 18 #include <linux/swap.h> 19 #include <linux/prefetch.h> 20 #include <linux/uio.h> 21 #include <linux/sched/signal.h> 22 #include <linux/fiemap.h> 23 #include <linux/iomap.h> 24 25 #include "f2fs.h" 26 #include "node.h" 27 #include "segment.h" 28 #include "iostat.h" 29 #include <trace/events/f2fs.h> 30 31 #define NUM_PREALLOC_POST_READ_CTXS 128 32 33 static struct kmem_cache *bio_post_read_ctx_cache; 34 static struct kmem_cache *bio_entry_slab; 35 static mempool_t *bio_post_read_ctx_pool; 36 static struct bio_set f2fs_bioset; 37 38 #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE 39 40 int __init f2fs_init_bioset(void) 41 { 42 if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE, 43 0, BIOSET_NEED_BVECS)) 44 return -ENOMEM; 45 return 0; 46 } 47 48 void f2fs_destroy_bioset(void) 49 { 50 bioset_exit(&f2fs_bioset); 51 } 52 53 static bool __is_cp_guaranteed(struct page *page) 54 { 55 struct address_space *mapping = page->mapping; 56 struct inode *inode; 57 struct f2fs_sb_info *sbi; 58 59 if (!mapping) 60 return false; 61 62 inode = mapping->host; 63 sbi = F2FS_I_SB(inode); 64 65 if (inode->i_ino == F2FS_META_INO(sbi) || 66 inode->i_ino == F2FS_NODE_INO(sbi) || 67 S_ISDIR(inode->i_mode)) 68 return true; 69 70 if (f2fs_is_compressed_page(page)) 71 return false; 72 if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) || 73 page_private_gcing(page)) 74 return true; 75 return false; 76 } 77 78 static enum count_type __read_io_type(struct page *page) 79 { 80 struct address_space *mapping = page_file_mapping(page); 81 82 if (mapping) { 83 struct inode *inode = mapping->host; 84 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 85 86 if (inode->i_ino == F2FS_META_INO(sbi)) 87 return F2FS_RD_META; 88 89 if (inode->i_ino == F2FS_NODE_INO(sbi)) 90 return F2FS_RD_NODE; 91 } 92 return F2FS_RD_DATA; 93 } 94 95 /* postprocessing steps for read bios */ 96 enum bio_post_read_step { 97 #ifdef CONFIG_FS_ENCRYPTION 98 STEP_DECRYPT = 1 << 0, 99 #else 100 STEP_DECRYPT = 0, /* compile out the decryption-related code */ 101 #endif 102 #ifdef CONFIG_F2FS_FS_COMPRESSION 103 STEP_DECOMPRESS = 1 << 1, 104 #else 105 STEP_DECOMPRESS = 0, /* compile out the decompression-related code */ 106 #endif 107 #ifdef CONFIG_FS_VERITY 108 STEP_VERITY = 1 << 2, 109 #else 110 STEP_VERITY = 0, /* compile out the verity-related code */ 111 #endif 112 }; 113 114 struct bio_post_read_ctx { 115 struct bio *bio; 116 struct f2fs_sb_info *sbi; 117 struct work_struct work; 118 unsigned int enabled_steps; 119 block_t fs_blkaddr; 120 }; 121 122 static void f2fs_finish_read_bio(struct bio *bio) 123 { 124 struct bio_vec *bv; 125 struct bvec_iter_all iter_all; 126 127 /* 128 * Update and unlock the bio's pagecache pages, and put the 129 * decompression context for any compressed pages. 130 */ 131 bio_for_each_segment_all(bv, bio, iter_all) { 132 struct page *page = bv->bv_page; 133 134 if (f2fs_is_compressed_page(page)) { 135 if (bio->bi_status) 136 f2fs_end_read_compressed_page(page, true, 0); 137 f2fs_put_page_dic(page); 138 continue; 139 } 140 141 /* PG_error was set if decryption or verity failed. 
*/ 142 if (bio->bi_status || PageError(page)) { 143 ClearPageUptodate(page); 144 /* will re-read again later */ 145 ClearPageError(page); 146 } else { 147 SetPageUptodate(page); 148 } 149 dec_page_count(F2FS_P_SB(page), __read_io_type(page)); 150 unlock_page(page); 151 } 152 153 if (bio->bi_private) 154 mempool_free(bio->bi_private, bio_post_read_ctx_pool); 155 bio_put(bio); 156 } 157 158 static void f2fs_verify_bio(struct work_struct *work) 159 { 160 struct bio_post_read_ctx *ctx = 161 container_of(work, struct bio_post_read_ctx, work); 162 struct bio *bio = ctx->bio; 163 bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS); 164 165 /* 166 * fsverity_verify_bio() may call readahead() again, and while verity 167 * will be disabled for this, decryption and/or decompression may still 168 * be needed, resulting in another bio_post_read_ctx being allocated. 169 * So to prevent deadlocks we need to release the current ctx to the 170 * mempool first. This assumes that verity is the last post-read step. 171 */ 172 mempool_free(ctx, bio_post_read_ctx_pool); 173 bio->bi_private = NULL; 174 175 /* 176 * Verify the bio's pages with fs-verity. Exclude compressed pages, 177 * as those were handled separately by f2fs_end_read_compressed_page(). 178 */ 179 if (may_have_compressed_pages) { 180 struct bio_vec *bv; 181 struct bvec_iter_all iter_all; 182 183 bio_for_each_segment_all(bv, bio, iter_all) { 184 struct page *page = bv->bv_page; 185 186 if (!f2fs_is_compressed_page(page) && 187 !PageError(page) && !fsverity_verify_page(page)) 188 SetPageError(page); 189 } 190 } else { 191 fsverity_verify_bio(bio); 192 } 193 194 f2fs_finish_read_bio(bio); 195 } 196 197 /* 198 * If the bio's data needs to be verified with fs-verity, then enqueue the 199 * verity work for the bio. Otherwise finish the bio now. 200 * 201 * Note that to avoid deadlocks, the verity work can't be done on the 202 * decryption/decompression workqueue. This is because verifying the data pages 203 * can involve reading verity metadata pages from the file, and these verity 204 * metadata pages may be encrypted and/or compressed. 205 */ 206 static void f2fs_verify_and_finish_bio(struct bio *bio) 207 { 208 struct bio_post_read_ctx *ctx = bio->bi_private; 209 210 if (ctx && (ctx->enabled_steps & STEP_VERITY)) { 211 INIT_WORK(&ctx->work, f2fs_verify_bio); 212 fsverity_enqueue_verify_work(&ctx->work); 213 } else { 214 f2fs_finish_read_bio(bio); 215 } 216 } 217 218 /* 219 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last 220 * remaining page was read by @ctx->bio. 221 * 222 * Note that a bio may span clusters (even a mix of compressed and uncompressed 223 * clusters) or be for just part of a cluster. STEP_DECOMPRESS just indicates 224 * that the bio includes at least one compressed page. The actual decompression 225 * is done on a per-cluster basis, not a per-bio basis. 226 */ 227 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx) 228 { 229 struct bio_vec *bv; 230 struct bvec_iter_all iter_all; 231 bool all_compressed = true; 232 block_t blkaddr = ctx->fs_blkaddr; 233 234 bio_for_each_segment_all(bv, ctx->bio, iter_all) { 235 struct page *page = bv->bv_page; 236 237 /* PG_error was set if decryption failed. 
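		 * That failure is passed to f2fs_end_read_compressed_page()
		 * below via PageError(page), so the whole cluster is treated
		 * as failed.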
*/ 238 if (f2fs_is_compressed_page(page)) 239 f2fs_end_read_compressed_page(page, PageError(page), 240 blkaddr); 241 else 242 all_compressed = false; 243 244 blkaddr++; 245 } 246 247 /* 248 * Optimization: if all the bio's pages are compressed, then scheduling 249 * the per-bio verity work is unnecessary, as verity will be fully 250 * handled at the compression cluster level. 251 */ 252 if (all_compressed) 253 ctx->enabled_steps &= ~STEP_VERITY; 254 } 255 256 static void f2fs_post_read_work(struct work_struct *work) 257 { 258 struct bio_post_read_ctx *ctx = 259 container_of(work, struct bio_post_read_ctx, work); 260 261 if (ctx->enabled_steps & STEP_DECRYPT) 262 fscrypt_decrypt_bio(ctx->bio); 263 264 if (ctx->enabled_steps & STEP_DECOMPRESS) 265 f2fs_handle_step_decompress(ctx); 266 267 f2fs_verify_and_finish_bio(ctx->bio); 268 } 269 270 static void f2fs_read_end_io(struct bio *bio) 271 { 272 struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio)); 273 struct bio_post_read_ctx *ctx; 274 275 iostat_update_and_unbind_ctx(bio, 0); 276 ctx = bio->bi_private; 277 278 if (time_to_inject(sbi, FAULT_READ_IO)) { 279 f2fs_show_injection_info(sbi, FAULT_READ_IO); 280 bio->bi_status = BLK_STS_IOERR; 281 } 282 283 if (bio->bi_status) { 284 f2fs_finish_read_bio(bio); 285 return; 286 } 287 288 if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) { 289 INIT_WORK(&ctx->work, f2fs_post_read_work); 290 queue_work(ctx->sbi->post_read_wq, &ctx->work); 291 } else { 292 f2fs_verify_and_finish_bio(bio); 293 } 294 } 295 296 static void f2fs_write_end_io(struct bio *bio) 297 { 298 struct f2fs_sb_info *sbi; 299 struct bio_vec *bvec; 300 struct bvec_iter_all iter_all; 301 302 iostat_update_and_unbind_ctx(bio, 1); 303 sbi = bio->bi_private; 304 305 if (time_to_inject(sbi, FAULT_WRITE_IO)) { 306 f2fs_show_injection_info(sbi, FAULT_WRITE_IO); 307 bio->bi_status = BLK_STS_IOERR; 308 } 309 310 bio_for_each_segment_all(bvec, bio, iter_all) { 311 struct page *page = bvec->bv_page; 312 enum count_type type = WB_DATA_TYPE(page); 313 314 if (page_private_dummy(page)) { 315 clear_page_private_dummy(page); 316 unlock_page(page); 317 mempool_free(page, sbi->write_io_dummy); 318 319 if (unlikely(bio->bi_status)) 320 f2fs_stop_checkpoint(sbi, true); 321 continue; 322 } 323 324 fscrypt_finalize_bounce_page(&page); 325 326 #ifdef CONFIG_F2FS_FS_COMPRESSION 327 if (f2fs_is_compressed_page(page)) { 328 f2fs_compress_write_end_io(bio, page); 329 continue; 330 } 331 #endif 332 333 if (unlikely(bio->bi_status)) { 334 mapping_set_error(page->mapping, -EIO); 335 if (type == F2FS_WB_CP_DATA) 336 f2fs_stop_checkpoint(sbi, true); 337 } 338 339 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && 340 page->index != nid_of_node(page)); 341 342 dec_page_count(sbi, type); 343 if (f2fs_in_warm_node_list(sbi, page)) 344 f2fs_del_fsync_node_entry(sbi, page); 345 clear_page_private_gcing(page); 346 end_page_writeback(page); 347 } 348 if (!get_pages(sbi, F2FS_WB_CP_DATA) && 349 wq_has_sleeper(&sbi->cp_wait)) 350 wake_up(&sbi->cp_wait); 351 352 bio_put(bio); 353 } 354 355 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, 356 block_t blk_addr, sector_t *sector) 357 { 358 struct block_device *bdev = sbi->sb->s_bdev; 359 int i; 360 361 if (f2fs_is_multi_device(sbi)) { 362 for (i = 0; i < sbi->s_ndevs; i++) { 363 if (FDEV(i).start_blk <= blk_addr && 364 FDEV(i).end_blk >= blk_addr) { 365 blk_addr -= FDEV(i).start_blk; 366 bdev = FDEV(i).bdev; 367 break; 368 } 369 } 370 } 371 372 if (sector) 373 *sector = 
SECTOR_FROM_BLOCK(blk_addr);
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static unsigned int f2fs_io_flags(struct f2fs_io_info *fio)
{
	unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
	unsigned int fua_flag, meta_flag, io_flag;
	unsigned int op_flags = 0;

	if (fio->op != REQ_OP_WRITE)
		return 0;
	if (fio->type == DATA)
		io_flag = fio->sbi->data_io_flag;
	else if (fio->type == NODE)
		io_flag = fio->sbi->node_io_flag;
	else
		return 0;

	fua_flag = io_flag & temp_mask;
	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;

	/*
	 * data/node io flag bits per temp:
	 *      REQ_META     |      REQ_FUA      |
	 *    5 |    4 |   3 |    2 |    1 |   0 |
	 * Cold | Warm | Hot | Cold | Warm | Hot |
	 */
	if ((1 << fio->temp) & meta_flag)
		op_flags |= REQ_META;
	if ((1 << fio->temp) & fua_flag)
		op_flags |= REQ_FUA;
	return op_flags;
}

static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct block_device *bdev;
	sector_t sector;
	struct bio *bio;

	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
	bio = bio_alloc_bioset(bdev, npages,
				fio->op | fio->op_flags | f2fs_io_flags(fio),
				GFP_NOIO, &f2fs_bioset);
	bio->bi_iter.bi_sector = sector;
	if (is_read_io(fio->op)) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
	}
	iostat_alloc_and_bind_ctx(sbi, bio, NULL);

	if (fio->io_wbc)
		wbc_init_bio(fio->io_wbc, bio);

	return bio;
}

static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
				  pgoff_t first_idx,
				  const struct f2fs_io_info *fio,
				  gfp_t gfp_mask)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (!fio || !fio->encrypted_page)
		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
}

static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
				     pgoff_t next_idx,
				     const struct f2fs_io_info *fio)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
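	 * Such a bio gets no fscrypt context (see f2fs_set_bio_crypt_ctx()
	 * above), so a GC write is only mergeable into a bio that also has
	 * no crypt context.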
468 */ 469 if (fio && fio->encrypted_page) 470 return !bio_has_crypt_ctx(bio); 471 472 return fscrypt_mergeable_bio(bio, inode, next_idx); 473 } 474 475 static inline void __submit_bio(struct f2fs_sb_info *sbi, 476 struct bio *bio, enum page_type type) 477 { 478 if (!is_read_io(bio_op(bio))) { 479 unsigned int start; 480 481 if (type != DATA && type != NODE) 482 goto submit_io; 483 484 if (f2fs_lfs_mode(sbi) && current->plug) 485 blk_finish_plug(current->plug); 486 487 if (!F2FS_IO_ALIGNED(sbi)) 488 goto submit_io; 489 490 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; 491 start %= F2FS_IO_SIZE(sbi); 492 493 if (start == 0) 494 goto submit_io; 495 496 /* fill dummy pages */ 497 for (; start < F2FS_IO_SIZE(sbi); start++) { 498 struct page *page = 499 mempool_alloc(sbi->write_io_dummy, 500 GFP_NOIO | __GFP_NOFAIL); 501 f2fs_bug_on(sbi, !page); 502 503 lock_page(page); 504 505 zero_user_segment(page, 0, PAGE_SIZE); 506 set_page_private_dummy(page); 507 508 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) 509 f2fs_bug_on(sbi, 1); 510 } 511 /* 512 * In the NODE case, we lose next block address chain. So, we 513 * need to do checkpoint in f2fs_sync_file. 514 */ 515 if (type == NODE) 516 set_sbi_flag(sbi, SBI_NEED_CP); 517 } 518 submit_io: 519 if (is_read_io(bio_op(bio))) 520 trace_f2fs_submit_read_bio(sbi->sb, type, bio); 521 else 522 trace_f2fs_submit_write_bio(sbi->sb, type, bio); 523 524 iostat_update_submit_ctx(bio, type); 525 submit_bio(bio); 526 } 527 528 void f2fs_submit_bio(struct f2fs_sb_info *sbi, 529 struct bio *bio, enum page_type type) 530 { 531 __submit_bio(sbi, bio, type); 532 } 533 534 static void __submit_merged_bio(struct f2fs_bio_info *io) 535 { 536 struct f2fs_io_info *fio = &io->fio; 537 538 if (!io->bio) 539 return; 540 541 if (is_read_io(fio->op)) 542 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio); 543 else 544 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio); 545 546 __submit_bio(io->sbi, io->bio, fio->type); 547 io->bio = NULL; 548 } 549 550 static bool __has_merged_page(struct bio *bio, struct inode *inode, 551 struct page *page, nid_t ino) 552 { 553 struct bio_vec *bvec; 554 struct bvec_iter_all iter_all; 555 556 if (!bio) 557 return false; 558 559 if (!inode && !page && !ino) 560 return true; 561 562 bio_for_each_segment_all(bvec, bio, iter_all) { 563 struct page *target = bvec->bv_page; 564 565 if (fscrypt_is_bounce_page(target)) { 566 target = fscrypt_pagecache_page(target); 567 if (IS_ERR(target)) 568 continue; 569 } 570 if (f2fs_is_compressed_page(target)) { 571 target = f2fs_compress_control_page(target); 572 if (IS_ERR(target)) 573 continue; 574 } 575 576 if (inode && inode == target->mapping->host) 577 return true; 578 if (page && page == target) 579 return true; 580 if (ino && ino == ino_of_node(target)) 581 return true; 582 } 583 584 return false; 585 } 586 587 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi) 588 { 589 int i; 590 591 for (i = 0; i < NR_PAGE_TYPE; i++) { 592 int n = (i == META) ? 
1 : NR_TEMP_TYPE; 593 int j; 594 595 sbi->write_io[i] = f2fs_kmalloc(sbi, 596 array_size(n, sizeof(struct f2fs_bio_info)), 597 GFP_KERNEL); 598 if (!sbi->write_io[i]) 599 return -ENOMEM; 600 601 for (j = HOT; j < n; j++) { 602 init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); 603 sbi->write_io[i][j].sbi = sbi; 604 sbi->write_io[i][j].bio = NULL; 605 spin_lock_init(&sbi->write_io[i][j].io_lock); 606 INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); 607 INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); 608 init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); 609 } 610 } 611 612 return 0; 613 } 614 615 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, 616 enum page_type type, enum temp_type temp) 617 { 618 enum page_type btype = PAGE_TYPE_OF_BIO(type); 619 struct f2fs_bio_info *io = sbi->write_io[btype] + temp; 620 621 f2fs_down_write(&io->io_rwsem); 622 623 /* change META to META_FLUSH in the checkpoint procedure */ 624 if (type >= META_FLUSH) { 625 io->fio.type = META_FLUSH; 626 io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC; 627 if (!test_opt(sbi, NOBARRIER)) 628 io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA; 629 } 630 __submit_merged_bio(io); 631 f2fs_up_write(&io->io_rwsem); 632 } 633 634 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, 635 struct inode *inode, struct page *page, 636 nid_t ino, enum page_type type, bool force) 637 { 638 enum temp_type temp; 639 bool ret = true; 640 641 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) { 642 if (!force) { 643 enum page_type btype = PAGE_TYPE_OF_BIO(type); 644 struct f2fs_bio_info *io = sbi->write_io[btype] + temp; 645 646 f2fs_down_read(&io->io_rwsem); 647 ret = __has_merged_page(io->bio, inode, page, ino); 648 f2fs_up_read(&io->io_rwsem); 649 } 650 if (ret) 651 __f2fs_submit_merged_write(sbi, type, temp); 652 653 /* TODO: use HOT temp only for meta pages now. */ 654 if (type >= META) 655 break; 656 } 657 } 658 659 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type) 660 { 661 __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true); 662 } 663 664 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, 665 struct inode *inode, struct page *page, 666 nid_t ino, enum page_type type) 667 { 668 __submit_merged_write_cond(sbi, inode, page, ino, type, false); 669 } 670 671 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) 672 { 673 f2fs_submit_merged_write(sbi, DATA); 674 f2fs_submit_merged_write(sbi, NODE); 675 f2fs_submit_merged_write(sbi, META); 676 } 677 678 /* 679 * Fill the locked page with data located in the block address. 680 * A caller needs to unlock the page on failure. 681 */ 682 int f2fs_submit_page_bio(struct f2fs_io_info *fio) 683 { 684 struct bio *bio; 685 struct page *page = fio->encrypted_page ? 686 fio->encrypted_page : fio->page; 687 688 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, 689 fio->is_por ? META_POR : (__is_meta_io(fio) ? 690 META_GENERIC : DATA_GENERIC_ENHANCE))) 691 return -EFSCORRUPTED; 692 693 trace_f2fs_submit_page_bio(page, fio); 694 695 /* Allocate a new bio */ 696 bio = __bio_alloc(fio, 1); 697 698 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, 699 fio->page->index, fio, GFP_NOIO); 700 701 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { 702 bio_put(bio); 703 return -EFAULT; 704 } 705 706 if (fio->io_wbc && !is_read_io(fio->op)) 707 wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); 708 709 inc_page_count(fio->sbi, is_read_io(fio->op) ? 
710 __read_io_type(page): WB_DATA_TYPE(fio->page)); 711 712 __submit_bio(fio->sbi, bio, fio->type); 713 return 0; 714 } 715 716 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, 717 block_t last_blkaddr, block_t cur_blkaddr) 718 { 719 if (unlikely(sbi->max_io_bytes && 720 bio->bi_iter.bi_size >= sbi->max_io_bytes)) 721 return false; 722 if (last_blkaddr + 1 != cur_blkaddr) 723 return false; 724 return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL); 725 } 726 727 static bool io_type_is_mergeable(struct f2fs_bio_info *io, 728 struct f2fs_io_info *fio) 729 { 730 if (io->fio.op != fio->op) 731 return false; 732 return io->fio.op_flags == fio->op_flags; 733 } 734 735 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, 736 struct f2fs_bio_info *io, 737 struct f2fs_io_info *fio, 738 block_t last_blkaddr, 739 block_t cur_blkaddr) 740 { 741 if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) { 742 unsigned int filled_blocks = 743 F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size); 744 unsigned int io_size = F2FS_IO_SIZE(sbi); 745 unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt; 746 747 /* IOs in bio is aligned and left space of vectors is not enough */ 748 if (!(filled_blocks % io_size) && left_vecs < io_size) 749 return false; 750 } 751 if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr)) 752 return false; 753 return io_type_is_mergeable(io, fio); 754 } 755 756 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, 757 struct page *page, enum temp_type temp) 758 { 759 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; 760 struct bio_entry *be; 761 762 be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL); 763 be->bio = bio; 764 bio_get(bio); 765 766 if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) 767 f2fs_bug_on(sbi, 1); 768 769 f2fs_down_write(&io->bio_list_lock); 770 list_add_tail(&be->list, &io->bio_list); 771 f2fs_up_write(&io->bio_list_lock); 772 } 773 774 static void del_bio_entry(struct bio_entry *be) 775 { 776 list_del(&be->list); 777 kmem_cache_free(bio_entry_slab, be); 778 } 779 780 static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, 781 struct page *page) 782 { 783 struct f2fs_sb_info *sbi = fio->sbi; 784 enum temp_type temp; 785 bool found = false; 786 int ret = -EAGAIN; 787 788 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { 789 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; 790 struct list_head *head = &io->bio_list; 791 struct bio_entry *be; 792 793 f2fs_down_write(&io->bio_list_lock); 794 list_for_each_entry(be, head, list) { 795 if (be->bio != *bio) 796 continue; 797 798 found = true; 799 800 f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio, 801 *fio->last_block, 802 fio->new_blkaddr)); 803 if (f2fs_crypt_mergeable_bio(*bio, 804 fio->page->mapping->host, 805 fio->page->index, fio) && 806 bio_add_page(*bio, page, PAGE_SIZE, 0) == 807 PAGE_SIZE) { 808 ret = 0; 809 break; 810 } 811 812 /* page can't be merged into bio; submit the bio */ 813 del_bio_entry(be); 814 __submit_bio(sbi, *bio, DATA); 815 break; 816 } 817 f2fs_up_write(&io->bio_list_lock); 818 } 819 820 if (ret) { 821 bio_put(*bio); 822 *bio = NULL; 823 } 824 825 return ret; 826 } 827 828 void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, 829 struct bio **bio, struct page *page) 830 { 831 enum temp_type temp; 832 bool found = false; 833 struct bio *target = bio ? 
*bio : NULL; 834 835 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { 836 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; 837 struct list_head *head = &io->bio_list; 838 struct bio_entry *be; 839 840 if (list_empty(head)) 841 continue; 842 843 f2fs_down_read(&io->bio_list_lock); 844 list_for_each_entry(be, head, list) { 845 if (target) 846 found = (target == be->bio); 847 else 848 found = __has_merged_page(be->bio, NULL, 849 page, 0); 850 if (found) 851 break; 852 } 853 f2fs_up_read(&io->bio_list_lock); 854 855 if (!found) 856 continue; 857 858 found = false; 859 860 f2fs_down_write(&io->bio_list_lock); 861 list_for_each_entry(be, head, list) { 862 if (target) 863 found = (target == be->bio); 864 else 865 found = __has_merged_page(be->bio, NULL, 866 page, 0); 867 if (found) { 868 target = be->bio; 869 del_bio_entry(be); 870 break; 871 } 872 } 873 f2fs_up_write(&io->bio_list_lock); 874 } 875 876 if (found) 877 __submit_bio(sbi, target, DATA); 878 if (bio && *bio) { 879 bio_put(*bio); 880 *bio = NULL; 881 } 882 } 883 884 int f2fs_merge_page_bio(struct f2fs_io_info *fio) 885 { 886 struct bio *bio = *fio->bio; 887 struct page *page = fio->encrypted_page ? 888 fio->encrypted_page : fio->page; 889 890 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, 891 __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) 892 return -EFSCORRUPTED; 893 894 trace_f2fs_submit_page_bio(page, fio); 895 896 if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, 897 fio->new_blkaddr)) 898 f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); 899 alloc_new: 900 if (!bio) { 901 bio = __bio_alloc(fio, BIO_MAX_VECS); 902 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, 903 fio->page->index, fio, GFP_NOIO); 904 905 add_bio_entry(fio->sbi, bio, page, fio->temp); 906 } else { 907 if (add_ipu_page(fio, &bio, page)) 908 goto alloc_new; 909 } 910 911 if (fio->io_wbc) 912 wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE); 913 914 inc_page_count(fio->sbi, WB_DATA_TYPE(page)); 915 916 *fio->last_block = fio->new_blkaddr; 917 *fio->bio = bio; 918 919 return 0; 920 } 921 922 void f2fs_submit_page_write(struct f2fs_io_info *fio) 923 { 924 struct f2fs_sb_info *sbi = fio->sbi; 925 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type); 926 struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp; 927 struct page *bio_page; 928 929 f2fs_bug_on(sbi, is_read_io(fio->op)); 930 931 f2fs_down_write(&io->io_rwsem); 932 next: 933 if (fio->in_list) { 934 spin_lock(&io->io_lock); 935 if (list_empty(&io->io_list)) { 936 spin_unlock(&io->io_lock); 937 goto out; 938 } 939 fio = list_first_entry(&io->io_list, 940 struct f2fs_io_info, list); 941 list_del(&fio->list); 942 spin_unlock(&io->io_lock); 943 } 944 945 verify_fio_blkaddr(fio); 946 947 if (fio->encrypted_page) 948 bio_page = fio->encrypted_page; 949 else if (fio->compressed_page) 950 bio_page = fio->compressed_page; 951 else 952 bio_page = fio->page; 953 954 /* set submitted = true as a return value */ 955 fio->submitted = true; 956 957 inc_page_count(sbi, WB_DATA_TYPE(bio_page)); 958 959 if (io->bio && 960 (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, 961 fio->new_blkaddr) || 962 !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, 963 bio_page->index, fio))) 964 __submit_merged_bio(io); 965 alloc_new: 966 if (io->bio == NULL) { 967 if (F2FS_IO_ALIGNED(sbi) && 968 (fio->type == DATA || fio->type == NODE) && 969 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) { 970 dec_page_count(sbi, WB_DATA_TYPE(bio_page)); 971 fio->retry = true; 972 
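			/*
			 * An IO_SIZE-aligned bio can't start at this unaligned
			 * block address; skip the page for now, and fio->retry
			 * (set above) tells the caller to resubmit it.
			 */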
			goto skip;
		}
		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				       bio_page->index, fio, GFP_NOIO);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	f2fs_up_write(&io->io_rwsem);
}

static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
				      unsigned nr_pages, unsigned op_flag,
				      pgoff_t first_idx, bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx = NULL;
	unsigned int post_read_steps = 0;
	sector_t sector;
	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);

	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
			       REQ_OP_READ | op_flag,
			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio->bi_iter.bi_sector = sector;
	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
	bio->bi_end_io = f2fs_read_end_io;

	if (fscrypt_inode_uses_fs_layer_crypto(inode))
		post_read_steps |= STEP_DECRYPT;

	if (f2fs_need_verity(inode, first_idx))
		post_read_steps |= STEP_VERITY;

	/*
	 * STEP_DECOMPRESS is handled specially, since a compressed file might
	 * contain both compressed and uncompressed clusters.  We'll allocate a
	 * bio_post_read_ctx if the file is compressed, but the caller is
	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
	 */

	if (post_read_steps || f2fs_compressed_file(inode)) {
		/* Due to the mempool, this never fails.
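		 * bio_post_read_ctx_pool is sized by
		 * NUM_PREALLOC_POST_READ_CTXS, and GFP_NOFS lets
		 * mempool_alloc() sleep for a free entry instead of
		 * returning NULL.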
*/ 1036 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); 1037 ctx->bio = bio; 1038 ctx->sbi = sbi; 1039 ctx->enabled_steps = post_read_steps; 1040 ctx->fs_blkaddr = blkaddr; 1041 bio->bi_private = ctx; 1042 } 1043 iostat_alloc_and_bind_ctx(sbi, bio, ctx); 1044 1045 return bio; 1046 } 1047 1048 /* This can handle encryption stuffs */ 1049 static int f2fs_submit_page_read(struct inode *inode, struct page *page, 1050 block_t blkaddr, int op_flags, bool for_write) 1051 { 1052 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1053 struct bio *bio; 1054 1055 bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, 1056 page->index, for_write); 1057 if (IS_ERR(bio)) 1058 return PTR_ERR(bio); 1059 1060 /* wait for GCed page writeback via META_MAPPING */ 1061 f2fs_wait_on_block_writeback(inode, blkaddr); 1062 1063 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { 1064 bio_put(bio); 1065 return -EFAULT; 1066 } 1067 ClearPageError(page); 1068 inc_page_count(sbi, F2FS_RD_DATA); 1069 f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); 1070 __submit_bio(sbi, bio, DATA); 1071 return 0; 1072 } 1073 1074 static void __set_data_blkaddr(struct dnode_of_data *dn) 1075 { 1076 struct f2fs_node *rn = F2FS_NODE(dn->node_page); 1077 __le32 *addr_array; 1078 int base = 0; 1079 1080 if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) 1081 base = get_extra_isize(dn->inode); 1082 1083 /* Get physical address of data block */ 1084 addr_array = blkaddr_in_node(rn); 1085 addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); 1086 } 1087 1088 /* 1089 * Lock ordering for the change of data block address: 1090 * ->data_page 1091 * ->node_page 1092 * update block addresses in the node page 1093 */ 1094 void f2fs_set_data_blkaddr(struct dnode_of_data *dn) 1095 { 1096 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); 1097 __set_data_blkaddr(dn); 1098 if (set_page_dirty(dn->node_page)) 1099 dn->node_changed = true; 1100 } 1101 1102 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) 1103 { 1104 dn->data_blkaddr = blkaddr; 1105 f2fs_set_data_blkaddr(dn); 1106 f2fs_update_extent_cache(dn); 1107 } 1108 1109 /* dn->ofs_in_node will be returned with up-to-date last block pointer */ 1110 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) 1111 { 1112 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1113 int err; 1114 1115 if (!count) 1116 return 0; 1117 1118 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) 1119 return -EPERM; 1120 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) 1121 return err; 1122 1123 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, 1124 dn->ofs_in_node, count); 1125 1126 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); 1127 1128 for (; count > 0; dn->ofs_in_node++) { 1129 block_t blkaddr = f2fs_data_blkaddr(dn); 1130 1131 if (blkaddr == NULL_ADDR) { 1132 dn->data_blkaddr = NEW_ADDR; 1133 __set_data_blkaddr(dn); 1134 count--; 1135 } 1136 } 1137 1138 if (set_page_dirty(dn->node_page)) 1139 dn->node_changed = true; 1140 return 0; 1141 } 1142 1143 /* Should keep dn->ofs_in_node unchanged */ 1144 int f2fs_reserve_new_block(struct dnode_of_data *dn) 1145 { 1146 unsigned int ofs_in_node = dn->ofs_in_node; 1147 int ret; 1148 1149 ret = f2fs_reserve_new_blocks(dn, 1); 1150 dn->ofs_in_node = ofs_in_node; 1151 return ret; 1152 } 1153 1154 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) 1155 { 1156 bool need_put = dn->inode_page ? 
false : true; 1157 int err; 1158 1159 err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE); 1160 if (err) 1161 return err; 1162 1163 if (dn->data_blkaddr == NULL_ADDR) 1164 err = f2fs_reserve_new_block(dn); 1165 if (err || need_put) 1166 f2fs_put_dnode(dn); 1167 return err; 1168 } 1169 1170 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) 1171 { 1172 struct extent_info ei = {0, }; 1173 struct inode *inode = dn->inode; 1174 1175 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 1176 dn->data_blkaddr = ei.blk + index - ei.fofs; 1177 return 0; 1178 } 1179 1180 return f2fs_reserve_block(dn, index); 1181 } 1182 1183 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, 1184 int op_flags, bool for_write) 1185 { 1186 struct address_space *mapping = inode->i_mapping; 1187 struct dnode_of_data dn; 1188 struct page *page; 1189 struct extent_info ei = {0, }; 1190 int err; 1191 1192 page = f2fs_grab_cache_page(mapping, index, for_write); 1193 if (!page) 1194 return ERR_PTR(-ENOMEM); 1195 1196 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 1197 dn.data_blkaddr = ei.blk + index - ei.fofs; 1198 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, 1199 DATA_GENERIC_ENHANCE_READ)) { 1200 err = -EFSCORRUPTED; 1201 goto put_err; 1202 } 1203 goto got_it; 1204 } 1205 1206 set_new_dnode(&dn, inode, NULL, NULL, 0); 1207 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 1208 if (err) 1209 goto put_err; 1210 f2fs_put_dnode(&dn); 1211 1212 if (unlikely(dn.data_blkaddr == NULL_ADDR)) { 1213 err = -ENOENT; 1214 goto put_err; 1215 } 1216 if (dn.data_blkaddr != NEW_ADDR && 1217 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), 1218 dn.data_blkaddr, 1219 DATA_GENERIC_ENHANCE)) { 1220 err = -EFSCORRUPTED; 1221 goto put_err; 1222 } 1223 got_it: 1224 if (PageUptodate(page)) { 1225 unlock_page(page); 1226 return page; 1227 } 1228 1229 /* 1230 * A new dentry page is allocated but not able to be written, since its 1231 * new inode page couldn't be allocated due to -ENOSPC. 1232 * In such the case, its blkaddr can be remained as NEW_ADDR. 1233 * see, f2fs_add_link -> f2fs_get_new_data_page -> 1234 * f2fs_init_inode_metadata. 1235 */ 1236 if (dn.data_blkaddr == NEW_ADDR) { 1237 zero_user_segment(page, 0, PAGE_SIZE); 1238 if (!PageUptodate(page)) 1239 SetPageUptodate(page); 1240 unlock_page(page); 1241 return page; 1242 } 1243 1244 err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, 1245 op_flags, for_write); 1246 if (err) 1247 goto put_err; 1248 return page; 1249 1250 put_err: 1251 f2fs_put_page(page, 1); 1252 return ERR_PTR(err); 1253 } 1254 1255 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index) 1256 { 1257 struct address_space *mapping = inode->i_mapping; 1258 struct page *page; 1259 1260 page = find_get_page(mapping, index); 1261 if (page && PageUptodate(page)) 1262 return page; 1263 f2fs_put_page(page, 0); 1264 1265 page = f2fs_get_read_data_page(inode, index, 0, false); 1266 if (IS_ERR(page)) 1267 return page; 1268 1269 if (PageUptodate(page)) 1270 return page; 1271 1272 wait_on_page_locked(page); 1273 if (unlikely(!PageUptodate(page))) { 1274 f2fs_put_page(page, 0); 1275 return ERR_PTR(-EIO); 1276 } 1277 return page; 1278 } 1279 1280 /* 1281 * If it tries to access a hole, return an error. 1282 * Because, the callers, functions in dir.c and GC, should be able to know 1283 * whether this page exists or not. 
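 * (f2fs_get_read_data_page() returns -ENOENT when the block address is
 * NULL_ADDR, i.e. the page is a hole.)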
1284 */ 1285 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, 1286 bool for_write) 1287 { 1288 struct address_space *mapping = inode->i_mapping; 1289 struct page *page; 1290 repeat: 1291 page = f2fs_get_read_data_page(inode, index, 0, for_write); 1292 if (IS_ERR(page)) 1293 return page; 1294 1295 /* wait for read completion */ 1296 lock_page(page); 1297 if (unlikely(page->mapping != mapping)) { 1298 f2fs_put_page(page, 1); 1299 goto repeat; 1300 } 1301 if (unlikely(!PageUptodate(page))) { 1302 f2fs_put_page(page, 1); 1303 return ERR_PTR(-EIO); 1304 } 1305 return page; 1306 } 1307 1308 /* 1309 * Caller ensures that this data page is never allocated. 1310 * A new zero-filled data page is allocated in the page cache. 1311 * 1312 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and 1313 * f2fs_unlock_op(). 1314 * Note that, ipage is set only by make_empty_dir, and if any error occur, 1315 * ipage should be released by this function. 1316 */ 1317 struct page *f2fs_get_new_data_page(struct inode *inode, 1318 struct page *ipage, pgoff_t index, bool new_i_size) 1319 { 1320 struct address_space *mapping = inode->i_mapping; 1321 struct page *page; 1322 struct dnode_of_data dn; 1323 int err; 1324 1325 page = f2fs_grab_cache_page(mapping, index, true); 1326 if (!page) { 1327 /* 1328 * before exiting, we should make sure ipage will be released 1329 * if any error occur. 1330 */ 1331 f2fs_put_page(ipage, 1); 1332 return ERR_PTR(-ENOMEM); 1333 } 1334 1335 set_new_dnode(&dn, inode, ipage, NULL, 0); 1336 err = f2fs_reserve_block(&dn, index); 1337 if (err) { 1338 f2fs_put_page(page, 1); 1339 return ERR_PTR(err); 1340 } 1341 if (!ipage) 1342 f2fs_put_dnode(&dn); 1343 1344 if (PageUptodate(page)) 1345 goto got_it; 1346 1347 if (dn.data_blkaddr == NEW_ADDR) { 1348 zero_user_segment(page, 0, PAGE_SIZE); 1349 if (!PageUptodate(page)) 1350 SetPageUptodate(page); 1351 } else { 1352 f2fs_put_page(page, 1); 1353 1354 /* if ipage exists, blkaddr should be NEW_ADDR */ 1355 f2fs_bug_on(F2FS_I_SB(inode), ipage); 1356 page = f2fs_get_lock_data_page(inode, index, true); 1357 if (IS_ERR(page)) 1358 return page; 1359 } 1360 got_it: 1361 if (new_i_size && i_size_read(inode) < 1362 ((loff_t)(index + 1) << PAGE_SHIFT)) 1363 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); 1364 return page; 1365 } 1366 1367 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) 1368 { 1369 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); 1370 struct f2fs_summary sum; 1371 struct node_info ni; 1372 block_t old_blkaddr; 1373 blkcnt_t count = 1; 1374 int err; 1375 1376 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) 1377 return -EPERM; 1378 1379 err = f2fs_get_node_info(sbi, dn->nid, &ni, false); 1380 if (err) 1381 return err; 1382 1383 dn->data_blkaddr = f2fs_data_blkaddr(dn); 1384 if (dn->data_blkaddr != NULL_ADDR) 1385 goto alloc; 1386 1387 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count)))) 1388 return err; 1389 1390 alloc: 1391 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); 1392 old_blkaddr = dn->data_blkaddr; 1393 f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, 1394 &sum, seg_type, NULL); 1395 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { 1396 invalidate_mapping_pages(META_MAPPING(sbi), 1397 old_blkaddr, old_blkaddr); 1398 f2fs_invalidate_compress_page(sbi, old_blkaddr); 1399 } 1400 f2fs_update_data_blkaddr(dn, dn->data_blkaddr); 1401 return 0; 1402 } 1403 1404 void f2fs_do_map_lock(struct f2fs_sb_info 
*sbi, int flag, bool lock) 1405 { 1406 if (flag == F2FS_GET_BLOCK_PRE_AIO) { 1407 if (lock) 1408 f2fs_down_read(&sbi->node_change); 1409 else 1410 f2fs_up_read(&sbi->node_change); 1411 } else { 1412 if (lock) 1413 f2fs_lock_op(sbi); 1414 else 1415 f2fs_unlock_op(sbi); 1416 } 1417 } 1418 1419 /* 1420 * f2fs_map_blocks() tries to find or build mapping relationship which 1421 * maps continuous logical blocks to physical blocks, and return such 1422 * info via f2fs_map_blocks structure. 1423 */ 1424 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, 1425 int create, int flag) 1426 { 1427 unsigned int maxblocks = map->m_len; 1428 struct dnode_of_data dn; 1429 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1430 int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE; 1431 pgoff_t pgofs, end_offset, end; 1432 int err = 0, ofs = 1; 1433 unsigned int ofs_in_node, last_ofs_in_node; 1434 blkcnt_t prealloc; 1435 struct extent_info ei = {0, }; 1436 block_t blkaddr; 1437 unsigned int start_pgofs; 1438 int bidx = 0; 1439 1440 if (!maxblocks) 1441 return 0; 1442 1443 map->m_bdev = inode->i_sb->s_bdev; 1444 map->m_multidev_dio = 1445 f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag); 1446 1447 map->m_len = 0; 1448 map->m_flags = 0; 1449 1450 /* it only supports block size == page size */ 1451 pgofs = (pgoff_t)map->m_lblk; 1452 end = pgofs + maxblocks; 1453 1454 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { 1455 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && 1456 map->m_may_create) 1457 goto next_dnode; 1458 1459 map->m_pblk = ei.blk + pgofs - ei.fofs; 1460 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs); 1461 map->m_flags = F2FS_MAP_MAPPED; 1462 if (map->m_next_extent) 1463 *map->m_next_extent = pgofs + map->m_len; 1464 1465 /* for hardware encryption, but to avoid potential issue in future */ 1466 if (flag == F2FS_GET_BLOCK_DIO) 1467 f2fs_wait_on_block_writeback_range(inode, 1468 map->m_pblk, map->m_len); 1469 1470 if (map->m_multidev_dio) { 1471 block_t blk_addr = map->m_pblk; 1472 1473 bidx = f2fs_target_device_index(sbi, map->m_pblk); 1474 1475 map->m_bdev = FDEV(bidx).bdev; 1476 map->m_pblk -= FDEV(bidx).start_blk; 1477 map->m_len = min(map->m_len, 1478 FDEV(bidx).end_blk + 1 - map->m_pblk); 1479 1480 if (map->m_may_create) 1481 f2fs_update_device_state(sbi, inode->i_ino, 1482 blk_addr, map->m_len); 1483 } 1484 goto out; 1485 } 1486 1487 next_dnode: 1488 if (map->m_may_create) 1489 f2fs_do_map_lock(sbi, flag, true); 1490 1491 /* When reading holes, we need its node page */ 1492 set_new_dnode(&dn, inode, NULL, NULL, 0); 1493 err = f2fs_get_dnode_of_data(&dn, pgofs, mode); 1494 if (err) { 1495 if (flag == F2FS_GET_BLOCK_BMAP) 1496 map->m_pblk = 0; 1497 1498 if (err == -ENOENT) { 1499 /* 1500 * There is one exceptional case that read_node_page() 1501 * may return -ENOENT due to filesystem has been 1502 * shutdown or cp_error, so force to convert error 1503 * number to EIO for such case. 
1504 */ 1505 if (map->m_may_create && 1506 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || 1507 f2fs_cp_error(sbi))) { 1508 err = -EIO; 1509 goto unlock_out; 1510 } 1511 1512 err = 0; 1513 if (map->m_next_pgofs) 1514 *map->m_next_pgofs = 1515 f2fs_get_next_page_offset(&dn, pgofs); 1516 if (map->m_next_extent) 1517 *map->m_next_extent = 1518 f2fs_get_next_page_offset(&dn, pgofs); 1519 } 1520 goto unlock_out; 1521 } 1522 1523 start_pgofs = pgofs; 1524 prealloc = 0; 1525 last_ofs_in_node = ofs_in_node = dn.ofs_in_node; 1526 end_offset = ADDRS_PER_PAGE(dn.node_page, inode); 1527 1528 next_block: 1529 blkaddr = f2fs_data_blkaddr(&dn); 1530 1531 if (__is_valid_data_blkaddr(blkaddr) && 1532 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { 1533 err = -EFSCORRUPTED; 1534 goto sync_out; 1535 } 1536 1537 if (__is_valid_data_blkaddr(blkaddr)) { 1538 /* use out-place-update for driect IO under LFS mode */ 1539 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && 1540 map->m_may_create) { 1541 err = __allocate_data_block(&dn, map->m_seg_type); 1542 if (err) 1543 goto sync_out; 1544 blkaddr = dn.data_blkaddr; 1545 set_inode_flag(inode, FI_APPEND_WRITE); 1546 } 1547 } else { 1548 if (create) { 1549 if (unlikely(f2fs_cp_error(sbi))) { 1550 err = -EIO; 1551 goto sync_out; 1552 } 1553 if (flag == F2FS_GET_BLOCK_PRE_AIO) { 1554 if (blkaddr == NULL_ADDR) { 1555 prealloc++; 1556 last_ofs_in_node = dn.ofs_in_node; 1557 } 1558 } else { 1559 WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && 1560 flag != F2FS_GET_BLOCK_DIO); 1561 err = __allocate_data_block(&dn, 1562 map->m_seg_type); 1563 if (!err) { 1564 if (flag == F2FS_GET_BLOCK_PRE_DIO) 1565 file_need_truncate(inode); 1566 set_inode_flag(inode, FI_APPEND_WRITE); 1567 } 1568 } 1569 if (err) 1570 goto sync_out; 1571 map->m_flags |= F2FS_MAP_NEW; 1572 blkaddr = dn.data_blkaddr; 1573 } else { 1574 if (f2fs_compressed_file(inode) && 1575 f2fs_sanity_check_cluster(&dn) && 1576 (flag != F2FS_GET_BLOCK_FIEMAP || 1577 IS_ENABLED(CONFIG_F2FS_CHECK_FS))) { 1578 err = -EFSCORRUPTED; 1579 goto sync_out; 1580 } 1581 if (flag == F2FS_GET_BLOCK_BMAP) { 1582 map->m_pblk = 0; 1583 goto sync_out; 1584 } 1585 if (flag == F2FS_GET_BLOCK_PRECACHE) 1586 goto sync_out; 1587 if (flag == F2FS_GET_BLOCK_FIEMAP && 1588 blkaddr == NULL_ADDR) { 1589 if (map->m_next_pgofs) 1590 *map->m_next_pgofs = pgofs + 1; 1591 goto sync_out; 1592 } 1593 if (flag != F2FS_GET_BLOCK_FIEMAP) { 1594 /* for defragment case */ 1595 if (map->m_next_pgofs) 1596 *map->m_next_pgofs = pgofs + 1; 1597 goto sync_out; 1598 } 1599 } 1600 } 1601 1602 if (flag == F2FS_GET_BLOCK_PRE_AIO) 1603 goto skip; 1604 1605 if (map->m_multidev_dio) 1606 bidx = f2fs_target_device_index(sbi, blkaddr); 1607 1608 if (map->m_len == 0) { 1609 /* preallocated unwritten block should be mapped for fiemap. 
*/ 1610 if (blkaddr == NEW_ADDR) 1611 map->m_flags |= F2FS_MAP_UNWRITTEN; 1612 map->m_flags |= F2FS_MAP_MAPPED; 1613 1614 map->m_pblk = blkaddr; 1615 map->m_len = 1; 1616 1617 if (map->m_multidev_dio) 1618 map->m_bdev = FDEV(bidx).bdev; 1619 } else if ((map->m_pblk != NEW_ADDR && 1620 blkaddr == (map->m_pblk + ofs)) || 1621 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || 1622 flag == F2FS_GET_BLOCK_PRE_DIO) { 1623 if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) 1624 goto sync_out; 1625 ofs++; 1626 map->m_len++; 1627 } else { 1628 goto sync_out; 1629 } 1630 1631 skip: 1632 dn.ofs_in_node++; 1633 pgofs++; 1634 1635 /* preallocate blocks in batch for one dnode page */ 1636 if (flag == F2FS_GET_BLOCK_PRE_AIO && 1637 (pgofs == end || dn.ofs_in_node == end_offset)) { 1638 1639 dn.ofs_in_node = ofs_in_node; 1640 err = f2fs_reserve_new_blocks(&dn, prealloc); 1641 if (err) 1642 goto sync_out; 1643 1644 map->m_len += dn.ofs_in_node - ofs_in_node; 1645 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { 1646 err = -ENOSPC; 1647 goto sync_out; 1648 } 1649 dn.ofs_in_node = end_offset; 1650 } 1651 1652 if (pgofs >= end) 1653 goto sync_out; 1654 else if (dn.ofs_in_node < end_offset) 1655 goto next_block; 1656 1657 if (flag == F2FS_GET_BLOCK_PRECACHE) { 1658 if (map->m_flags & F2FS_MAP_MAPPED) { 1659 unsigned int ofs = start_pgofs - map->m_lblk; 1660 1661 f2fs_update_extent_cache_range(&dn, 1662 start_pgofs, map->m_pblk + ofs, 1663 map->m_len - ofs); 1664 } 1665 } 1666 1667 f2fs_put_dnode(&dn); 1668 1669 if (map->m_may_create) { 1670 f2fs_do_map_lock(sbi, flag, false); 1671 f2fs_balance_fs(sbi, dn.node_changed); 1672 } 1673 goto next_dnode; 1674 1675 sync_out: 1676 1677 if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) { 1678 /* 1679 * for hardware encryption, but to avoid potential issue 1680 * in future 1681 */ 1682 f2fs_wait_on_block_writeback_range(inode, 1683 map->m_pblk, map->m_len); 1684 invalidate_mapping_pages(META_MAPPING(sbi), 1685 map->m_pblk, map->m_pblk); 1686 1687 if (map->m_multidev_dio) { 1688 block_t blk_addr = map->m_pblk; 1689 1690 bidx = f2fs_target_device_index(sbi, map->m_pblk); 1691 1692 map->m_bdev = FDEV(bidx).bdev; 1693 map->m_pblk -= FDEV(bidx).start_blk; 1694 1695 if (map->m_may_create) 1696 f2fs_update_device_state(sbi, inode->i_ino, 1697 blk_addr, map->m_len); 1698 1699 f2fs_bug_on(sbi, blk_addr + map->m_len > 1700 FDEV(bidx).end_blk + 1); 1701 } 1702 } 1703 1704 if (flag == F2FS_GET_BLOCK_PRECACHE) { 1705 if (map->m_flags & F2FS_MAP_MAPPED) { 1706 unsigned int ofs = start_pgofs - map->m_lblk; 1707 1708 f2fs_update_extent_cache_range(&dn, 1709 start_pgofs, map->m_pblk + ofs, 1710 map->m_len - ofs); 1711 } 1712 if (map->m_next_extent) 1713 *map->m_next_extent = pgofs + 1; 1714 } 1715 f2fs_put_dnode(&dn); 1716 unlock_out: 1717 if (map->m_may_create) { 1718 f2fs_do_map_lock(sbi, flag, false); 1719 f2fs_balance_fs(sbi, dn.node_changed); 1720 } 1721 out: 1722 trace_f2fs_map_blocks(inode, map, create, flag, err); 1723 return err; 1724 } 1725 1726 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) 1727 { 1728 struct f2fs_map_blocks map; 1729 block_t last_lblk; 1730 int err; 1731 1732 if (pos + len > i_size_read(inode)) 1733 return false; 1734 1735 map.m_lblk = F2FS_BYTES_TO_BLK(pos); 1736 map.m_next_pgofs = NULL; 1737 map.m_next_extent = NULL; 1738 map.m_seg_type = NO_CHECK_TYPE; 1739 map.m_may_create = false; 1740 last_lblk = F2FS_BLK_ALIGN(pos + len); 1741 1742 while (map.m_lblk < last_lblk) { 1743 map.m_len = last_lblk - 
map.m_lblk; 1744 err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT); 1745 if (err || map.m_len == 0) 1746 return false; 1747 map.m_lblk += map.m_len; 1748 } 1749 return true; 1750 } 1751 1752 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) 1753 { 1754 return (bytes >> inode->i_blkbits); 1755 } 1756 1757 static inline u64 blks_to_bytes(struct inode *inode, u64 blks) 1758 { 1759 return (blks << inode->i_blkbits); 1760 } 1761 1762 static int f2fs_xattr_fiemap(struct inode *inode, 1763 struct fiemap_extent_info *fieinfo) 1764 { 1765 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 1766 struct page *page; 1767 struct node_info ni; 1768 __u64 phys = 0, len; 1769 __u32 flags; 1770 nid_t xnid = F2FS_I(inode)->i_xattr_nid; 1771 int err = 0; 1772 1773 if (f2fs_has_inline_xattr(inode)) { 1774 int offset; 1775 1776 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), 1777 inode->i_ino, false); 1778 if (!page) 1779 return -ENOMEM; 1780 1781 err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); 1782 if (err) { 1783 f2fs_put_page(page, 1); 1784 return err; 1785 } 1786 1787 phys = blks_to_bytes(inode, ni.blk_addr); 1788 offset = offsetof(struct f2fs_inode, i_addr) + 1789 sizeof(__le32) * (DEF_ADDRS_PER_INODE - 1790 get_inline_xattr_addrs(inode)); 1791 1792 phys += offset; 1793 len = inline_xattr_size(inode); 1794 1795 f2fs_put_page(page, 1); 1796 1797 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; 1798 1799 if (!xnid) 1800 flags |= FIEMAP_EXTENT_LAST; 1801 1802 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); 1803 trace_f2fs_fiemap(inode, 0, phys, len, flags, err); 1804 if (err || err == 1) 1805 return err; 1806 } 1807 1808 if (xnid) { 1809 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false); 1810 if (!page) 1811 return -ENOMEM; 1812 1813 err = f2fs_get_node_info(sbi, xnid, &ni, false); 1814 if (err) { 1815 f2fs_put_page(page, 1); 1816 return err; 1817 } 1818 1819 phys = blks_to_bytes(inode, ni.blk_addr); 1820 len = inode->i_sb->s_blocksize; 1821 1822 f2fs_put_page(page, 1); 1823 1824 flags = FIEMAP_EXTENT_LAST; 1825 } 1826 1827 if (phys) { 1828 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags); 1829 trace_f2fs_fiemap(inode, 0, phys, len, flags, err); 1830 } 1831 1832 return (err < 0 ? 
err : 0); 1833 } 1834 1835 static loff_t max_inode_blocks(struct inode *inode) 1836 { 1837 loff_t result = ADDRS_PER_INODE(inode); 1838 loff_t leaf_count = ADDRS_PER_BLOCK(inode); 1839 1840 /* two direct node blocks */ 1841 result += (leaf_count * 2); 1842 1843 /* two indirect node blocks */ 1844 leaf_count *= NIDS_PER_BLOCK; 1845 result += (leaf_count * 2); 1846 1847 /* one double indirect node block */ 1848 leaf_count *= NIDS_PER_BLOCK; 1849 result += leaf_count; 1850 1851 return result; 1852 } 1853 1854 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1855 u64 start, u64 len) 1856 { 1857 struct f2fs_map_blocks map; 1858 sector_t start_blk, last_blk; 1859 pgoff_t next_pgofs; 1860 u64 logical = 0, phys = 0, size = 0; 1861 u32 flags = 0; 1862 int ret = 0; 1863 bool compr_cluster = false, compr_appended; 1864 unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; 1865 unsigned int count_in_cluster = 0; 1866 loff_t maxbytes; 1867 1868 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { 1869 ret = f2fs_precache_extents(inode); 1870 if (ret) 1871 return ret; 1872 } 1873 1874 ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR); 1875 if (ret) 1876 return ret; 1877 1878 inode_lock(inode); 1879 1880 maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; 1881 if (start > maxbytes) { 1882 ret = -EFBIG; 1883 goto out; 1884 } 1885 1886 if (len > maxbytes || (maxbytes - len) < start) 1887 len = maxbytes - start; 1888 1889 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 1890 ret = f2fs_xattr_fiemap(inode, fieinfo); 1891 goto out; 1892 } 1893 1894 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { 1895 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); 1896 if (ret != -EAGAIN) 1897 goto out; 1898 } 1899 1900 if (bytes_to_blks(inode, len) == 0) 1901 len = blks_to_bytes(inode, 1); 1902 1903 start_blk = bytes_to_blks(inode, start); 1904 last_blk = bytes_to_blks(inode, start + len - 1); 1905 1906 next: 1907 memset(&map, 0, sizeof(map)); 1908 map.m_lblk = start_blk; 1909 map.m_len = bytes_to_blks(inode, len); 1910 map.m_next_pgofs = &next_pgofs; 1911 map.m_seg_type = NO_CHECK_TYPE; 1912 1913 if (compr_cluster) { 1914 map.m_lblk += 1; 1915 map.m_len = cluster_size - count_in_cluster; 1916 } 1917 1918 ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); 1919 if (ret) 1920 goto out; 1921 1922 /* HOLE */ 1923 if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) { 1924 start_blk = next_pgofs; 1925 1926 if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, 1927 max_inode_blocks(inode))) 1928 goto prep_next; 1929 1930 flags |= FIEMAP_EXTENT_LAST; 1931 } 1932 1933 compr_appended = false; 1934 /* In a case of compressed cluster, append this to the last extent */ 1935 if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) || 1936 !(map.m_flags & F2FS_MAP_FLAGS))) { 1937 compr_appended = true; 1938 goto skip_fill; 1939 } 1940 1941 if (size) { 1942 flags |= FIEMAP_EXTENT_MERGED; 1943 if (IS_ENCRYPTED(inode)) 1944 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; 1945 1946 ret = fiemap_fill_next_extent(fieinfo, logical, 1947 phys, size, flags); 1948 trace_f2fs_fiemap(inode, logical, phys, size, flags, ret); 1949 if (ret) 1950 goto out; 1951 size = 0; 1952 } 1953 1954 if (start_blk > last_blk) 1955 goto out; 1956 1957 skip_fill: 1958 if (map.m_pblk == COMPRESS_ADDR) { 1959 compr_cluster = true; 1960 count_in_cluster = 1; 1961 } else if (compr_appended) { 1962 unsigned int appended_blks = cluster_size - 1963 count_in_cluster + 1; 1964 size += 
blks_to_bytes(inode, appended_blks); 1965 start_blk += appended_blks; 1966 compr_cluster = false; 1967 } else { 1968 logical = blks_to_bytes(inode, start_blk); 1969 phys = __is_valid_data_blkaddr(map.m_pblk) ? 1970 blks_to_bytes(inode, map.m_pblk) : 0; 1971 size = blks_to_bytes(inode, map.m_len); 1972 flags = 0; 1973 1974 if (compr_cluster) { 1975 flags = FIEMAP_EXTENT_ENCODED; 1976 count_in_cluster += map.m_len; 1977 if (count_in_cluster == cluster_size) { 1978 compr_cluster = false; 1979 size += blks_to_bytes(inode, 1); 1980 } 1981 } else if (map.m_flags & F2FS_MAP_UNWRITTEN) { 1982 flags = FIEMAP_EXTENT_UNWRITTEN; 1983 } 1984 1985 start_blk += bytes_to_blks(inode, size); 1986 } 1987 1988 prep_next: 1989 cond_resched(); 1990 if (fatal_signal_pending(current)) 1991 ret = -EINTR; 1992 else 1993 goto next; 1994 out: 1995 if (ret == 1) 1996 ret = 0; 1997 1998 inode_unlock(inode); 1999 return ret; 2000 } 2001 2002 static inline loff_t f2fs_readpage_limit(struct inode *inode) 2003 { 2004 if (IS_ENABLED(CONFIG_FS_VERITY) && 2005 (IS_VERITY(inode) || f2fs_verity_in_progress(inode))) 2006 return inode->i_sb->s_maxbytes; 2007 2008 return i_size_read(inode); 2009 } 2010 2011 static int f2fs_read_single_page(struct inode *inode, struct page *page, 2012 unsigned nr_pages, 2013 struct f2fs_map_blocks *map, 2014 struct bio **bio_ret, 2015 sector_t *last_block_in_bio, 2016 bool is_readahead) 2017 { 2018 struct bio *bio = *bio_ret; 2019 const unsigned blocksize = blks_to_bytes(inode, 1); 2020 sector_t block_in_file; 2021 sector_t last_block; 2022 sector_t last_block_in_file; 2023 sector_t block_nr; 2024 int ret = 0; 2025 2026 block_in_file = (sector_t)page_index(page); 2027 last_block = block_in_file + nr_pages; 2028 last_block_in_file = bytes_to_blks(inode, 2029 f2fs_readpage_limit(inode) + blocksize - 1); 2030 if (last_block > last_block_in_file) 2031 last_block = last_block_in_file; 2032 2033 /* just zeroing out page which is beyond EOF */ 2034 if (block_in_file >= last_block) 2035 goto zero_out; 2036 /* 2037 * Map blocks using the previous result first. 2038 */ 2039 if ((map->m_flags & F2FS_MAP_MAPPED) && 2040 block_in_file > map->m_lblk && 2041 block_in_file < (map->m_lblk + map->m_len)) 2042 goto got_it; 2043 2044 /* 2045 * Then do more f2fs_map_blocks() calls until we are 2046 * done with this page. 2047 */ 2048 map->m_lblk = block_in_file; 2049 map->m_len = last_block - block_in_file; 2050 2051 ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT); 2052 if (ret) 2053 goto out; 2054 got_it: 2055 if ((map->m_flags & F2FS_MAP_MAPPED)) { 2056 block_nr = map->m_pblk + block_in_file - map->m_lblk; 2057 SetPageMappedToDisk(page); 2058 2059 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, 2060 DATA_GENERIC_ENHANCE_READ)) { 2061 ret = -EFSCORRUPTED; 2062 goto out; 2063 } 2064 } else { 2065 zero_out: 2066 zero_user_segment(page, 0, PAGE_SIZE); 2067 if (f2fs_need_verity(inode, page->index) && 2068 !fsverity_verify_page(page)) { 2069 ret = -EIO; 2070 goto out; 2071 } 2072 if (!PageUptodate(page)) 2073 SetPageUptodate(page); 2074 unlock_page(page); 2075 goto out; 2076 } 2077 2078 /* 2079 * This page will go to BIO. Do we need to send this 2080 * BIO off first? 
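	 * We do if the next block is not mergeable into it, either physically
	 * (page_is_mergeable()) or because the fscrypt contexts can no longer
	 * be merged (f2fs_crypt_mergeable_bio()).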
2081 */ 2082 if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, 2083 *last_block_in_bio, block_nr) || 2084 !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { 2085 submit_and_realloc: 2086 __submit_bio(F2FS_I_SB(inode), bio, DATA); 2087 bio = NULL; 2088 } 2089 if (bio == NULL) { 2090 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, 2091 is_readahead ? REQ_RAHEAD : 0, page->index, 2092 false); 2093 if (IS_ERR(bio)) { 2094 ret = PTR_ERR(bio); 2095 bio = NULL; 2096 goto out; 2097 } 2098 } 2099 2100 /* 2101 * If the page is under writeback, we need to wait for 2102 * its completion to see the correct decrypted data. 2103 */ 2104 f2fs_wait_on_block_writeback(inode, block_nr); 2105 2106 if (bio_add_page(bio, page, blocksize, 0) < blocksize) 2107 goto submit_and_realloc; 2108 2109 inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); 2110 f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE); 2111 ClearPageError(page); 2112 *last_block_in_bio = block_nr; 2113 goto out; 2114 out: 2115 *bio_ret = bio; 2116 return ret; 2117 } 2118 2119 #ifdef CONFIG_F2FS_FS_COMPRESSION 2120 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, 2121 unsigned nr_pages, sector_t *last_block_in_bio, 2122 bool is_readahead, bool for_write) 2123 { 2124 struct dnode_of_data dn; 2125 struct inode *inode = cc->inode; 2126 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2127 struct bio *bio = *bio_ret; 2128 unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; 2129 sector_t last_block_in_file; 2130 const unsigned blocksize = blks_to_bytes(inode, 1); 2131 struct decompress_io_ctx *dic = NULL; 2132 struct extent_info ei = {0, }; 2133 bool from_dnode = true; 2134 int i; 2135 int ret = 0; 2136 2137 f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); 2138 2139 last_block_in_file = bytes_to_blks(inode, 2140 f2fs_readpage_limit(inode) + blocksize - 1); 2141 2142 /* get rid of pages beyond EOF */ 2143 for (i = 0; i < cc->cluster_size; i++) { 2144 struct page *page = cc->rpages[i]; 2145 2146 if (!page) 2147 continue; 2148 if ((sector_t)page->index >= last_block_in_file) { 2149 zero_user_segment(page, 0, PAGE_SIZE); 2150 if (!PageUptodate(page)) 2151 SetPageUptodate(page); 2152 } else if (!PageUptodate(page)) { 2153 continue; 2154 } 2155 unlock_page(page); 2156 if (for_write) 2157 put_page(page); 2158 cc->rpages[i] = NULL; 2159 cc->nr_rpages--; 2160 } 2161 2162 /* we are done since all pages are beyond EOF */ 2163 if (f2fs_cluster_is_empty(cc)) 2164 goto out; 2165 2166 if (f2fs_lookup_extent_cache(inode, start_idx, &ei)) 2167 from_dnode = false; 2168 2169 if (!from_dnode) 2170 goto skip_reading_dnode; 2171 2172 set_new_dnode(&dn, inode, NULL, NULL, 0); 2173 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); 2174 if (ret) 2175 goto out; 2176 2177 f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); 2178 2179 skip_reading_dnode: 2180 for (i = 1; i < cc->cluster_size; i++) { 2181 block_t blkaddr; 2182 2183 blkaddr = from_dnode ? 
data_blkaddr(dn.inode, dn.node_page, 2184 dn.ofs_in_node + i) : 2185 ei.blk + i - 1; 2186 2187 if (!__is_valid_data_blkaddr(blkaddr)) 2188 break; 2189 2190 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { 2191 ret = -EFAULT; 2192 goto out_put_dnode; 2193 } 2194 cc->nr_cpages++; 2195 2196 if (!from_dnode && i >= ei.c_len) 2197 break; 2198 } 2199 2200 /* nothing to decompress */ 2201 if (cc->nr_cpages == 0) { 2202 ret = 0; 2203 goto out_put_dnode; 2204 } 2205 2206 dic = f2fs_alloc_dic(cc); 2207 if (IS_ERR(dic)) { 2208 ret = PTR_ERR(dic); 2209 goto out_put_dnode; 2210 } 2211 2212 for (i = 0; i < cc->nr_cpages; i++) { 2213 struct page *page = dic->cpages[i]; 2214 block_t blkaddr; 2215 struct bio_post_read_ctx *ctx; 2216 2217 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, 2218 dn.ofs_in_node + i + 1) : 2219 ei.blk + i; 2220 2221 f2fs_wait_on_block_writeback(inode, blkaddr); 2222 2223 if (f2fs_load_compressed_page(sbi, page, blkaddr)) { 2224 if (atomic_dec_and_test(&dic->remaining_pages)) 2225 f2fs_decompress_cluster(dic); 2226 continue; 2227 } 2228 2229 if (bio && (!page_is_mergeable(sbi, bio, 2230 *last_block_in_bio, blkaddr) || 2231 !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { 2232 submit_and_realloc: 2233 __submit_bio(sbi, bio, DATA); 2234 bio = NULL; 2235 } 2236 2237 if (!bio) { 2238 bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, 2239 is_readahead ? REQ_RAHEAD : 0, 2240 page->index, for_write); 2241 if (IS_ERR(bio)) { 2242 ret = PTR_ERR(bio); 2243 f2fs_decompress_end_io(dic, ret); 2244 f2fs_put_dnode(&dn); 2245 *bio_ret = NULL; 2246 return ret; 2247 } 2248 } 2249 2250 if (bio_add_page(bio, page, blocksize, 0) < blocksize) 2251 goto submit_and_realloc; 2252 2253 ctx = get_post_read_ctx(bio); 2254 ctx->enabled_steps |= STEP_DECOMPRESS; 2255 refcount_inc(&dic->refcnt); 2256 2257 inc_page_count(sbi, F2FS_RD_DATA); 2258 f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE); 2259 f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE); 2260 ClearPageError(page); 2261 *last_block_in_bio = blkaddr; 2262 } 2263 2264 if (from_dnode) 2265 f2fs_put_dnode(&dn); 2266 2267 *bio_ret = bio; 2268 return 0; 2269 2270 out_put_dnode: 2271 if (from_dnode) 2272 f2fs_put_dnode(&dn); 2273 out: 2274 for (i = 0; i < cc->cluster_size; i++) { 2275 if (cc->rpages[i]) { 2276 ClearPageUptodate(cc->rpages[i]); 2277 ClearPageError(cc->rpages[i]); 2278 unlock_page(cc->rpages[i]); 2279 } 2280 } 2281 *bio_ret = bio; 2282 return ret; 2283 } 2284 #endif 2285 2286 /* 2287 * This function was originally taken from fs/mpage.c, and customized for f2fs. 2288 * Major change was from block_size == page_size in f2fs by default. 2289 */ 2290 static int f2fs_mpage_readpages(struct inode *inode, 2291 struct readahead_control *rac, struct page *page) 2292 { 2293 struct bio *bio = NULL; 2294 sector_t last_block_in_bio = 0; 2295 struct f2fs_map_blocks map; 2296 #ifdef CONFIG_F2FS_FS_COMPRESSION 2297 struct compress_ctx cc = { 2298 .inode = inode, 2299 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, 2300 .cluster_size = F2FS_I(inode)->i_cluster_size, 2301 .cluster_idx = NULL_CLUSTER, 2302 .rpages = NULL, 2303 .cpages = NULL, 2304 .nr_rpages = 0, 2305 .nr_cpages = 0, 2306 }; 2307 pgoff_t nc_cluster_idx = NULL_CLUSTER; 2308 #endif 2309 unsigned nr_pages = rac ? 
readahead_count(rac) : 1; 2310 unsigned max_nr_pages = nr_pages; 2311 int ret = 0; 2312 2313 map.m_pblk = 0; 2314 map.m_lblk = 0; 2315 map.m_len = 0; 2316 map.m_flags = 0; 2317 map.m_next_pgofs = NULL; 2318 map.m_next_extent = NULL; 2319 map.m_seg_type = NO_CHECK_TYPE; 2320 map.m_may_create = false; 2321 2322 for (; nr_pages; nr_pages--) { 2323 if (rac) { 2324 page = readahead_page(rac); 2325 prefetchw(&page->flags); 2326 } 2327 2328 #ifdef CONFIG_F2FS_FS_COMPRESSION 2329 if (f2fs_compressed_file(inode)) { 2330 /* there are remained comressed pages, submit them */ 2331 if (!f2fs_cluster_can_merge_page(&cc, page->index)) { 2332 ret = f2fs_read_multi_pages(&cc, &bio, 2333 max_nr_pages, 2334 &last_block_in_bio, 2335 rac != NULL, false); 2336 f2fs_destroy_compress_ctx(&cc, false); 2337 if (ret) 2338 goto set_error_page; 2339 } 2340 if (cc.cluster_idx == NULL_CLUSTER) { 2341 if (nc_cluster_idx == 2342 page->index >> cc.log_cluster_size) { 2343 goto read_single_page; 2344 } 2345 2346 ret = f2fs_is_compressed_cluster(inode, page->index); 2347 if (ret < 0) 2348 goto set_error_page; 2349 else if (!ret) { 2350 nc_cluster_idx = 2351 page->index >> cc.log_cluster_size; 2352 goto read_single_page; 2353 } 2354 2355 nc_cluster_idx = NULL_CLUSTER; 2356 } 2357 ret = f2fs_init_compress_ctx(&cc); 2358 if (ret) 2359 goto set_error_page; 2360 2361 f2fs_compress_ctx_add_page(&cc, page); 2362 2363 goto next_page; 2364 } 2365 read_single_page: 2366 #endif 2367 2368 ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, 2369 &bio, &last_block_in_bio, rac); 2370 if (ret) { 2371 #ifdef CONFIG_F2FS_FS_COMPRESSION 2372 set_error_page: 2373 #endif 2374 SetPageError(page); 2375 zero_user_segment(page, 0, PAGE_SIZE); 2376 unlock_page(page); 2377 } 2378 #ifdef CONFIG_F2FS_FS_COMPRESSION 2379 next_page: 2380 #endif 2381 if (rac) 2382 put_page(page); 2383 2384 #ifdef CONFIG_F2FS_FS_COMPRESSION 2385 if (f2fs_compressed_file(inode)) { 2386 /* last page */ 2387 if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) { 2388 ret = f2fs_read_multi_pages(&cc, &bio, 2389 max_nr_pages, 2390 &last_block_in_bio, 2391 rac != NULL, false); 2392 f2fs_destroy_compress_ctx(&cc, false); 2393 } 2394 } 2395 #endif 2396 } 2397 if (bio) 2398 __submit_bio(F2FS_I_SB(inode), bio, DATA); 2399 return ret; 2400 } 2401 2402 static int f2fs_read_data_folio(struct file *file, struct folio *folio) 2403 { 2404 struct page *page = &folio->page; 2405 struct inode *inode = page_file_mapping(page)->host; 2406 int ret = -EAGAIN; 2407 2408 trace_f2fs_readpage(page, DATA); 2409 2410 if (!f2fs_is_compress_backend_ready(inode)) { 2411 unlock_page(page); 2412 return -EOPNOTSUPP; 2413 } 2414 2415 /* If the file has inline data, try to read it directly */ 2416 if (f2fs_has_inline_data(inode)) 2417 ret = f2fs_read_inline_data(inode, page); 2418 if (ret == -EAGAIN) 2419 ret = f2fs_mpage_readpages(inode, NULL, page); 2420 return ret; 2421 } 2422 2423 static void f2fs_readahead(struct readahead_control *rac) 2424 { 2425 struct inode *inode = rac->mapping->host; 2426 2427 trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac)); 2428 2429 if (!f2fs_is_compress_backend_ready(inode)) 2430 return; 2431 2432 /* If the file has inline data, skip readahead */ 2433 if (f2fs_has_inline_data(inode)) 2434 return; 2435 2436 f2fs_mpage_readpages(inode, rac, NULL); 2437 } 2438 2439 int f2fs_encrypt_one_page(struct f2fs_io_info *fio) 2440 { 2441 struct inode *inode = fio->page->mapping->host; 2442 struct page *mpage, *page; 2443 gfp_t gfp_flags = GFP_NOFS; 2444 2445 if 
(!f2fs_encrypted_file(inode)) 2446 return 0; 2447 2448 page = fio->compressed_page ? fio->compressed_page : fio->page; 2449 2450 /* wait for GCed page writeback via META_MAPPING */ 2451 f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); 2452 2453 if (fscrypt_inode_uses_inline_crypto(inode)) 2454 return 0; 2455 2456 retry_encrypt: 2457 fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, 2458 PAGE_SIZE, 0, gfp_flags); 2459 if (IS_ERR(fio->encrypted_page)) { 2460 /* flush pending IOs and wait for a while in the ENOMEM case */ 2461 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { 2462 f2fs_flush_merged_writes(fio->sbi); 2463 memalloc_retry_wait(GFP_NOFS); 2464 gfp_flags |= __GFP_NOFAIL; 2465 goto retry_encrypt; 2466 } 2467 return PTR_ERR(fio->encrypted_page); 2468 } 2469 2470 mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); 2471 if (mpage) { 2472 if (PageUptodate(mpage)) 2473 memcpy(page_address(mpage), 2474 page_address(fio->encrypted_page), PAGE_SIZE); 2475 f2fs_put_page(mpage, 1); 2476 } 2477 return 0; 2478 } 2479 2480 static inline bool check_inplace_update_policy(struct inode *inode, 2481 struct f2fs_io_info *fio) 2482 { 2483 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2484 unsigned int policy = SM_I(sbi)->ipu_policy; 2485 2486 if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) && 2487 is_inode_flag_set(inode, FI_OPU_WRITE)) 2488 return false; 2489 if (policy & (0x1 << F2FS_IPU_FORCE)) 2490 return true; 2491 if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) 2492 return true; 2493 if (policy & (0x1 << F2FS_IPU_UTIL) && 2494 utilization(sbi) > SM_I(sbi)->min_ipu_util) 2495 return true; 2496 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) && 2497 utilization(sbi) > SM_I(sbi)->min_ipu_util) 2498 return true; 2499 2500 /* 2501 * IPU for rewrite async pages 2502 */ 2503 if (policy & (0x1 << F2FS_IPU_ASYNC) && 2504 fio && fio->op == REQ_OP_WRITE && 2505 !(fio->op_flags & REQ_SYNC) && 2506 !IS_ENCRYPTED(inode)) 2507 return true; 2508 2509 /* this is only set during fdatasync */ 2510 if (policy & (0x1 << F2FS_IPU_FSYNC) && 2511 is_inode_flag_set(inode, FI_NEED_IPU)) 2512 return true; 2513 2514 if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && 2515 !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) 2516 return true; 2517 2518 return false; 2519 } 2520 2521 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) 2522 { 2523 /* swap file is migrating in aligned write mode */ 2524 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) 2525 return false; 2526 2527 if (f2fs_is_pinned_file(inode)) 2528 return true; 2529 2530 /* if this is cold file, we should overwrite to avoid fragmentation */ 2531 if (file_is_cold(inode)) 2532 return true; 2533 2534 return check_inplace_update_policy(inode, fio); 2535 } 2536 2537 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) 2538 { 2539 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2540 2541 /* The below cases were checked when setting it. 
*/ 2542 if (f2fs_is_pinned_file(inode)) 2543 return false; 2544 if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK)) 2545 return true; 2546 if (f2fs_lfs_mode(sbi)) 2547 return true; 2548 if (S_ISDIR(inode->i_mode)) 2549 return true; 2550 if (IS_NOQUOTA(inode)) 2551 return true; 2552 if (f2fs_is_atomic_file(inode)) 2553 return true; 2554 2555 /* swap file is migrating in aligned write mode */ 2556 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) 2557 return true; 2558 2559 if (is_inode_flag_set(inode, FI_OPU_WRITE)) 2560 return true; 2561 2562 if (fio) { 2563 if (page_private_gcing(fio->page)) 2564 return true; 2565 if (page_private_dummy(fio->page)) 2566 return true; 2567 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && 2568 f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) 2569 return true; 2570 } 2571 return false; 2572 } 2573 2574 static inline bool need_inplace_update(struct f2fs_io_info *fio) 2575 { 2576 struct inode *inode = fio->page->mapping->host; 2577 2578 if (f2fs_should_update_outplace(inode, fio)) 2579 return false; 2580 2581 return f2fs_should_update_inplace(inode, fio); 2582 } 2583 2584 int f2fs_do_write_data_page(struct f2fs_io_info *fio) 2585 { 2586 struct page *page = fio->page; 2587 struct inode *inode = page->mapping->host; 2588 struct dnode_of_data dn; 2589 struct extent_info ei = {0, }; 2590 struct node_info ni; 2591 bool ipu_force = false; 2592 int err = 0; 2593 2594 /* Use COW inode to make dnode_of_data for atomic write */ 2595 if (f2fs_is_atomic_file(inode)) 2596 set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); 2597 else 2598 set_new_dnode(&dn, inode, NULL, NULL, 0); 2599 2600 if (need_inplace_update(fio) && 2601 f2fs_lookup_extent_cache(inode, page->index, &ei)) { 2602 fio->old_blkaddr = ei.blk + page->index - ei.fofs; 2603 2604 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, 2605 DATA_GENERIC_ENHANCE)) 2606 return -EFSCORRUPTED; 2607 2608 ipu_force = true; 2609 fio->need_lock = LOCK_DONE; 2610 goto got_it; 2611 } 2612 2613 /* Deadlock due to between page->lock and f2fs_lock_op */ 2614 if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) 2615 return -EAGAIN; 2616 2617 err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); 2618 if (err) 2619 goto out; 2620 2621 fio->old_blkaddr = dn.data_blkaddr; 2622 2623 /* This page is already truncated */ 2624 if (fio->old_blkaddr == NULL_ADDR) { 2625 ClearPageUptodate(page); 2626 clear_page_private_gcing(page); 2627 goto out_writepage; 2628 } 2629 got_it: 2630 if (__is_valid_data_blkaddr(fio->old_blkaddr) && 2631 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, 2632 DATA_GENERIC_ENHANCE)) { 2633 err = -EFSCORRUPTED; 2634 goto out_writepage; 2635 } 2636 2637 /* 2638 * If current allocation needs SSR, 2639 * it had better in-place writes for updated data. 
2640 */ 2641 if (ipu_force || 2642 (__is_valid_data_blkaddr(fio->old_blkaddr) && 2643 need_inplace_update(fio))) { 2644 err = f2fs_encrypt_one_page(fio); 2645 if (err) 2646 goto out_writepage; 2647 2648 set_page_writeback(page); 2649 ClearPageError(page); 2650 f2fs_put_dnode(&dn); 2651 if (fio->need_lock == LOCK_REQ) 2652 f2fs_unlock_op(fio->sbi); 2653 err = f2fs_inplace_write_data(fio); 2654 if (err) { 2655 if (fscrypt_inode_uses_fs_layer_crypto(inode)) 2656 fscrypt_finalize_bounce_page(&fio->encrypted_page); 2657 if (PageWriteback(page)) 2658 end_page_writeback(page); 2659 } else { 2660 set_inode_flag(inode, FI_UPDATE_WRITE); 2661 } 2662 trace_f2fs_do_write_data_page(fio->page, IPU); 2663 return err; 2664 } 2665 2666 if (fio->need_lock == LOCK_RETRY) { 2667 if (!f2fs_trylock_op(fio->sbi)) { 2668 err = -EAGAIN; 2669 goto out_writepage; 2670 } 2671 fio->need_lock = LOCK_REQ; 2672 } 2673 2674 err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false); 2675 if (err) 2676 goto out_writepage; 2677 2678 fio->version = ni.version; 2679 2680 err = f2fs_encrypt_one_page(fio); 2681 if (err) 2682 goto out_writepage; 2683 2684 set_page_writeback(page); 2685 ClearPageError(page); 2686 2687 if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) 2688 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); 2689 2690 /* LFS mode write path */ 2691 f2fs_outplace_write_data(&dn, fio); 2692 trace_f2fs_do_write_data_page(page, OPU); 2693 set_inode_flag(inode, FI_APPEND_WRITE); 2694 if (page->index == 0) 2695 set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); 2696 out_writepage: 2697 f2fs_put_dnode(&dn); 2698 out: 2699 if (fio->need_lock == LOCK_REQ) 2700 f2fs_unlock_op(fio->sbi); 2701 return err; 2702 } 2703 2704 int f2fs_write_single_data_page(struct page *page, int *submitted, 2705 struct bio **bio, 2706 sector_t *last_block, 2707 struct writeback_control *wbc, 2708 enum iostat_type io_type, 2709 int compr_blocks, 2710 bool allow_balance) 2711 { 2712 struct inode *inode = page->mapping->host; 2713 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2714 loff_t i_size = i_size_read(inode); 2715 const pgoff_t end_index = ((unsigned long long)i_size) 2716 >> PAGE_SHIFT; 2717 loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; 2718 unsigned offset = 0; 2719 bool need_balance_fs = false; 2720 int err = 0; 2721 struct f2fs_io_info fio = { 2722 .sbi = sbi, 2723 .ino = inode->i_ino, 2724 .type = DATA, 2725 .op = REQ_OP_WRITE, 2726 .op_flags = wbc_to_write_flags(wbc), 2727 .old_blkaddr = NULL_ADDR, 2728 .page = page, 2729 .encrypted_page = NULL, 2730 .submitted = false, 2731 .compr_blocks = compr_blocks, 2732 .need_lock = LOCK_RETRY, 2733 .io_type = io_type, 2734 .io_wbc = wbc, 2735 .bio = bio, 2736 .last_block = last_block, 2737 }; 2738 2739 trace_f2fs_writepage(page, DATA); 2740 2741 /* we should bypass data pages to proceed the kworkder jobs */ 2742 if (unlikely(f2fs_cp_error(sbi))) { 2743 mapping_set_error(page->mapping, -EIO); 2744 /* 2745 * don't drop any dirty dentry pages for keeping lastest 2746 * directory structure. 2747 */ 2748 if (S_ISDIR(inode->i_mode)) 2749 goto redirty_out; 2750 goto out; 2751 } 2752 2753 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 2754 goto redirty_out; 2755 2756 if (page->index < end_index || 2757 f2fs_verity_in_progress(inode) || 2758 compr_blocks) 2759 goto write; 2760 2761 /* 2762 * If the offset is out-of-range of file size, 2763 * this page does not have to be written to disk. 
2764 */ 2765 offset = i_size & (PAGE_SIZE - 1); 2766 if ((page->index >= end_index + 1) || !offset) 2767 goto out; 2768 2769 zero_user_segment(page, offset, PAGE_SIZE); 2770 write: 2771 if (f2fs_is_drop_cache(inode)) 2772 goto out; 2773 2774 /* Dentry/quota blocks are controlled by checkpoint */ 2775 if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { 2776 /* 2777 * We need to wait for node_write to avoid block allocation during 2778 * checkpoint. This can only happen to quota writes which can cause 2779 * the below discard race condition. 2780 */ 2781 if (IS_NOQUOTA(inode)) 2782 f2fs_down_read(&sbi->node_write); 2783 2784 fio.need_lock = LOCK_DONE; 2785 err = f2fs_do_write_data_page(&fio); 2786 2787 if (IS_NOQUOTA(inode)) 2788 f2fs_up_read(&sbi->node_write); 2789 2790 goto done; 2791 } 2792 2793 if (!wbc->for_reclaim) 2794 need_balance_fs = true; 2795 else if (has_not_enough_free_secs(sbi, 0, 0)) 2796 goto redirty_out; 2797 else 2798 set_inode_flag(inode, FI_HOT_DATA); 2799 2800 err = -EAGAIN; 2801 if (f2fs_has_inline_data(inode)) { 2802 err = f2fs_write_inline_data(inode, page); 2803 if (!err) 2804 goto out; 2805 } 2806 2807 if (err == -EAGAIN) { 2808 err = f2fs_do_write_data_page(&fio); 2809 if (err == -EAGAIN) { 2810 fio.need_lock = LOCK_REQ; 2811 err = f2fs_do_write_data_page(&fio); 2812 } 2813 } 2814 2815 if (err) { 2816 file_set_keep_isize(inode); 2817 } else { 2818 spin_lock(&F2FS_I(inode)->i_size_lock); 2819 if (F2FS_I(inode)->last_disk_size < psize) 2820 F2FS_I(inode)->last_disk_size = psize; 2821 spin_unlock(&F2FS_I(inode)->i_size_lock); 2822 } 2823 2824 done: 2825 if (err && err != -ENOENT) 2826 goto redirty_out; 2827 2828 out: 2829 inode_dec_dirty_pages(inode); 2830 if (err) { 2831 ClearPageUptodate(page); 2832 clear_page_private_gcing(page); 2833 } 2834 2835 if (wbc->for_reclaim) { 2836 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); 2837 clear_inode_flag(inode, FI_HOT_DATA); 2838 f2fs_remove_dirty_inode(inode); 2839 submitted = NULL; 2840 } 2841 unlock_page(page); 2842 if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && 2843 !F2FS_I(inode)->cp_task && allow_balance) 2844 f2fs_balance_fs(sbi, need_balance_fs); 2845 2846 if (unlikely(f2fs_cp_error(sbi))) { 2847 f2fs_submit_merged_write(sbi, DATA); 2848 f2fs_submit_merged_ipu_write(sbi, bio, NULL); 2849 submitted = NULL; 2850 } 2851 2852 if (submitted) 2853 *submitted = fio.submitted ? 1 : 0; 2854 2855 return 0; 2856 2857 redirty_out: 2858 redirty_page_for_writepage(wbc, page); 2859 /* 2860 * pageout() in MM traslates EAGAIN, so calls handle_write_error() 2861 * -> mapping_set_error() -> set_bit(AS_EIO, ...). 2862 * file_write_and_wait_range() will see EIO error, which is critical 2863 * to return value of fsync() followed by atomic_write failure to user. 
2864 */ 2865 if (!err || wbc->for_reclaim) 2866 return AOP_WRITEPAGE_ACTIVATE; 2867 unlock_page(page); 2868 return err; 2869 } 2870 2871 static int f2fs_write_data_page(struct page *page, 2872 struct writeback_control *wbc) 2873 { 2874 #ifdef CONFIG_F2FS_FS_COMPRESSION 2875 struct inode *inode = page->mapping->host; 2876 2877 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 2878 goto out; 2879 2880 if (f2fs_compressed_file(inode)) { 2881 if (f2fs_is_compressed_cluster(inode, page->index)) { 2882 redirty_page_for_writepage(wbc, page); 2883 return AOP_WRITEPAGE_ACTIVATE; 2884 } 2885 } 2886 out: 2887 #endif 2888 2889 return f2fs_write_single_data_page(page, NULL, NULL, NULL, 2890 wbc, FS_DATA_IO, 0, true); 2891 } 2892 2893 /* 2894 * This function was copied from write_cche_pages from mm/page-writeback.c. 2895 * The major change is making write step of cold data page separately from 2896 * warm/hot data page. 2897 */ 2898 static int f2fs_write_cache_pages(struct address_space *mapping, 2899 struct writeback_control *wbc, 2900 enum iostat_type io_type) 2901 { 2902 int ret = 0; 2903 int done = 0, retry = 0; 2904 struct pagevec pvec; 2905 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 2906 struct bio *bio = NULL; 2907 sector_t last_block; 2908 #ifdef CONFIG_F2FS_FS_COMPRESSION 2909 struct inode *inode = mapping->host; 2910 struct compress_ctx cc = { 2911 .inode = inode, 2912 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, 2913 .cluster_size = F2FS_I(inode)->i_cluster_size, 2914 .cluster_idx = NULL_CLUSTER, 2915 .rpages = NULL, 2916 .nr_rpages = 0, 2917 .cpages = NULL, 2918 .valid_nr_cpages = 0, 2919 .rbuf = NULL, 2920 .cbuf = NULL, 2921 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, 2922 .private = NULL, 2923 }; 2924 #endif 2925 int nr_pages; 2926 pgoff_t index; 2927 pgoff_t end; /* Inclusive */ 2928 pgoff_t done_index; 2929 int range_whole = 0; 2930 xa_mark_t tag; 2931 int nwritten = 0; 2932 int submitted = 0; 2933 int i; 2934 2935 pagevec_init(&pvec); 2936 2937 if (get_dirty_pages(mapping->host) <= 2938 SM_I(F2FS_M_SB(mapping))->min_hot_blocks) 2939 set_inode_flag(mapping->host, FI_HOT_DATA); 2940 else 2941 clear_inode_flag(mapping->host, FI_HOT_DATA); 2942 2943 if (wbc->range_cyclic) { 2944 index = mapping->writeback_index; /* prev offset */ 2945 end = -1; 2946 } else { 2947 index = wbc->range_start >> PAGE_SHIFT; 2948 end = wbc->range_end >> PAGE_SHIFT; 2949 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 2950 range_whole = 1; 2951 } 2952 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2953 tag = PAGECACHE_TAG_TOWRITE; 2954 else 2955 tag = PAGECACHE_TAG_DIRTY; 2956 retry: 2957 retry = 0; 2958 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 2959 tag_pages_for_writeback(mapping, index, end); 2960 done_index = index; 2961 while (!done && !retry && (index <= end)) { 2962 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, 2963 tag); 2964 if (nr_pages == 0) 2965 break; 2966 2967 for (i = 0; i < nr_pages; i++) { 2968 struct page *page = pvec.pages[i]; 2969 bool need_readd; 2970 readd: 2971 need_readd = false; 2972 #ifdef CONFIG_F2FS_FS_COMPRESSION 2973 if (f2fs_compressed_file(inode)) { 2974 void *fsdata = NULL; 2975 struct page *pagep; 2976 int ret2; 2977 2978 ret = f2fs_init_compress_ctx(&cc); 2979 if (ret) { 2980 done = 1; 2981 break; 2982 } 2983 2984 if (!f2fs_cluster_can_merge_page(&cc, 2985 page->index)) { 2986 ret = f2fs_write_multi_pages(&cc, 2987 &submitted, wbc, io_type); 2988 if (!ret) 2989 need_readd = true; 2990 goto result; 
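				/*
				 * Added note: at this point the page being
				 * written could not merge into the cluster
				 * collected in @cc, so the pending cluster was
				 * just flushed via f2fs_write_multi_pages().
				 * On success, need_readd makes the outer loop
				 * revisit this page ("goto readd") so it can
				 * start a new cluster; either way control jumps
				 * to the result label to account the pages
				 * submitted for this cluster.
				 */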
2991 } 2992 2993 if (unlikely(f2fs_cp_error(sbi))) 2994 goto lock_page; 2995 2996 if (!f2fs_cluster_is_empty(&cc)) 2997 goto lock_page; 2998 2999 ret2 = f2fs_prepare_compress_overwrite( 3000 inode, &pagep, 3001 page->index, &fsdata); 3002 if (ret2 < 0) { 3003 ret = ret2; 3004 done = 1; 3005 break; 3006 } else if (ret2 && 3007 (!f2fs_compress_write_end(inode, 3008 fsdata, page->index, 1) || 3009 !f2fs_all_cluster_page_loaded(&cc, 3010 &pvec, i, nr_pages))) { 3011 retry = 1; 3012 break; 3013 } 3014 } 3015 #endif 3016 /* give a priority to WB_SYNC threads */ 3017 if (atomic_read(&sbi->wb_sync_req[DATA]) && 3018 wbc->sync_mode == WB_SYNC_NONE) { 3019 done = 1; 3020 break; 3021 } 3022 #ifdef CONFIG_F2FS_FS_COMPRESSION 3023 lock_page: 3024 #endif 3025 done_index = page->index; 3026 retry_write: 3027 lock_page(page); 3028 3029 if (unlikely(page->mapping != mapping)) { 3030 continue_unlock: 3031 unlock_page(page); 3032 continue; 3033 } 3034 3035 if (!PageDirty(page)) { 3036 /* someone wrote it for us */ 3037 goto continue_unlock; 3038 } 3039 3040 if (PageWriteback(page)) { 3041 if (wbc->sync_mode != WB_SYNC_NONE) 3042 f2fs_wait_on_page_writeback(page, 3043 DATA, true, true); 3044 else 3045 goto continue_unlock; 3046 } 3047 3048 if (!clear_page_dirty_for_io(page)) 3049 goto continue_unlock; 3050 3051 #ifdef CONFIG_F2FS_FS_COMPRESSION 3052 if (f2fs_compressed_file(inode)) { 3053 get_page(page); 3054 f2fs_compress_ctx_add_page(&cc, page); 3055 continue; 3056 } 3057 #endif 3058 ret = f2fs_write_single_data_page(page, &submitted, 3059 &bio, &last_block, wbc, io_type, 3060 0, true); 3061 if (ret == AOP_WRITEPAGE_ACTIVATE) 3062 unlock_page(page); 3063 #ifdef CONFIG_F2FS_FS_COMPRESSION 3064 result: 3065 #endif 3066 nwritten += submitted; 3067 wbc->nr_to_write -= submitted; 3068 3069 if (unlikely(ret)) { 3070 /* 3071 * keep nr_to_write, since vfs uses this to 3072 * get # of written pages. 
3073 */ 3074 if (ret == AOP_WRITEPAGE_ACTIVATE) { 3075 ret = 0; 3076 goto next; 3077 } else if (ret == -EAGAIN) { 3078 ret = 0; 3079 if (wbc->sync_mode == WB_SYNC_ALL) { 3080 f2fs_io_schedule_timeout( 3081 DEFAULT_IO_TIMEOUT); 3082 goto retry_write; 3083 } 3084 goto next; 3085 } 3086 done_index = page->index + 1; 3087 done = 1; 3088 break; 3089 } 3090 3091 if (wbc->nr_to_write <= 0 && 3092 wbc->sync_mode == WB_SYNC_NONE) { 3093 done = 1; 3094 break; 3095 } 3096 next: 3097 if (need_readd) 3098 goto readd; 3099 } 3100 pagevec_release(&pvec); 3101 cond_resched(); 3102 } 3103 #ifdef CONFIG_F2FS_FS_COMPRESSION 3104 /* flush remained pages in compress cluster */ 3105 if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) { 3106 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type); 3107 nwritten += submitted; 3108 wbc->nr_to_write -= submitted; 3109 if (ret) { 3110 done = 1; 3111 retry = 0; 3112 } 3113 } 3114 if (f2fs_compressed_file(inode)) 3115 f2fs_destroy_compress_ctx(&cc, false); 3116 #endif 3117 if (retry) { 3118 index = 0; 3119 end = -1; 3120 goto retry; 3121 } 3122 if (wbc->range_cyclic && !done) 3123 done_index = 0; 3124 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 3125 mapping->writeback_index = done_index; 3126 3127 if (nwritten) 3128 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, 3129 NULL, 0, DATA); 3130 /* submit cached bio of IPU write */ 3131 if (bio) 3132 f2fs_submit_merged_ipu_write(sbi, &bio, NULL); 3133 3134 return ret; 3135 } 3136 3137 static inline bool __should_serialize_io(struct inode *inode, 3138 struct writeback_control *wbc) 3139 { 3140 /* to avoid deadlock in path of data flush */ 3141 if (F2FS_I(inode)->cp_task) 3142 return false; 3143 3144 if (!S_ISREG(inode->i_mode)) 3145 return false; 3146 if (IS_NOQUOTA(inode)) 3147 return false; 3148 3149 if (f2fs_need_compress_data(inode)) 3150 return true; 3151 if (wbc->sync_mode != WB_SYNC_ALL) 3152 return true; 3153 if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) 3154 return true; 3155 return false; 3156 } 3157 3158 static int __f2fs_write_data_pages(struct address_space *mapping, 3159 struct writeback_control *wbc, 3160 enum iostat_type io_type) 3161 { 3162 struct inode *inode = mapping->host; 3163 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3164 struct blk_plug plug; 3165 int ret; 3166 bool locked = false; 3167 3168 /* deal with chardevs and other special file */ 3169 if (!mapping->a_ops->writepage) 3170 return 0; 3171 3172 /* skip writing if there is no dirty page in this inode */ 3173 if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) 3174 return 0; 3175 3176 /* during POR, we don't need to trigger writepage at all. 
*/ 3177 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 3178 goto skip_write; 3179 3180 if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && 3181 wbc->sync_mode == WB_SYNC_NONE && 3182 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && 3183 f2fs_available_free_memory(sbi, DIRTY_DENTS)) 3184 goto skip_write; 3185 3186 /* skip writing in file defragment preparing stage */ 3187 if (is_inode_flag_set(inode, FI_SKIP_WRITES)) 3188 goto skip_write; 3189 3190 trace_f2fs_writepages(mapping->host, wbc, DATA); 3191 3192 /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ 3193 if (wbc->sync_mode == WB_SYNC_ALL) 3194 atomic_inc(&sbi->wb_sync_req[DATA]); 3195 else if (atomic_read(&sbi->wb_sync_req[DATA])) { 3196 /* to avoid potential deadlock */ 3197 if (current->plug) 3198 blk_finish_plug(current->plug); 3199 goto skip_write; 3200 } 3201 3202 if (__should_serialize_io(inode, wbc)) { 3203 mutex_lock(&sbi->writepages); 3204 locked = true; 3205 } 3206 3207 blk_start_plug(&plug); 3208 ret = f2fs_write_cache_pages(mapping, wbc, io_type); 3209 blk_finish_plug(&plug); 3210 3211 if (locked) 3212 mutex_unlock(&sbi->writepages); 3213 3214 if (wbc->sync_mode == WB_SYNC_ALL) 3215 atomic_dec(&sbi->wb_sync_req[DATA]); 3216 /* 3217 * if some pages were truncated, we cannot guarantee its mapping->host 3218 * to detect pending bios. 3219 */ 3220 3221 f2fs_remove_dirty_inode(inode); 3222 return ret; 3223 3224 skip_write: 3225 wbc->pages_skipped += get_dirty_pages(inode); 3226 trace_f2fs_writepages(mapping->host, wbc, DATA); 3227 return 0; 3228 } 3229 3230 static int f2fs_write_data_pages(struct address_space *mapping, 3231 struct writeback_control *wbc) 3232 { 3233 struct inode *inode = mapping->host; 3234 3235 return __f2fs_write_data_pages(mapping, wbc, 3236 F2FS_I(inode)->cp_task == current ? 3237 FS_CP_DATA_IO : FS_DATA_IO); 3238 } 3239 3240 void f2fs_write_failed(struct inode *inode, loff_t to) 3241 { 3242 loff_t i_size = i_size_read(inode); 3243 3244 if (IS_NOQUOTA(inode)) 3245 return; 3246 3247 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ 3248 if (to > i_size && !f2fs_verity_in_progress(inode)) { 3249 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 3250 filemap_invalidate_lock(inode->i_mapping); 3251 3252 truncate_pagecache(inode, i_size); 3253 f2fs_truncate_blocks(inode, i_size, true); 3254 3255 filemap_invalidate_unlock(inode->i_mapping); 3256 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 3257 } 3258 } 3259 3260 static int prepare_write_begin(struct f2fs_sb_info *sbi, 3261 struct page *page, loff_t pos, unsigned len, 3262 block_t *blk_addr, bool *node_changed) 3263 { 3264 struct inode *inode = page->mapping->host; 3265 pgoff_t index = page->index; 3266 struct dnode_of_data dn; 3267 struct page *ipage; 3268 bool locked = false; 3269 struct extent_info ei = {0, }; 3270 int err = 0; 3271 int flag; 3272 3273 /* 3274 * If a whole page is being written and we already preallocated all the 3275 * blocks, then there is no need to get a block address now. 
3276 */ 3277 if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL)) 3278 return 0; 3279 3280 /* f2fs_lock_op avoids race between write CP and convert_inline_page */ 3281 if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode)) 3282 flag = F2FS_GET_BLOCK_DEFAULT; 3283 else 3284 flag = F2FS_GET_BLOCK_PRE_AIO; 3285 3286 if (f2fs_has_inline_data(inode) || 3287 (pos & PAGE_MASK) >= i_size_read(inode)) { 3288 f2fs_do_map_lock(sbi, flag, true); 3289 locked = true; 3290 } 3291 3292 restart: 3293 /* check inline_data */ 3294 ipage = f2fs_get_node_page(sbi, inode->i_ino); 3295 if (IS_ERR(ipage)) { 3296 err = PTR_ERR(ipage); 3297 goto unlock_out; 3298 } 3299 3300 set_new_dnode(&dn, inode, ipage, ipage, 0); 3301 3302 if (f2fs_has_inline_data(inode)) { 3303 if (pos + len <= MAX_INLINE_DATA(inode)) { 3304 f2fs_do_read_inline_data(page, ipage); 3305 set_inode_flag(inode, FI_DATA_EXIST); 3306 if (inode->i_nlink) 3307 set_page_private_inline(ipage); 3308 } else { 3309 err = f2fs_convert_inline_page(&dn, page); 3310 if (err) 3311 goto out; 3312 if (dn.data_blkaddr == NULL_ADDR) 3313 err = f2fs_get_block(&dn, index); 3314 } 3315 } else if (locked) { 3316 err = f2fs_get_block(&dn, index); 3317 } else { 3318 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 3319 dn.data_blkaddr = ei.blk + index - ei.fofs; 3320 } else { 3321 /* hole case */ 3322 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 3323 if (err || dn.data_blkaddr == NULL_ADDR) { 3324 f2fs_put_dnode(&dn); 3325 f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, 3326 true); 3327 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); 3328 locked = true; 3329 goto restart; 3330 } 3331 } 3332 } 3333 3334 /* convert_inline_page can make node_changed */ 3335 *blk_addr = dn.data_blkaddr; 3336 *node_changed = dn.node_changed; 3337 out: 3338 f2fs_put_dnode(&dn); 3339 unlock_out: 3340 if (locked) 3341 f2fs_do_map_lock(sbi, flag, false); 3342 return err; 3343 } 3344 3345 static int __find_data_block(struct inode *inode, pgoff_t index, 3346 block_t *blk_addr) 3347 { 3348 struct dnode_of_data dn; 3349 struct page *ipage; 3350 struct extent_info ei = {0, }; 3351 int err = 0; 3352 3353 ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); 3354 if (IS_ERR(ipage)) 3355 return PTR_ERR(ipage); 3356 3357 set_new_dnode(&dn, inode, ipage, ipage, 0); 3358 3359 if (f2fs_lookup_extent_cache(inode, index, &ei)) { 3360 dn.data_blkaddr = ei.blk + index - ei.fofs; 3361 } else { 3362 /* hole case */ 3363 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 3364 if (err) { 3365 dn.data_blkaddr = NULL_ADDR; 3366 err = 0; 3367 } 3368 } 3369 *blk_addr = dn.data_blkaddr; 3370 f2fs_put_dnode(&dn); 3371 return err; 3372 } 3373 3374 static int __reserve_data_block(struct inode *inode, pgoff_t index, 3375 block_t *blk_addr, bool *node_changed) 3376 { 3377 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3378 struct dnode_of_data dn; 3379 struct page *ipage; 3380 int err = 0; 3381 3382 f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); 3383 3384 ipage = f2fs_get_node_page(sbi, inode->i_ino); 3385 if (IS_ERR(ipage)) { 3386 err = PTR_ERR(ipage); 3387 goto unlock_out; 3388 } 3389 set_new_dnode(&dn, inode, ipage, ipage, 0); 3390 3391 err = f2fs_get_block(&dn, index); 3392 3393 *blk_addr = dn.data_blkaddr; 3394 *node_changed = dn.node_changed; 3395 f2fs_put_dnode(&dn); 3396 3397 unlock_out: 3398 f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); 3399 return err; 3400 } 3401 3402 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, 3403 struct page *page, 
loff_t pos, unsigned int len, 3404 block_t *blk_addr, bool *node_changed) 3405 { 3406 struct inode *inode = page->mapping->host; 3407 struct inode *cow_inode = F2FS_I(inode)->cow_inode; 3408 pgoff_t index = page->index; 3409 int err = 0; 3410 block_t ori_blk_addr; 3411 3412 /* If pos is beyond the end of file, reserve a new block in COW inode */ 3413 if ((pos & PAGE_MASK) >= i_size_read(inode)) 3414 return __reserve_data_block(cow_inode, index, blk_addr, 3415 node_changed); 3416 3417 /* Look for the block in COW inode first */ 3418 err = __find_data_block(cow_inode, index, blk_addr); 3419 if (err) 3420 return err; 3421 else if (*blk_addr != NULL_ADDR) 3422 return 0; 3423 3424 /* Look for the block in the original inode */ 3425 err = __find_data_block(inode, index, &ori_blk_addr); 3426 if (err) 3427 return err; 3428 3429 /* Finally, we should reserve a new block in COW inode for the update */ 3430 err = __reserve_data_block(cow_inode, index, blk_addr, node_changed); 3431 if (err) 3432 return err; 3433 3434 if (ori_blk_addr != NULL_ADDR) 3435 *blk_addr = ori_blk_addr; 3436 return 0; 3437 } 3438 3439 static int f2fs_write_begin(struct file *file, struct address_space *mapping, 3440 loff_t pos, unsigned len, struct page **pagep, void **fsdata) 3441 { 3442 struct inode *inode = mapping->host; 3443 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3444 struct page *page = NULL; 3445 pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; 3446 bool need_balance = false; 3447 block_t blkaddr = NULL_ADDR; 3448 int err = 0; 3449 3450 trace_f2fs_write_begin(inode, pos, len); 3451 3452 if (!f2fs_is_checkpoint_ready(sbi)) { 3453 err = -ENOSPC; 3454 goto fail; 3455 } 3456 3457 /* 3458 * We should check this at this moment to avoid deadlock on inode page 3459 * and #0 page. The locking rule for inline_data conversion should be: 3460 * lock_page(page #0) -> lock_page(inode_page) 3461 */ 3462 if (index != 0) { 3463 err = f2fs_convert_inline_inode(inode); 3464 if (err) 3465 goto fail; 3466 } 3467 3468 #ifdef CONFIG_F2FS_FS_COMPRESSION 3469 if (f2fs_compressed_file(inode)) { 3470 int ret; 3471 3472 *fsdata = NULL; 3473 3474 if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) 3475 goto repeat; 3476 3477 ret = f2fs_prepare_compress_overwrite(inode, pagep, 3478 index, fsdata); 3479 if (ret < 0) { 3480 err = ret; 3481 goto fail; 3482 } else if (ret) { 3483 return 0; 3484 } 3485 } 3486 #endif 3487 3488 repeat: 3489 /* 3490 * Do not use grab_cache_page_write_begin() to avoid deadlock due to 3491 * wait_for_stable_page. Will wait that below with our IO control. 
3492 */ 3493 page = f2fs_pagecache_get_page(mapping, index, 3494 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); 3495 if (!page) { 3496 err = -ENOMEM; 3497 goto fail; 3498 } 3499 3500 /* TODO: cluster can be compressed due to race with .writepage */ 3501 3502 *pagep = page; 3503 3504 if (f2fs_is_atomic_file(inode)) 3505 err = prepare_atomic_write_begin(sbi, page, pos, len, 3506 &blkaddr, &need_balance); 3507 else 3508 err = prepare_write_begin(sbi, page, pos, len, 3509 &blkaddr, &need_balance); 3510 if (err) 3511 goto fail; 3512 3513 if (need_balance && !IS_NOQUOTA(inode) && 3514 has_not_enough_free_secs(sbi, 0, 0)) { 3515 unlock_page(page); 3516 f2fs_balance_fs(sbi, true); 3517 lock_page(page); 3518 if (page->mapping != mapping) { 3519 /* The page got truncated from under us */ 3520 f2fs_put_page(page, 1); 3521 goto repeat; 3522 } 3523 } 3524 3525 f2fs_wait_on_page_writeback(page, DATA, false, true); 3526 3527 if (len == PAGE_SIZE || PageUptodate(page)) 3528 return 0; 3529 3530 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && 3531 !f2fs_verity_in_progress(inode)) { 3532 zero_user_segment(page, len, PAGE_SIZE); 3533 return 0; 3534 } 3535 3536 if (blkaddr == NEW_ADDR) { 3537 zero_user_segment(page, 0, PAGE_SIZE); 3538 SetPageUptodate(page); 3539 } else { 3540 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 3541 DATA_GENERIC_ENHANCE_READ)) { 3542 err = -EFSCORRUPTED; 3543 goto fail; 3544 } 3545 err = f2fs_submit_page_read(inode, page, blkaddr, 0, true); 3546 if (err) 3547 goto fail; 3548 3549 lock_page(page); 3550 if (unlikely(page->mapping != mapping)) { 3551 f2fs_put_page(page, 1); 3552 goto repeat; 3553 } 3554 if (unlikely(!PageUptodate(page))) { 3555 err = -EIO; 3556 goto fail; 3557 } 3558 } 3559 return 0; 3560 3561 fail: 3562 f2fs_put_page(page, 1); 3563 f2fs_write_failed(inode, pos + len); 3564 return err; 3565 } 3566 3567 static int f2fs_write_end(struct file *file, 3568 struct address_space *mapping, 3569 loff_t pos, unsigned len, unsigned copied, 3570 struct page *page, void *fsdata) 3571 { 3572 struct inode *inode = page->mapping->host; 3573 3574 trace_f2fs_write_end(inode, pos, len, copied); 3575 3576 /* 3577 * This should be come from len == PAGE_SIZE, and we expect copied 3578 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and 3579 * let generic_perform_write() try to copy data again through copied=0. 
3580 */ 3581 if (!PageUptodate(page)) { 3582 if (unlikely(copied != len)) 3583 copied = 0; 3584 else 3585 SetPageUptodate(page); 3586 } 3587 3588 #ifdef CONFIG_F2FS_FS_COMPRESSION 3589 /* overwrite compressed file */ 3590 if (f2fs_compressed_file(inode) && fsdata) { 3591 f2fs_compress_write_end(inode, fsdata, page->index, copied); 3592 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3593 3594 if (pos + copied > i_size_read(inode) && 3595 !f2fs_verity_in_progress(inode)) 3596 f2fs_i_size_write(inode, pos + copied); 3597 return copied; 3598 } 3599 #endif 3600 3601 if (!copied) 3602 goto unlock_out; 3603 3604 set_page_dirty(page); 3605 3606 if (pos + copied > i_size_read(inode) && 3607 !f2fs_verity_in_progress(inode)) { 3608 f2fs_i_size_write(inode, pos + copied); 3609 if (f2fs_is_atomic_file(inode)) 3610 f2fs_i_size_write(F2FS_I(inode)->cow_inode, 3611 pos + copied); 3612 } 3613 unlock_out: 3614 f2fs_put_page(page, 1); 3615 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3616 return copied; 3617 } 3618 3619 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length) 3620 { 3621 struct inode *inode = folio->mapping->host; 3622 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3623 3624 if (inode->i_ino >= F2FS_ROOT_INO(sbi) && 3625 (offset || length != folio_size(folio))) 3626 return; 3627 3628 if (folio_test_dirty(folio)) { 3629 if (inode->i_ino == F2FS_META_INO(sbi)) { 3630 dec_page_count(sbi, F2FS_DIRTY_META); 3631 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) { 3632 dec_page_count(sbi, F2FS_DIRTY_NODES); 3633 } else { 3634 inode_dec_dirty_pages(inode); 3635 f2fs_remove_dirty_inode(inode); 3636 } 3637 } 3638 3639 clear_page_private_gcing(&folio->page); 3640 3641 if (test_opt(sbi, COMPRESS_CACHE) && 3642 inode->i_ino == F2FS_COMPRESS_INO(sbi)) 3643 clear_page_private_data(&folio->page); 3644 3645 folio_detach_private(folio); 3646 } 3647 3648 bool f2fs_release_folio(struct folio *folio, gfp_t wait) 3649 { 3650 struct f2fs_sb_info *sbi; 3651 3652 /* If this is dirty folio, keep private data */ 3653 if (folio_test_dirty(folio)) 3654 return false; 3655 3656 sbi = F2FS_M_SB(folio->mapping); 3657 if (test_opt(sbi, COMPRESS_CACHE)) { 3658 struct inode *inode = folio->mapping->host; 3659 3660 if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) 3661 clear_page_private_data(&folio->page); 3662 } 3663 3664 clear_page_private_gcing(&folio->page); 3665 3666 folio_detach_private(folio); 3667 return true; 3668 } 3669 3670 static bool f2fs_dirty_data_folio(struct address_space *mapping, 3671 struct folio *folio) 3672 { 3673 struct inode *inode = mapping->host; 3674 3675 trace_f2fs_set_page_dirty(&folio->page, DATA); 3676 3677 if (!folio_test_uptodate(folio)) 3678 folio_mark_uptodate(folio); 3679 BUG_ON(folio_test_swapcache(folio)); 3680 3681 if (!folio_test_dirty(folio)) { 3682 filemap_dirty_folio(mapping, folio); 3683 f2fs_update_dirty_folio(inode, folio); 3684 return true; 3685 } 3686 return false; 3687 } 3688 3689 3690 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) 3691 { 3692 #ifdef CONFIG_F2FS_FS_COMPRESSION 3693 struct dnode_of_data dn; 3694 sector_t start_idx, blknr = 0; 3695 int ret; 3696 3697 start_idx = round_down(block, F2FS_I(inode)->i_cluster_size); 3698 3699 set_new_dnode(&dn, inode, NULL, NULL, 0); 3700 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); 3701 if (ret) 3702 return 0; 3703 3704 if (dn.data_blkaddr != COMPRESS_ADDR) { 3705 dn.ofs_in_node += block - start_idx; 3706 blknr = f2fs_data_blkaddr(&dn); 3707 if (!__is_valid_data_blkaddr(blknr)) 3708 
blknr = 0; 3709 } 3710 3711 f2fs_put_dnode(&dn); 3712 return blknr; 3713 #else 3714 return 0; 3715 #endif 3716 } 3717 3718 3719 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) 3720 { 3721 struct inode *inode = mapping->host; 3722 sector_t blknr = 0; 3723 3724 if (f2fs_has_inline_data(inode)) 3725 goto out; 3726 3727 /* make sure allocating whole blocks */ 3728 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 3729 filemap_write_and_wait(mapping); 3730 3731 /* Block number less than F2FS MAX BLOCKS */ 3732 if (unlikely(block >= max_file_blocks(inode))) 3733 goto out; 3734 3735 if (f2fs_compressed_file(inode)) { 3736 blknr = f2fs_bmap_compress(inode, block); 3737 } else { 3738 struct f2fs_map_blocks map; 3739 3740 memset(&map, 0, sizeof(map)); 3741 map.m_lblk = block; 3742 map.m_len = 1; 3743 map.m_next_pgofs = NULL; 3744 map.m_seg_type = NO_CHECK_TYPE; 3745 3746 if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP)) 3747 blknr = map.m_pblk; 3748 } 3749 out: 3750 trace_f2fs_bmap(inode, block, blknr); 3751 return blknr; 3752 } 3753 3754 #ifdef CONFIG_MIGRATION 3755 #include <linux/migrate.h> 3756 3757 int f2fs_migrate_page(struct address_space *mapping, 3758 struct page *newpage, struct page *page, enum migrate_mode mode) 3759 { 3760 int rc, extra_count = 0; 3761 3762 BUG_ON(PageWriteback(page)); 3763 3764 rc = migrate_page_move_mapping(mapping, newpage, 3765 page, extra_count); 3766 if (rc != MIGRATEPAGE_SUCCESS) 3767 return rc; 3768 3769 /* guarantee to start from no stale private field */ 3770 set_page_private(newpage, 0); 3771 if (PagePrivate(page)) { 3772 set_page_private(newpage, page_private(page)); 3773 SetPagePrivate(newpage); 3774 get_page(newpage); 3775 3776 set_page_private(page, 0); 3777 ClearPagePrivate(page); 3778 put_page(page); 3779 } 3780 3781 if (mode != MIGRATE_SYNC_NO_COPY) 3782 migrate_page_copy(newpage, page); 3783 else 3784 migrate_page_states(newpage, page); 3785 3786 return MIGRATEPAGE_SUCCESS; 3787 } 3788 #endif 3789 3790 #ifdef CONFIG_SWAP 3791 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, 3792 unsigned int blkcnt) 3793 { 3794 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3795 unsigned int blkofs; 3796 unsigned int blk_per_sec = BLKS_PER_SEC(sbi); 3797 unsigned int secidx = start_blk / blk_per_sec; 3798 unsigned int end_sec = secidx + blkcnt / blk_per_sec; 3799 int ret = 0; 3800 3801 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 3802 filemap_invalidate_lock(inode->i_mapping); 3803 3804 set_inode_flag(inode, FI_ALIGNED_WRITE); 3805 set_inode_flag(inode, FI_OPU_WRITE); 3806 3807 for (; secidx < end_sec; secidx++) { 3808 f2fs_down_write(&sbi->pin_sem); 3809 3810 f2fs_lock_op(sbi); 3811 f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); 3812 f2fs_unlock_op(sbi); 3813 3814 set_inode_flag(inode, FI_SKIP_WRITES); 3815 3816 for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { 3817 struct page *page; 3818 unsigned int blkidx = secidx * blk_per_sec + blkofs; 3819 3820 page = f2fs_get_lock_data_page(inode, blkidx, true); 3821 if (IS_ERR(page)) { 3822 f2fs_up_write(&sbi->pin_sem); 3823 ret = PTR_ERR(page); 3824 goto done; 3825 } 3826 3827 set_page_dirty(page); 3828 f2fs_put_page(page, 1); 3829 } 3830 3831 clear_inode_flag(inode, FI_SKIP_WRITES); 3832 3833 ret = filemap_fdatawrite(inode->i_mapping); 3834 3835 f2fs_up_write(&sbi->pin_sem); 3836 3837 if (ret) 3838 break; 3839 } 3840 3841 done: 3842 clear_inode_flag(inode, FI_SKIP_WRITES); 3843 clear_inode_flag(inode, FI_OPU_WRITE); 3844 clear_inode_flag(inode, 
FI_ALIGNED_WRITE); 3845 3846 filemap_invalidate_unlock(inode->i_mapping); 3847 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 3848 3849 return ret; 3850 } 3851 3852 static int check_swap_activate(struct swap_info_struct *sis, 3853 struct file *swap_file, sector_t *span) 3854 { 3855 struct address_space *mapping = swap_file->f_mapping; 3856 struct inode *inode = mapping->host; 3857 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3858 sector_t cur_lblock; 3859 sector_t last_lblock; 3860 sector_t pblock; 3861 sector_t lowest_pblock = -1; 3862 sector_t highest_pblock = 0; 3863 int nr_extents = 0; 3864 unsigned long nr_pblocks; 3865 unsigned int blks_per_sec = BLKS_PER_SEC(sbi); 3866 unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1; 3867 unsigned int not_aligned = 0; 3868 int ret = 0; 3869 3870 /* 3871 * Map all the blocks into the extent list. This code doesn't try 3872 * to be very smart. 3873 */ 3874 cur_lblock = 0; 3875 last_lblock = bytes_to_blks(inode, i_size_read(inode)); 3876 3877 while (cur_lblock < last_lblock && cur_lblock < sis->max) { 3878 struct f2fs_map_blocks map; 3879 retry: 3880 cond_resched(); 3881 3882 memset(&map, 0, sizeof(map)); 3883 map.m_lblk = cur_lblock; 3884 map.m_len = last_lblock - cur_lblock; 3885 map.m_next_pgofs = NULL; 3886 map.m_next_extent = NULL; 3887 map.m_seg_type = NO_CHECK_TYPE; 3888 map.m_may_create = false; 3889 3890 ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP); 3891 if (ret) 3892 goto out; 3893 3894 /* hole */ 3895 if (!(map.m_flags & F2FS_MAP_FLAGS)) { 3896 f2fs_err(sbi, "Swapfile has holes"); 3897 ret = -EINVAL; 3898 goto out; 3899 } 3900 3901 pblock = map.m_pblk; 3902 nr_pblocks = map.m_len; 3903 3904 if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || 3905 nr_pblocks & sec_blks_mask) { 3906 not_aligned++; 3907 3908 nr_pblocks = roundup(nr_pblocks, blks_per_sec); 3909 if (cur_lblock + nr_pblocks > sis->max) 3910 nr_pblocks -= blks_per_sec; 3911 3912 if (!nr_pblocks) { 3913 /* this extent is last one */ 3914 nr_pblocks = map.m_len; 3915 f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); 3916 goto next; 3917 } 3918 3919 ret = f2fs_migrate_blocks(inode, cur_lblock, 3920 nr_pblocks); 3921 if (ret) 3922 goto out; 3923 goto retry; 3924 } 3925 next: 3926 if (cur_lblock + nr_pblocks >= sis->max) 3927 nr_pblocks = sis->max - cur_lblock; 3928 3929 if (cur_lblock) { /* exclude the header page */ 3930 if (pblock < lowest_pblock) 3931 lowest_pblock = pblock; 3932 if (pblock + nr_pblocks - 1 > highest_pblock) 3933 highest_pblock = pblock + nr_pblocks - 1; 3934 } 3935 3936 /* 3937 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks 3938 */ 3939 ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock); 3940 if (ret < 0) 3941 goto out; 3942 nr_extents += ret; 3943 cur_lblock += nr_pblocks; 3944 } 3945 ret = nr_extents; 3946 *span = 1 + highest_pblock - lowest_pblock; 3947 if (cur_lblock == 0) 3948 cur_lblock = 1; /* force Empty message */ 3949 sis->max = cur_lblock; 3950 sis->pages = cur_lblock - 1; 3951 sis->highest_bit = cur_lblock - 1; 3952 out: 3953 if (not_aligned) 3954 f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)", 3955 not_aligned, blks_per_sec * F2FS_BLKSIZE); 3956 return ret; 3957 } 3958 3959 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, 3960 sector_t *span) 3961 { 3962 struct inode *inode = file_inode(file); 3963 int ret; 3964 3965 if (!S_ISREG(inode->i_mode)) 3966 return -EINVAL; 3967 3968 if 
(f2fs_readonly(F2FS_I_SB(inode)->sb)) 3969 return -EROFS; 3970 3971 if (f2fs_lfs_mode(F2FS_I_SB(inode))) { 3972 f2fs_err(F2FS_I_SB(inode), 3973 "Swapfile not supported in LFS mode"); 3974 return -EINVAL; 3975 } 3976 3977 ret = f2fs_convert_inline_inode(inode); 3978 if (ret) 3979 return ret; 3980 3981 if (!f2fs_disable_compressed_file(inode)) 3982 return -EINVAL; 3983 3984 f2fs_precache_extents(inode); 3985 3986 ret = check_swap_activate(sis, file, span); 3987 if (ret < 0) 3988 return ret; 3989 3990 set_inode_flag(inode, FI_PIN_FILE); 3991 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3992 return ret; 3993 } 3994 3995 static void f2fs_swap_deactivate(struct file *file) 3996 { 3997 struct inode *inode = file_inode(file); 3998 3999 clear_inode_flag(inode, FI_PIN_FILE); 4000 } 4001 #else 4002 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, 4003 sector_t *span) 4004 { 4005 return -EOPNOTSUPP; 4006 } 4007 4008 static void f2fs_swap_deactivate(struct file *file) 4009 { 4010 } 4011 #endif 4012 4013 const struct address_space_operations f2fs_dblock_aops = { 4014 .read_folio = f2fs_read_data_folio, 4015 .readahead = f2fs_readahead, 4016 .writepage = f2fs_write_data_page, 4017 .writepages = f2fs_write_data_pages, 4018 .write_begin = f2fs_write_begin, 4019 .write_end = f2fs_write_end, 4020 .dirty_folio = f2fs_dirty_data_folio, 4021 .invalidate_folio = f2fs_invalidate_folio, 4022 .release_folio = f2fs_release_folio, 4023 .direct_IO = noop_direct_IO, 4024 .bmap = f2fs_bmap, 4025 .swap_activate = f2fs_swap_activate, 4026 .swap_deactivate = f2fs_swap_deactivate, 4027 #ifdef CONFIG_MIGRATION 4028 .migratepage = f2fs_migrate_page, 4029 #endif 4030 }; 4031 4032 void f2fs_clear_page_cache_dirty_tag(struct page *page) 4033 { 4034 struct address_space *mapping = page_mapping(page); 4035 unsigned long flags; 4036 4037 xa_lock_irqsave(&mapping->i_pages, flags); 4038 __xa_clear_mark(&mapping->i_pages, page_index(page), 4039 PAGECACHE_TAG_DIRTY); 4040 xa_unlock_irqrestore(&mapping->i_pages, flags); 4041 } 4042 4043 int __init f2fs_init_post_read_processing(void) 4044 { 4045 bio_post_read_ctx_cache = 4046 kmem_cache_create("f2fs_bio_post_read_ctx", 4047 sizeof(struct bio_post_read_ctx), 0, 0, NULL); 4048 if (!bio_post_read_ctx_cache) 4049 goto fail; 4050 bio_post_read_ctx_pool = 4051 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS, 4052 bio_post_read_ctx_cache); 4053 if (!bio_post_read_ctx_pool) 4054 goto fail_free_cache; 4055 return 0; 4056 4057 fail_free_cache: 4058 kmem_cache_destroy(bio_post_read_ctx_cache); 4059 fail: 4060 return -ENOMEM; 4061 } 4062 4063 void f2fs_destroy_post_read_processing(void) 4064 { 4065 mempool_destroy(bio_post_read_ctx_pool); 4066 kmem_cache_destroy(bio_post_read_ctx_cache); 4067 } 4068 4069 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi) 4070 { 4071 if (!f2fs_sb_has_encrypt(sbi) && 4072 !f2fs_sb_has_verity(sbi) && 4073 !f2fs_sb_has_compression(sbi)) 4074 return 0; 4075 4076 sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq", 4077 WQ_UNBOUND | WQ_HIGHPRI, 4078 num_online_cpus()); 4079 if (!sbi->post_read_wq) 4080 return -ENOMEM; 4081 return 0; 4082 } 4083 4084 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) 4085 { 4086 if (sbi->post_read_wq) 4087 destroy_workqueue(sbi->post_read_wq); 4088 } 4089 4090 int __init f2fs_init_bio_entry_cache(void) 4091 { 4092 bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab", 4093 sizeof(struct bio_entry)); 4094 if (!bio_entry_slab) 4095 return -ENOMEM; 4096 return 0; 4097 } 4098 4099 void 
f2fs_destroy_bio_entry_cache(void) 4100 { 4101 kmem_cache_destroy(bio_entry_slab); 4102 } 4103 4104 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 4105 unsigned int flags, struct iomap *iomap, 4106 struct iomap *srcmap) 4107 { 4108 struct f2fs_map_blocks map = {}; 4109 pgoff_t next_pgofs = 0; 4110 int err; 4111 4112 map.m_lblk = bytes_to_blks(inode, offset); 4113 map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1; 4114 map.m_next_pgofs = &next_pgofs; 4115 map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); 4116 if (flags & IOMAP_WRITE) 4117 map.m_may_create = true; 4118 4119 err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE, 4120 F2FS_GET_BLOCK_DIO); 4121 if (err) 4122 return err; 4123 4124 iomap->offset = blks_to_bytes(inode, map.m_lblk); 4125 4126 /* 4127 * When inline encryption is enabled, sometimes I/O to an encrypted file 4128 * has to be broken up to guarantee DUN contiguity. Handle this by 4129 * limiting the length of the mapping returned. 4130 */ 4131 map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); 4132 4133 if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) { 4134 iomap->length = blks_to_bytes(inode, map.m_len); 4135 if (map.m_flags & F2FS_MAP_MAPPED) { 4136 iomap->type = IOMAP_MAPPED; 4137 iomap->flags |= IOMAP_F_MERGED; 4138 } else { 4139 iomap->type = IOMAP_UNWRITTEN; 4140 } 4141 if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk))) 4142 return -EINVAL; 4143 4144 iomap->bdev = map.m_bdev; 4145 iomap->addr = blks_to_bytes(inode, map.m_pblk); 4146 } else { 4147 iomap->length = blks_to_bytes(inode, next_pgofs) - 4148 iomap->offset; 4149 iomap->type = IOMAP_HOLE; 4150 iomap->addr = IOMAP_NULL_ADDR; 4151 } 4152 4153 if (map.m_flags & F2FS_MAP_NEW) 4154 iomap->flags |= IOMAP_F_NEW; 4155 if ((inode->i_state & I_DIRTY_DATASYNC) || 4156 offset + length > i_size_read(inode)) 4157 iomap->flags |= IOMAP_F_DIRTY; 4158 4159 return 0; 4160 } 4161 4162 const struct iomap_ops f2fs_iomap_ops = { 4163 .iomap_begin = f2fs_iomap_begin, 4164 }; 4165
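/*
 * Illustrative, standalone sketch (userspace, not part of data.c): it mirrors
 * the arithmetic of max_inode_blocks() above under the *assumed* default
 * 4KB-block geometry -- 923 block addresses stored in the inode, 1018
 * addresses per direct node block and 1018 node IDs per indirect node block.
 * The real values shrink when inline xattrs or extra attributes are enabled,
 * so treat the constants below as assumptions rather than the authoritative
 * on-disk layout.
 */
#include <stdio.h>

int main(void)
{
	const long long addrs_per_inode = 923;		/* assumed default */
	const long long addrs_per_block = 1018;		/* assumed default */
	const long long nids_per_block  = 1018;		/* assumed default */
	long long result = addrs_per_inode;
	long long leaf = addrs_per_block;

	result += leaf * 2;			/* two direct node blocks */
	leaf *= nids_per_block;
	result += leaf * 2;			/* two indirect node blocks */
	leaf *= nids_per_block;
	result += leaf;				/* one double indirect node block */

	/* With 4KB blocks this works out to roughly 4 TB of addressable data. */
	printf("max blocks per inode: %lld (~%lld GiB at 4KB/block)\n",
	       result, result * 4096 / (1024LL * 1024 * 1024));
	return 0;
}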