xref: /openbmc/linux/fs/f2fs/data.c (revision 88a6f899)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/data.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/buffer_head.h>
11 #include <linux/sched/mm.h>
12 #include <linux/mpage.h>
13 #include <linux/writeback.h>
14 #include <linux/pagevec.h>
15 #include <linux/blkdev.h>
16 #include <linux/bio.h>
17 #include <linux/blk-crypto.h>
18 #include <linux/swap.h>
19 #include <linux/prefetch.h>
20 #include <linux/uio.h>
21 #include <linux/sched/signal.h>
22 #include <linux/fiemap.h>
23 #include <linux/iomap.h>
24 
25 #include "f2fs.h"
26 #include "node.h"
27 #include "segment.h"
28 #include "iostat.h"
29 #include <trace/events/f2fs.h>
30 
/*
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably the number of bio_post_read_ctx objects the mempool keeps in
 * reserve - confirm at the mempool creation site.
 */
#define NUM_PREALLOC_POST_READ_CTXS	128

/*
 * NOTE(review): not referenced in the visible code; presumably the slab that
 * backs bio_post_read_ctx_pool - confirm.
 */
static struct kmem_cache *bio_post_read_ctx_cache;
/* Slab from which struct bio_entry objects (in-place-update bio tracking) come. */
static struct kmem_cache *bio_entry_slab;
/* Pool that struct bio_post_read_ctx objects are returned to with mempool_free(). */
static mempool_t *bio_post_read_ctx_pool;
/* bio_set used for the bios allocated in this file; set up by f2fs_init_bioset(). */
static struct bio_set f2fs_bioset;

/* Pool size passed to bioset_init() for f2fs_bioset. */
#define	F2FS_BIO_POOL_SIZE	NR_CURSEG_TYPE
39 
40 int __init f2fs_init_bioset(void)
41 {
42 	return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
43 					0, BIOSET_NEED_BVECS);
44 }
45 
/* Tear down f2fs_bioset; counterpart of f2fs_init_bioset(). */
void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}
50 
51 static bool __is_cp_guaranteed(struct page *page)
52 {
53 	struct address_space *mapping = page->mapping;
54 	struct inode *inode;
55 	struct f2fs_sb_info *sbi;
56 
57 	if (!mapping)
58 		return false;
59 
60 	inode = mapping->host;
61 	sbi = F2FS_I_SB(inode);
62 
63 	if (inode->i_ino == F2FS_META_INO(sbi) ||
64 			inode->i_ino == F2FS_NODE_INO(sbi) ||
65 			S_ISDIR(inode->i_mode))
66 		return true;
67 
68 	if (f2fs_is_compressed_page(page))
69 		return false;
70 	if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
71 			page_private_gcing(page))
72 		return true;
73 	return false;
74 }
75 
76 static enum count_type __read_io_type(struct page *page)
77 {
78 	struct address_space *mapping = page_file_mapping(page);
79 
80 	if (mapping) {
81 		struct inode *inode = mapping->host;
82 		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
83 
84 		if (inode->i_ino == F2FS_META_INO(sbi))
85 			return F2FS_RD_META;
86 
87 		if (inode->i_ino == F2FS_NODE_INO(sbi))
88 			return F2FS_RD_NODE;
89 	}
90 	return F2FS_RD_DATA;
91 }
92 
/*
 * Postprocessing steps for read bios.
 *
 * Each step is a distinct bit so that steps can be OR-ed together in
 * bio_post_read_ctx::enabled_steps.  When the corresponding kernel feature is
 * configured out, the step's value is 0, so every "enabled_steps & STEP_x"
 * test is a compile-time false.
 */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
	STEP_DECRYPT	= BIT(0),
#else
	STEP_DECRYPT	= 0,	/* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
	STEP_DECOMPRESS	= BIT(1),
#else
	STEP_DECOMPRESS	= 0,	/* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
	STEP_VERITY	= BIT(2),
#else
	STEP_VERITY	= 0,	/* compile out the verity-related code */
#endif
};
111 
/*
 * Per-bio state for read postprocessing.  Reached through bio->bi_private by
 * the read completion path and freed back to bio_post_read_ctx_pool.
 */
struct bio_post_read_ctx {
	struct bio *bio;		/* the read bio this context belongs to */
	struct f2fs_sb_info *sbi;	/* supplies post_read_wq for deferred work */
	struct work_struct work;	/* used for both post-read and verity work */
	unsigned int enabled_steps;	/* OR of enum bio_post_read_step values */
	/*
	 * decompression_attempted keeps track of whether
	 * f2fs_end_read_compressed_page() has been called on the pages in the
	 * bio that belong to a compressed cluster yet.
	 */
	bool decompression_attempted;
	/* block address passed for the bio's first page during decompression */
	block_t fs_blkaddr;
};
125 
126 /*
127  * Update and unlock a bio's pages, and free the bio.
128  *
129  * This marks pages up-to-date only if there was no error in the bio (I/O error,
130  * decryption error, or verity error), as indicated by bio->bi_status.
131  *
132  * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
133  * aren't marked up-to-date here, as decompression is done on a per-compression-
134  * cluster basis rather than a per-bio basis.  Instead, we only must do two
135  * things for each compressed page here: call f2fs_end_read_compressed_page()
136  * with failed=true if an error occurred before it would have normally gotten
137  * called (i.e., I/O error or decryption error, but *not* verity error), and
138  * release the bio's reference to the decompress_io_ctx of the page's cluster.
139  */
140 static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
141 {
142 	struct bio_vec *bv;
143 	struct bvec_iter_all iter_all;
144 	struct bio_post_read_ctx *ctx = bio->bi_private;
145 
146 	bio_for_each_segment_all(bv, bio, iter_all) {
147 		struct page *page = bv->bv_page;
148 
149 		if (f2fs_is_compressed_page(page)) {
150 			if (ctx && !ctx->decompression_attempted)
151 				f2fs_end_read_compressed_page(page, true, 0,
152 							in_task);
153 			f2fs_put_page_dic(page, in_task);
154 			continue;
155 		}
156 
157 		if (bio->bi_status)
158 			ClearPageUptodate(page);
159 		else
160 			SetPageUptodate(page);
161 		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
162 		unlock_page(page);
163 	}
164 
165 	if (ctx)
166 		mempool_free(ctx, bio_post_read_ctx_pool);
167 	bio_put(bio);
168 }
169 
170 static void f2fs_verify_bio(struct work_struct *work)
171 {
172 	struct bio_post_read_ctx *ctx =
173 		container_of(work, struct bio_post_read_ctx, work);
174 	struct bio *bio = ctx->bio;
175 	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
176 
177 	/*
178 	 * fsverity_verify_bio() may call readahead() again, and while verity
179 	 * will be disabled for this, decryption and/or decompression may still
180 	 * be needed, resulting in another bio_post_read_ctx being allocated.
181 	 * So to prevent deadlocks we need to release the current ctx to the
182 	 * mempool first.  This assumes that verity is the last post-read step.
183 	 */
184 	mempool_free(ctx, bio_post_read_ctx_pool);
185 	bio->bi_private = NULL;
186 
187 	/*
188 	 * Verify the bio's pages with fs-verity.  Exclude compressed pages,
189 	 * as those were handled separately by f2fs_end_read_compressed_page().
190 	 */
191 	if (may_have_compressed_pages) {
192 		struct bio_vec *bv;
193 		struct bvec_iter_all iter_all;
194 
195 		bio_for_each_segment_all(bv, bio, iter_all) {
196 			struct page *page = bv->bv_page;
197 
198 			if (!f2fs_is_compressed_page(page) &&
199 			    !fsverity_verify_page(page)) {
200 				bio->bi_status = BLK_STS_IOERR;
201 				break;
202 			}
203 		}
204 	} else {
205 		fsverity_verify_bio(bio);
206 	}
207 
208 	f2fs_finish_read_bio(bio, true);
209 }
210 
211 /*
212  * If the bio's data needs to be verified with fs-verity, then enqueue the
213  * verity work for the bio.  Otherwise finish the bio now.
214  *
215  * Note that to avoid deadlocks, the verity work can't be done on the
216  * decryption/decompression workqueue.  This is because verifying the data pages
217  * can involve reading verity metadata pages from the file, and these verity
218  * metadata pages may be encrypted and/or compressed.
219  */
220 static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
221 {
222 	struct bio_post_read_ctx *ctx = bio->bi_private;
223 
224 	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
225 		INIT_WORK(&ctx->work, f2fs_verify_bio);
226 		fsverity_enqueue_verify_work(&ctx->work);
227 	} else {
228 		f2fs_finish_read_bio(bio, in_task);
229 	}
230 }
231 
232 /*
233  * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
234  * remaining page was read by @ctx->bio.
235  *
236  * Note that a bio may span clusters (even a mix of compressed and uncompressed
237  * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
238  * that the bio includes at least one compressed page.  The actual decompression
239  * is done on a per-cluster basis, not a per-bio basis.
240  */
241 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
242 		bool in_task)
243 {
244 	struct bio_vec *bv;
245 	struct bvec_iter_all iter_all;
246 	bool all_compressed = true;
247 	block_t blkaddr = ctx->fs_blkaddr;
248 
249 	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
250 		struct page *page = bv->bv_page;
251 
252 		if (f2fs_is_compressed_page(page))
253 			f2fs_end_read_compressed_page(page, false, blkaddr,
254 						      in_task);
255 		else
256 			all_compressed = false;
257 
258 		blkaddr++;
259 	}
260 
261 	ctx->decompression_attempted = true;
262 
263 	/*
264 	 * Optimization: if all the bio's pages are compressed, then scheduling
265 	 * the per-bio verity work is unnecessary, as verity will be fully
266 	 * handled at the compression cluster level.
267 	 */
268 	if (all_compressed)
269 		ctx->enabled_steps &= ~STEP_VERITY;
270 }
271 
272 static void f2fs_post_read_work(struct work_struct *work)
273 {
274 	struct bio_post_read_ctx *ctx =
275 		container_of(work, struct bio_post_read_ctx, work);
276 	struct bio *bio = ctx->bio;
277 
278 	if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
279 		f2fs_finish_read_bio(bio, true);
280 		return;
281 	}
282 
283 	if (ctx->enabled_steps & STEP_DECOMPRESS)
284 		f2fs_handle_step_decompress(ctx, true);
285 
286 	f2fs_verify_and_finish_bio(bio, true);
287 }
288 
289 static void f2fs_read_end_io(struct bio *bio)
290 {
291 	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
292 	struct bio_post_read_ctx *ctx;
293 	bool intask = in_task();
294 
295 	iostat_update_and_unbind_ctx(bio);
296 	ctx = bio->bi_private;
297 
298 	if (time_to_inject(sbi, FAULT_READ_IO))
299 		bio->bi_status = BLK_STS_IOERR;
300 
301 	if (bio->bi_status) {
302 		f2fs_finish_read_bio(bio, intask);
303 		return;
304 	}
305 
306 	if (ctx) {
307 		unsigned int enabled_steps = ctx->enabled_steps &
308 					(STEP_DECRYPT | STEP_DECOMPRESS);
309 
310 		/*
311 		 * If we have only decompression step between decompression and
312 		 * decrypt, we don't need post processing for this.
313 		 */
314 		if (enabled_steps == STEP_DECOMPRESS &&
315 				!f2fs_low_mem_mode(sbi)) {
316 			f2fs_handle_step_decompress(ctx, intask);
317 		} else if (enabled_steps) {
318 			INIT_WORK(&ctx->work, f2fs_post_read_work);
319 			queue_work(ctx->sbi->post_read_wq, &ctx->work);
320 			return;
321 		}
322 	}
323 
324 	f2fs_verify_and_finish_bio(bio, intask);
325 }
326 
/*
 * bi_end_io handler for write bios (installed by __bio_alloc() for non-read
 * operations, with bi_private pointing at the f2fs_sb_info).
 *
 * Walks every page of the bio, releases padding pages, ends writeback on real
 * pages, records errors, and finally wakes checkpoint waiters once no
 * F2FS_WB_CP_DATA pages remain in flight.
 */
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	/* bi_private is only read after the iostat context is unbound. */
	iostat_update_and_unbind_ctx(bio);
	sbi = bio->bi_private;

	if (time_to_inject(sbi, FAULT_WRITE_IO))
		bio->bi_status = BLK_STS_IOERR;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		/* Sampled before fscrypt_finalize_bounce_page() may swap @page. */
		enum count_type type = WB_DATA_TYPE(page);

		/* Padding page added by f2fs_align_write_bio(): give it back. */
		if (page_private_dummy(page)) {
			clear_page_private_dummy(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true,
						STOP_CP_REASON_WRITE_FAIL);
			continue;
		}

		/* May replace @page (passed by address) for bounce pages. */
		fscrypt_finalize_bounce_page(&page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		/* Compressed pages have their own completion handling. */
		if (f2fs_is_compressed_page(page)) {
			f2fs_compress_write_end_io(bio, page);
			continue;
		}
#endif

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			/* A failed checkpoint-guaranteed write stops checkpointing. */
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true,
						STOP_CP_REASON_WRITE_FAIL);
		}

		/* A node page's index must equal its node id. */
		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_page_private_gcing(page);
		end_page_writeback(page);
	}
	/* Wake anyone sleeping on cp_wait once no CP data is under writeback. */
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}
385 
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * bi_end_io handler for the bio that writes the last block of a zone.
 * bi_private temporarily points at the owning f2fs_bio_info; restore the
 * saved value, signal zone_wait, and then run the regular write completion.
 */
static void f2fs_zone_write_end_io(struct bio *bio)
{
	struct f2fs_bio_info *io = bio->bi_private;

	bio->bi_private = io->bi_private;
	complete(&io->zone_wait);
	f2fs_write_end_io(bio);
}
#endif
396 
397 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
398 		block_t blk_addr, sector_t *sector)
399 {
400 	struct block_device *bdev = sbi->sb->s_bdev;
401 	int i;
402 
403 	if (f2fs_is_multi_device(sbi)) {
404 		for (i = 0; i < sbi->s_ndevs; i++) {
405 			if (FDEV(i).start_blk <= blk_addr &&
406 			    FDEV(i).end_blk >= blk_addr) {
407 				blk_addr -= FDEV(i).start_blk;
408 				bdev = FDEV(i).bdev;
409 				break;
410 			}
411 		}
412 	}
413 
414 	if (sector)
415 		*sector = SECTOR_FROM_BLOCK(blk_addr);
416 	return bdev;
417 }
418 
419 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
420 {
421 	int i;
422 
423 	if (!f2fs_is_multi_device(sbi))
424 		return 0;
425 
426 	for (i = 0; i < sbi->s_ndevs; i++)
427 		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
428 			return i;
429 	return 0;
430 }
431 
432 static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
433 {
434 	unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
435 	unsigned int fua_flag, meta_flag, io_flag;
436 	blk_opf_t op_flags = 0;
437 
438 	if (fio->op != REQ_OP_WRITE)
439 		return 0;
440 	if (fio->type == DATA)
441 		io_flag = fio->sbi->data_io_flag;
442 	else if (fio->type == NODE)
443 		io_flag = fio->sbi->node_io_flag;
444 	else
445 		return 0;
446 
447 	fua_flag = io_flag & temp_mask;
448 	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
449 
450 	/*
451 	 * data/node io flag bits per temp:
452 	 *      REQ_META     |      REQ_FUA      |
453 	 *    5 |    4 |   3 |    2 |    1 |   0 |
454 	 * Cold | Warm | Hot | Cold | Warm | Hot |
455 	 */
456 	if (BIT(fio->temp) & meta_flag)
457 		op_flags |= REQ_META;
458 	if (BIT(fio->temp) & fua_flag)
459 		op_flags |= REQ_FUA;
460 	return op_flags;
461 }
462 
463 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
464 {
465 	struct f2fs_sb_info *sbi = fio->sbi;
466 	struct block_device *bdev;
467 	sector_t sector;
468 	struct bio *bio;
469 
470 	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
471 	bio = bio_alloc_bioset(bdev, npages,
472 				fio->op | fio->op_flags | f2fs_io_flags(fio),
473 				GFP_NOIO, &f2fs_bioset);
474 	bio->bi_iter.bi_sector = sector;
475 	if (is_read_io(fio->op)) {
476 		bio->bi_end_io = f2fs_read_end_io;
477 		bio->bi_private = NULL;
478 	} else {
479 		bio->bi_end_io = f2fs_write_end_io;
480 		bio->bi_private = sbi;
481 	}
482 	iostat_alloc_and_bind_ctx(sbi, bio, NULL);
483 
484 	if (fio->io_wbc)
485 		wbc_init_bio(fio->io_wbc, bio);
486 
487 	return bio;
488 }
489 
490 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
491 				  pgoff_t first_idx,
492 				  const struct f2fs_io_info *fio,
493 				  gfp_t gfp_mask)
494 {
495 	/*
496 	 * The f2fs garbage collector sets ->encrypted_page when it wants to
497 	 * read/write raw data without encryption.
498 	 */
499 	if (!fio || !fio->encrypted_page)
500 		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
501 }
502 
503 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
504 				     pgoff_t next_idx,
505 				     const struct f2fs_io_info *fio)
506 {
507 	/*
508 	 * The f2fs garbage collector sets ->encrypted_page when it wants to
509 	 * read/write raw data without encryption.
510 	 */
511 	if (fio && fio->encrypted_page)
512 		return !bio_has_crypt_ctx(bio);
513 
514 	return fscrypt_mergeable_bio(bio, inode, next_idx);
515 }
516 
/*
 * Submit a read bio: warn (once) if the bio is not actually a read, emit the
 * tracepoint, update the iostat submit context for @type, then hand the bio
 * to the block layer.
 */
void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
				 enum page_type type)
{
	WARN_ON_ONCE(!is_read_io(bio_op(bio)));
	trace_f2fs_submit_read_bio(sbi->sb, type, bio);

	iostat_update_submit_ctx(bio, type);
	submit_bio(bio);
}
526 
527 static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio)
528 {
529 	unsigned int start =
530 		(bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi);
531 
532 	if (start == 0)
533 		return;
534 
535 	/* fill dummy pages */
536 	for (; start < F2FS_IO_SIZE(sbi); start++) {
537 		struct page *page =
538 			mempool_alloc(sbi->write_io_dummy,
539 				      GFP_NOIO | __GFP_NOFAIL);
540 		f2fs_bug_on(sbi, !page);
541 
542 		lock_page(page);
543 
544 		zero_user_segment(page, 0, PAGE_SIZE);
545 		set_page_private_dummy(page);
546 
547 		if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
548 			f2fs_bug_on(sbi, 1);
549 	}
550 }
551 
552 static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
553 				  enum page_type type)
554 {
555 	WARN_ON_ONCE(is_read_io(bio_op(bio)));
556 
557 	if (type == DATA || type == NODE) {
558 		if (f2fs_lfs_mode(sbi) && current->plug)
559 			blk_finish_plug(current->plug);
560 
561 		if (F2FS_IO_ALIGNED(sbi)) {
562 			f2fs_align_write_bio(sbi, bio);
563 			/*
564 			 * In the NODE case, we lose next block address chain.
565 			 * So, we need to do checkpoint in f2fs_sync_file.
566 			 */
567 			if (type == NODE)
568 				set_sbi_flag(sbi, SBI_NEED_CP);
569 		}
570 	}
571 
572 	trace_f2fs_submit_write_bio(sbi->sb, type, bio);
573 	iostat_update_submit_ctx(bio, type);
574 	submit_bio(bio);
575 }
576 
577 static void __submit_merged_bio(struct f2fs_bio_info *io)
578 {
579 	struct f2fs_io_info *fio = &io->fio;
580 
581 	if (!io->bio)
582 		return;
583 
584 	if (is_read_io(fio->op)) {
585 		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
586 		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
587 	} else {
588 		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
589 		f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
590 	}
591 	io->bio = NULL;
592 }
593 
594 static bool __has_merged_page(struct bio *bio, struct inode *inode,
595 						struct page *page, nid_t ino)
596 {
597 	struct bio_vec *bvec;
598 	struct bvec_iter_all iter_all;
599 
600 	if (!bio)
601 		return false;
602 
603 	if (!inode && !page && !ino)
604 		return true;
605 
606 	bio_for_each_segment_all(bvec, bio, iter_all) {
607 		struct page *target = bvec->bv_page;
608 
609 		if (fscrypt_is_bounce_page(target)) {
610 			target = fscrypt_pagecache_page(target);
611 			if (IS_ERR(target))
612 				continue;
613 		}
614 		if (f2fs_is_compressed_page(target)) {
615 			target = f2fs_compress_control_page(target);
616 			if (IS_ERR(target))
617 				continue;
618 		}
619 
620 		if (inode && inode == target->mapping->host)
621 			return true;
622 		if (page && page == target)
623 			return true;
624 		if (ino && ino == ino_of_node(target))
625 			return true;
626 	}
627 
628 	return false;
629 }
630 
/*
 * Allocate and initialise the per-page-type, per-temperature write merge
 * state in sbi->write_io[][].
 *
 * Returns 0 on success or -ENOMEM if an allocation fails.  Arrays allocated
 * for earlier page types are not freed here on failure (presumably the
 * caller cleans up - TODO confirm).
 */
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < NR_PAGE_TYPE; i++) {
		/* META has a single slot; other types have one per temperature. */
		int n = (i == META) ? 1 : NR_TEMP_TYPE;
		int j;

		sbi->write_io[i] = f2fs_kmalloc(sbi,
				array_size(n, sizeof(struct f2fs_bio_info)),
				GFP_KERNEL);
		if (!sbi->write_io[i])
			return -ENOMEM;

		for (j = HOT; j < n; j++) {
			init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
			sbi->write_io[i][j].sbi = sbi;
			sbi->write_io[i][j].bio = NULL;
			spin_lock_init(&sbi->write_io[i][j].io_lock);
			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
#ifdef CONFIG_BLK_DEV_ZONED
			/* State for serialising writes across zone boundaries. */
			init_completion(&sbi->write_io[i][j].zone_wait);
			sbi->write_io[i][j].zone_pending_bio = NULL;
			sbi->write_io[i][j].bi_private = NULL;
#endif
		}
	}

	return 0;
}
663 
664 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
665 				enum page_type type, enum temp_type temp)
666 {
667 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
668 	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
669 
670 	f2fs_down_write(&io->io_rwsem);
671 
672 	if (!io->bio)
673 		goto unlock_out;
674 
675 	/* change META to META_FLUSH in the checkpoint procedure */
676 	if (type >= META_FLUSH) {
677 		io->fio.type = META_FLUSH;
678 		io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
679 		if (!test_opt(sbi, NOBARRIER))
680 			io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
681 	}
682 	__submit_merged_bio(io);
683 unlock_out:
684 	f2fs_up_write(&io->io_rwsem);
685 }
686 
687 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
688 				struct inode *inode, struct page *page,
689 				nid_t ino, enum page_type type, bool force)
690 {
691 	enum temp_type temp;
692 	bool ret = true;
693 
694 	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
695 		if (!force)	{
696 			enum page_type btype = PAGE_TYPE_OF_BIO(type);
697 			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
698 
699 			f2fs_down_read(&io->io_rwsem);
700 			ret = __has_merged_page(io->bio, inode, page, ino);
701 			f2fs_up_read(&io->io_rwsem);
702 		}
703 		if (ret)
704 			__f2fs_submit_merged_write(sbi, type, temp);
705 
706 		/* TODO: use HOT temp only for meta pages now. */
707 		if (type >= META)
708 			break;
709 	}
710 }
711 
/* Unconditionally submit all pending merged write bios of @type. */
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}
716 
/*
 * Submit pending merged write bios of @type only if they contain a page
 * matching @inode, @page or @ino (see __has_merged_page()).
 */
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}
723 
/* Submit all pending merged writes, in DATA, NODE, META order. */
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}
730 
731 /*
732  * Fill the locked page with data located in the block address.
733  * A caller needs to unlock the page on failure.
734  */
735 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
736 {
737 	struct bio *bio;
738 	struct page *page = fio->encrypted_page ?
739 			fio->encrypted_page : fio->page;
740 
741 	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
742 			fio->is_por ? META_POR : (__is_meta_io(fio) ?
743 			META_GENERIC : DATA_GENERIC_ENHANCE))) {
744 		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
745 		return -EFSCORRUPTED;
746 	}
747 
748 	trace_f2fs_submit_page_bio(page, fio);
749 
750 	/* Allocate a new bio */
751 	bio = __bio_alloc(fio, 1);
752 
753 	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
754 			       fio->page->index, fio, GFP_NOIO);
755 
756 	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
757 		bio_put(bio);
758 		return -EFAULT;
759 	}
760 
761 	if (fio->io_wbc && !is_read_io(fio->op))
762 		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
763 
764 	inc_page_count(fio->sbi, is_read_io(fio->op) ?
765 			__read_io_type(page) : WB_DATA_TYPE(fio->page));
766 
767 	if (is_read_io(bio_op(bio)))
768 		f2fs_submit_read_bio(fio->sbi, bio, fio->type);
769 	else
770 		f2fs_submit_write_bio(fio->sbi, bio, fio->type);
771 	return 0;
772 }
773 
774 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
775 				block_t last_blkaddr, block_t cur_blkaddr)
776 {
777 	if (unlikely(sbi->max_io_bytes &&
778 			bio->bi_iter.bi_size >= sbi->max_io_bytes))
779 		return false;
780 	if (last_blkaddr + 1 != cur_blkaddr)
781 		return false;
782 	return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
783 }
784 
785 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
786 						struct f2fs_io_info *fio)
787 {
788 	if (io->fio.op != fio->op)
789 		return false;
790 	return io->fio.op_flags == fio->op_flags;
791 }
792 
793 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
794 					struct f2fs_bio_info *io,
795 					struct f2fs_io_info *fio,
796 					block_t last_blkaddr,
797 					block_t cur_blkaddr)
798 {
799 	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
800 		unsigned int filled_blocks =
801 				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
802 		unsigned int io_size = F2FS_IO_SIZE(sbi);
803 		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
804 
805 		/* IOs in bio is aligned and left space of vectors is not enough */
806 		if (!(filled_blocks % io_size) && left_vecs < io_size)
807 			return false;
808 	}
809 	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
810 		return false;
811 	return io_type_is_mergeable(io, fio);
812 }
813 
814 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
815 				struct page *page, enum temp_type temp)
816 {
817 	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
818 	struct bio_entry *be;
819 
820 	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
821 	be->bio = bio;
822 	bio_get(bio);
823 
824 	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
825 		f2fs_bug_on(sbi, 1);
826 
827 	f2fs_down_write(&io->bio_list_lock);
828 	list_add_tail(&be->list, &io->bio_list);
829 	f2fs_up_write(&io->bio_list_lock);
830 }
831 
/*
 * Unlink @be from its bio_list and free it.  The callers visible in this
 * file hold the list's bio_list_lock for writing.
 */
static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}
837 
/*
 * Try to append @page to the tracked in-place-update bio *@bio.
 *
 * The DATA bio_lists of all temperatures are searched for the entry that
 * tracks *@bio.  If the page can be merged (crypt-compatible and
 * bio_add_page() succeeds), 0 is returned and *@bio stays valid.  If the bio
 * is found but the page cannot be added, its entry is removed and the bio is
 * submitted.
 *
 * Returns 0 on success, -EAGAIN otherwise.  On a non-zero return the
 * caller's reference on *@bio has been dropped and *@bio is set to NULL.
 */
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
							struct page *page)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum temp_type temp;
	bool found = false;
	int ret = -EAGAIN;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (be->bio != *bio)
				continue;

			found = true;

			/* The caller is expected to have checked contiguity already. */
			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
							    *fio->last_block,
							    fio->new_blkaddr));
			if (f2fs_crypt_mergeable_bio(*bio,
					fio->page->mapping->host,
					fio->page->index, fio) &&
			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
					PAGE_SIZE) {
				ret = 0;
				break;
			}

			/* page can't be merged into bio; submit the bio */
			del_bio_entry(be);
			f2fs_submit_write_bio(sbi, *bio, DATA);
			break;
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	/* Failure (not found, or submitted above): drop the caller's reference. */
	if (ret) {
		bio_put(*bio);
		*bio = NULL;
	}

	return ret;
}
885 
/*
 * Submit a tracked in-place-update bio.
 *
 * The bio to submit is either *@bio itself (when @bio points at a non-NULL
 * bio) or, failing that, the first tracked bio that contains @page.  At
 * least one of the two must be provided.
 *
 * Each temperature's list is scanned first under the read lock; only when a
 * candidate is seen is the scan repeated under the write lock, where the
 * entry is removed.  The bio is submitted after the lock is dropped.
 *
 * On return, if @bio was given and *@bio was non-NULL, the caller's
 * reference has been dropped and *@bio is NULL.
 */
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct page *page)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	f2fs_bug_on(sbi, !target && !page);

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		if (list_empty(head))
			continue;

		/* Pass 1: look for a candidate under the read lock. */
		f2fs_down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found)
				break;
		}
		f2fs_up_read(&io->bio_list_lock);

		if (!found)
			continue;

		/* The list may have changed since the read lock was dropped. */
		found = false;

		/* Pass 2: re-check under the write lock and unlink the entry. */
		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found) {
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (found)
		f2fs_submit_write_bio(sbi, target, DATA);
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}
943 
944 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
945 {
946 	struct bio *bio = *fio->bio;
947 	struct page *page = fio->encrypted_page ?
948 			fio->encrypted_page : fio->page;
949 
950 	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
951 			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
952 		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
953 		return -EFSCORRUPTED;
954 	}
955 
956 	trace_f2fs_submit_page_bio(page, fio);
957 
958 	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
959 						fio->new_blkaddr))
960 		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
961 alloc_new:
962 	if (!bio) {
963 		bio = __bio_alloc(fio, BIO_MAX_VECS);
964 		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
965 				       fio->page->index, fio, GFP_NOIO);
966 
967 		add_bio_entry(fio->sbi, bio, page, fio->temp);
968 	} else {
969 		if (add_ipu_page(fio, &bio, page))
970 			goto alloc_new;
971 	}
972 
973 	if (fio->io_wbc)
974 		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
975 
976 	inc_page_count(fio->sbi, WB_DATA_TYPE(page));
977 
978 	*fio->last_block = fio->new_blkaddr;
979 	*fio->bio = bio;
980 
981 	return 0;
982 }
983 
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Return true if @blkaddr is the last block of a sequential zone on a
 * host-managed zoned device.
 *
 * On multi-device filesystems the address is first rebased to its device; an
 * address outside that device's range is reported and yields false.
 */
static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int devi = 0;

	if (f2fs_is_multi_device(sbi)) {
		devi = f2fs_target_device_index(sbi, blkaddr);
		if (blkaddr < FDEV(devi).start_blk ||
		    blkaddr > FDEV(devi).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkaddr);
			return false;
		}
		blkaddr -= FDEV(devi).start_blk;
	}

	if (bdev_zoned_model(FDEV(devi).bdev) != BLK_ZONED_HM)
		return false;
	if (!f2fs_blkz_is_seq(sbi, devi, blkaddr))
		return false;

	return blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1;
}
#endif
1003 
1004 void f2fs_submit_page_write(struct f2fs_io_info *fio)
1005 {
1006 	struct f2fs_sb_info *sbi = fio->sbi;
1007 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
1008 	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
1009 	struct page *bio_page;
1010 
1011 	f2fs_bug_on(sbi, is_read_io(fio->op));
1012 
1013 	f2fs_down_write(&io->io_rwsem);
1014 
1015 #ifdef CONFIG_BLK_DEV_ZONED
1016 	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
1017 		wait_for_completion_io(&io->zone_wait);
1018 		bio_put(io->zone_pending_bio);
1019 		io->zone_pending_bio = NULL;
1020 		io->bi_private = NULL;
1021 	}
1022 #endif
1023 
1024 next:
1025 	if (fio->in_list) {
1026 		spin_lock(&io->io_lock);
1027 		if (list_empty(&io->io_list)) {
1028 			spin_unlock(&io->io_lock);
1029 			goto out;
1030 		}
1031 		fio = list_first_entry(&io->io_list,
1032 						struct f2fs_io_info, list);
1033 		list_del(&fio->list);
1034 		spin_unlock(&io->io_lock);
1035 	}
1036 
1037 	verify_fio_blkaddr(fio);
1038 
1039 	if (fio->encrypted_page)
1040 		bio_page = fio->encrypted_page;
1041 	else if (fio->compressed_page)
1042 		bio_page = fio->compressed_page;
1043 	else
1044 		bio_page = fio->page;
1045 
1046 	/* set submitted = true as a return value */
1047 	fio->submitted = 1;
1048 
1049 	inc_page_count(sbi, WB_DATA_TYPE(bio_page));
1050 
1051 	if (io->bio &&
1052 	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
1053 			      fio->new_blkaddr) ||
1054 	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
1055 				       bio_page->index, fio)))
1056 		__submit_merged_bio(io);
1057 alloc_new:
1058 	if (io->bio == NULL) {
1059 		if (F2FS_IO_ALIGNED(sbi) &&
1060 				(fio->type == DATA || fio->type == NODE) &&
1061 				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
1062 			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
1063 			fio->retry = 1;
1064 			goto skip;
1065 		}
1066 		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
1067 		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
1068 				       bio_page->index, fio, GFP_NOIO);
1069 		io->fio = *fio;
1070 	}
1071 
1072 	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
1073 		__submit_merged_bio(io);
1074 		goto alloc_new;
1075 	}
1076 
1077 	if (fio->io_wbc)
1078 		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
1079 
1080 	io->last_block_in_bio = fio->new_blkaddr;
1081 
1082 	trace_f2fs_submit_page_write(fio->page, fio);
1083 skip:
1084 	if (fio->in_list)
1085 		goto next;
1086 out:
1087 #ifdef CONFIG_BLK_DEV_ZONED
1088 	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
1089 			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
1090 		bio_get(io->bio);
1091 		reinit_completion(&io->zone_wait);
1092 		io->bi_private = io->bio->bi_private;
1093 		io->bio->bi_private = io;
1094 		io->bio->bi_end_io = f2fs_zone_write_end_io;
1095 		io->zone_pending_bio = io->bio;
1096 		__submit_merged_bio(io);
1097 	}
1098 #endif
1099 	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1100 				!f2fs_is_checkpoint_ready(sbi))
1101 		__submit_merged_bio(io);
1102 	f2fs_up_write(&io->io_rwsem);
1103 }
1104 
1105 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1106 				      unsigned nr_pages, blk_opf_t op_flag,
1107 				      pgoff_t first_idx, bool for_write)
1108 {
1109 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1110 	struct bio *bio;
1111 	struct bio_post_read_ctx *ctx = NULL;
1112 	unsigned int post_read_steps = 0;
1113 	sector_t sector;
1114 	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
1115 
1116 	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1117 			       REQ_OP_READ | op_flag,
1118 			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1119 	if (!bio)
1120 		return ERR_PTR(-ENOMEM);
1121 	bio->bi_iter.bi_sector = sector;
1122 	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1123 	bio->bi_end_io = f2fs_read_end_io;
1124 
1125 	if (fscrypt_inode_uses_fs_layer_crypto(inode))
1126 		post_read_steps |= STEP_DECRYPT;
1127 
1128 	if (f2fs_need_verity(inode, first_idx))
1129 		post_read_steps |= STEP_VERITY;
1130 
1131 	/*
1132 	 * STEP_DECOMPRESS is handled specially, since a compressed file might
1133 	 * contain both compressed and uncompressed clusters.  We'll allocate a
1134 	 * bio_post_read_ctx if the file is compressed, but the caller is
1135 	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1136 	 */
1137 
1138 	if (post_read_steps || f2fs_compressed_file(inode)) {
1139 		/* Due to the mempool, this never fails. */
1140 		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1141 		ctx->bio = bio;
1142 		ctx->sbi = sbi;
1143 		ctx->enabled_steps = post_read_steps;
1144 		ctx->fs_blkaddr = blkaddr;
1145 		ctx->decompression_attempted = false;
1146 		bio->bi_private = ctx;
1147 	}
1148 	iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1149 
1150 	return bio;
1151 }
1152 
1153 /* This can handle encryption stuffs */
1154 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1155 				 block_t blkaddr, blk_opf_t op_flags,
1156 				 bool for_write)
1157 {
1158 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1159 	struct bio *bio;
1160 
1161 	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1162 					page->index, for_write);
1163 	if (IS_ERR(bio))
1164 		return PTR_ERR(bio);
1165 
1166 	/* wait for GCed page writeback via META_MAPPING */
1167 	f2fs_wait_on_block_writeback(inode, blkaddr);
1168 
1169 	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1170 		bio_put(bio);
1171 		return -EFAULT;
1172 	}
1173 	inc_page_count(sbi, F2FS_RD_DATA);
1174 	f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
1175 	f2fs_submit_read_bio(sbi, bio, DATA);
1176 	return 0;
1177 }
1178 
1179 static void __set_data_blkaddr(struct dnode_of_data *dn)
1180 {
1181 	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
1182 	__le32 *addr_array;
1183 	int base = 0;
1184 
1185 	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
1186 		base = get_extra_isize(dn->inode);
1187 
1188 	/* Get physical address of data block */
1189 	addr_array = blkaddr_in_node(rn);
1190 	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1191 }
1192 
1193 /*
1194  * Lock ordering for the change of data block address:
1195  * ->data_page
1196  *  ->node_page
1197  *    update block addresses in the node page
1198  */
1199 void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
1200 {
1201 	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1202 	__set_data_blkaddr(dn);
1203 	if (set_page_dirty(dn->node_page))
1204 		dn->node_changed = true;
1205 }
1206 
1207 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1208 {
1209 	dn->data_blkaddr = blkaddr;
1210 	f2fs_set_data_blkaddr(dn);
1211 	f2fs_update_read_extent_cache(dn);
1212 }
1213 
1214 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
1215 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1216 {
1217 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1218 	int err;
1219 
1220 	if (!count)
1221 		return 0;
1222 
1223 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1224 		return -EPERM;
1225 	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1226 		return err;
1227 
1228 	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1229 						dn->ofs_in_node, count);
1230 
1231 	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1232 
1233 	for (; count > 0; dn->ofs_in_node++) {
1234 		block_t blkaddr = f2fs_data_blkaddr(dn);
1235 
1236 		if (blkaddr == NULL_ADDR) {
1237 			dn->data_blkaddr = NEW_ADDR;
1238 			__set_data_blkaddr(dn);
1239 			count--;
1240 		}
1241 	}
1242 
1243 	if (set_page_dirty(dn->node_page))
1244 		dn->node_changed = true;
1245 	return 0;
1246 }
1247 
1248 /* Should keep dn->ofs_in_node unchanged */
1249 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1250 {
1251 	unsigned int ofs_in_node = dn->ofs_in_node;
1252 	int ret;
1253 
1254 	ret = f2fs_reserve_new_blocks(dn, 1);
1255 	dn->ofs_in_node = ofs_in_node;
1256 	return ret;
1257 }
1258 
1259 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1260 {
1261 	bool need_put = dn->inode_page ? false : true;
1262 	int err;
1263 
1264 	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1265 	if (err)
1266 		return err;
1267 
1268 	if (dn->data_blkaddr == NULL_ADDR)
1269 		err = f2fs_reserve_new_block(dn);
1270 	if (err || need_put)
1271 		f2fs_put_dnode(dn);
1272 	return err;
1273 }
1274 
1275 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1276 				     blk_opf_t op_flags, bool for_write,
1277 				     pgoff_t *next_pgofs)
1278 {
1279 	struct address_space *mapping = inode->i_mapping;
1280 	struct dnode_of_data dn;
1281 	struct page *page;
1282 	int err;
1283 
1284 	page = f2fs_grab_cache_page(mapping, index, for_write);
1285 	if (!page)
1286 		return ERR_PTR(-ENOMEM);
1287 
1288 	if (f2fs_lookup_read_extent_cache_block(inode, index,
1289 						&dn.data_blkaddr)) {
1290 		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1291 						DATA_GENERIC_ENHANCE_READ)) {
1292 			err = -EFSCORRUPTED;
1293 			f2fs_handle_error(F2FS_I_SB(inode),
1294 						ERROR_INVALID_BLKADDR);
1295 			goto put_err;
1296 		}
1297 		goto got_it;
1298 	}
1299 
1300 	set_new_dnode(&dn, inode, NULL, NULL, 0);
1301 	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1302 	if (err) {
1303 		if (err == -ENOENT && next_pgofs)
1304 			*next_pgofs = f2fs_get_next_page_offset(&dn, index);
1305 		goto put_err;
1306 	}
1307 	f2fs_put_dnode(&dn);
1308 
1309 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1310 		err = -ENOENT;
1311 		if (next_pgofs)
1312 			*next_pgofs = index + 1;
1313 		goto put_err;
1314 	}
1315 	if (dn.data_blkaddr != NEW_ADDR &&
1316 			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1317 						dn.data_blkaddr,
1318 						DATA_GENERIC_ENHANCE)) {
1319 		err = -EFSCORRUPTED;
1320 		f2fs_handle_error(F2FS_I_SB(inode),
1321 					ERROR_INVALID_BLKADDR);
1322 		goto put_err;
1323 	}
1324 got_it:
1325 	if (PageUptodate(page)) {
1326 		unlock_page(page);
1327 		return page;
1328 	}
1329 
1330 	/*
1331 	 * A new dentry page is allocated but not able to be written, since its
1332 	 * new inode page couldn't be allocated due to -ENOSPC.
1333 	 * In such the case, its blkaddr can be remained as NEW_ADDR.
1334 	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
1335 	 * f2fs_init_inode_metadata.
1336 	 */
1337 	if (dn.data_blkaddr == NEW_ADDR) {
1338 		zero_user_segment(page, 0, PAGE_SIZE);
1339 		if (!PageUptodate(page))
1340 			SetPageUptodate(page);
1341 		unlock_page(page);
1342 		return page;
1343 	}
1344 
1345 	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
1346 						op_flags, for_write);
1347 	if (err)
1348 		goto put_err;
1349 	return page;
1350 
1351 put_err:
1352 	f2fs_put_page(page, 1);
1353 	return ERR_PTR(err);
1354 }
1355 
1356 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
1357 					pgoff_t *next_pgofs)
1358 {
1359 	struct address_space *mapping = inode->i_mapping;
1360 	struct page *page;
1361 
1362 	page = find_get_page(mapping, index);
1363 	if (page && PageUptodate(page))
1364 		return page;
1365 	f2fs_put_page(page, 0);
1366 
1367 	page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
1368 	if (IS_ERR(page))
1369 		return page;
1370 
1371 	if (PageUptodate(page))
1372 		return page;
1373 
1374 	wait_on_page_locked(page);
1375 	if (unlikely(!PageUptodate(page))) {
1376 		f2fs_put_page(page, 0);
1377 		return ERR_PTR(-EIO);
1378 	}
1379 	return page;
1380 }
1381 
1382 /*
1383  * If it tries to access a hole, return an error.
1384  * Because, the callers, functions in dir.c and GC, should be able to know
1385  * whether this page exists or not.
1386  */
1387 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1388 							bool for_write)
1389 {
1390 	struct address_space *mapping = inode->i_mapping;
1391 	struct page *page;
1392 repeat:
1393 	page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
1394 	if (IS_ERR(page))
1395 		return page;
1396 
1397 	/* wait for read completion */
1398 	lock_page(page);
1399 	if (unlikely(page->mapping != mapping)) {
1400 		f2fs_put_page(page, 1);
1401 		goto repeat;
1402 	}
1403 	if (unlikely(!PageUptodate(page))) {
1404 		f2fs_put_page(page, 1);
1405 		return ERR_PTR(-EIO);
1406 	}
1407 	return page;
1408 }
1409 
1410 /*
1411  * Caller ensures that this data page is never allocated.
1412  * A new zero-filled data page is allocated in the page cache.
1413  *
1414  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1415  * f2fs_unlock_op().
1416  * Note that, ipage is set only by make_empty_dir, and if any error occur,
1417  * ipage should be released by this function.
1418  */
1419 struct page *f2fs_get_new_data_page(struct inode *inode,
1420 		struct page *ipage, pgoff_t index, bool new_i_size)
1421 {
1422 	struct address_space *mapping = inode->i_mapping;
1423 	struct page *page;
1424 	struct dnode_of_data dn;
1425 	int err;
1426 
1427 	page = f2fs_grab_cache_page(mapping, index, true);
1428 	if (!page) {
1429 		/*
1430 		 * before exiting, we should make sure ipage will be released
1431 		 * if any error occur.
1432 		 */
1433 		f2fs_put_page(ipage, 1);
1434 		return ERR_PTR(-ENOMEM);
1435 	}
1436 
1437 	set_new_dnode(&dn, inode, ipage, NULL, 0);
1438 	err = f2fs_reserve_block(&dn, index);
1439 	if (err) {
1440 		f2fs_put_page(page, 1);
1441 		return ERR_PTR(err);
1442 	}
1443 	if (!ipage)
1444 		f2fs_put_dnode(&dn);
1445 
1446 	if (PageUptodate(page))
1447 		goto got_it;
1448 
1449 	if (dn.data_blkaddr == NEW_ADDR) {
1450 		zero_user_segment(page, 0, PAGE_SIZE);
1451 		if (!PageUptodate(page))
1452 			SetPageUptodate(page);
1453 	} else {
1454 		f2fs_put_page(page, 1);
1455 
1456 		/* if ipage exists, blkaddr should be NEW_ADDR */
1457 		f2fs_bug_on(F2FS_I_SB(inode), ipage);
1458 		page = f2fs_get_lock_data_page(inode, index, true);
1459 		if (IS_ERR(page))
1460 			return page;
1461 	}
1462 got_it:
1463 	if (new_i_size && i_size_read(inode) <
1464 				((loff_t)(index + 1) << PAGE_SHIFT))
1465 		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1466 	return page;
1467 }
1468 
1469 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1470 {
1471 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1472 	struct f2fs_summary sum;
1473 	struct node_info ni;
1474 	block_t old_blkaddr;
1475 	blkcnt_t count = 1;
1476 	int err;
1477 
1478 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1479 		return -EPERM;
1480 
1481 	err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1482 	if (err)
1483 		return err;
1484 
1485 	dn->data_blkaddr = f2fs_data_blkaddr(dn);
1486 	if (dn->data_blkaddr == NULL_ADDR) {
1487 		err = inc_valid_block_count(sbi, dn->inode, &count);
1488 		if (unlikely(err))
1489 			return err;
1490 	}
1491 
1492 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1493 	old_blkaddr = dn->data_blkaddr;
1494 	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
1495 				&sum, seg_type, NULL);
1496 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
1497 		invalidate_mapping_pages(META_MAPPING(sbi),
1498 					old_blkaddr, old_blkaddr);
1499 		f2fs_invalidate_compress_page(sbi, old_blkaddr);
1500 	}
1501 	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1502 	return 0;
1503 }
1504 
1505 static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
1506 {
1507 	if (flag == F2FS_GET_BLOCK_PRE_AIO)
1508 		f2fs_down_read(&sbi->node_change);
1509 	else
1510 		f2fs_lock_op(sbi);
1511 }
1512 
1513 static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
1514 {
1515 	if (flag == F2FS_GET_BLOCK_PRE_AIO)
1516 		f2fs_up_read(&sbi->node_change);
1517 	else
1518 		f2fs_unlock_op(sbi);
1519 }
1520 
1521 int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
1522 {
1523 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1524 	int err = 0;
1525 
1526 	f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1527 	if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
1528 						&dn->data_blkaddr))
1529 		err = f2fs_reserve_block(dn, index);
1530 	f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1531 
1532 	return err;
1533 }
1534 
1535 static int f2fs_map_no_dnode(struct inode *inode,
1536 		struct f2fs_map_blocks *map, struct dnode_of_data *dn,
1537 		pgoff_t pgoff)
1538 {
1539 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1540 
1541 	/*
1542 	 * There is one exceptional case that read_node_page() may return
1543 	 * -ENOENT due to filesystem has been shutdown or cp_error, return
1544 	 * -EIO in that case.
1545 	 */
1546 	if (map->m_may_create &&
1547 	    (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
1548 		return -EIO;
1549 
1550 	if (map->m_next_pgofs)
1551 		*map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
1552 	if (map->m_next_extent)
1553 		*map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
1554 	return 0;
1555 }
1556 
1557 static bool f2fs_map_blocks_cached(struct inode *inode,
1558 		struct f2fs_map_blocks *map, int flag)
1559 {
1560 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1561 	unsigned int maxblocks = map->m_len;
1562 	pgoff_t pgoff = (pgoff_t)map->m_lblk;
1563 	struct extent_info ei = {};
1564 
1565 	if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
1566 		return false;
1567 
1568 	map->m_pblk = ei.blk + pgoff - ei.fofs;
1569 	map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
1570 	map->m_flags = F2FS_MAP_MAPPED;
1571 	if (map->m_next_extent)
1572 		*map->m_next_extent = pgoff + map->m_len;
1573 
1574 	/* for hardware encryption, but to avoid potential issue in future */
1575 	if (flag == F2FS_GET_BLOCK_DIO)
1576 		f2fs_wait_on_block_writeback_range(inode,
1577 					map->m_pblk, map->m_len);
1578 
1579 	if (f2fs_allow_multi_device_dio(sbi, flag)) {
1580 		int bidx = f2fs_target_device_index(sbi, map->m_pblk);
1581 		struct f2fs_dev_info *dev = &sbi->devs[bidx];
1582 
1583 		map->m_bdev = dev->bdev;
1584 		map->m_pblk -= dev->start_blk;
1585 		map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
1586 	} else {
1587 		map->m_bdev = inode->i_sb->s_bdev;
1588 	}
1589 	return true;
1590 }
1591 
1592 /*
1593  * f2fs_map_blocks() tries to find or build mapping relationship which
1594  * maps continuous logical blocks to physical blocks, and return such
1595  * info via f2fs_map_blocks structure.
1596  */
1597 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
1598 {
1599 	unsigned int maxblocks = map->m_len;
1600 	struct dnode_of_data dn;
1601 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1602 	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1603 	pgoff_t pgofs, end_offset, end;
1604 	int err = 0, ofs = 1;
1605 	unsigned int ofs_in_node, last_ofs_in_node;
1606 	blkcnt_t prealloc;
1607 	block_t blkaddr;
1608 	unsigned int start_pgofs;
1609 	int bidx = 0;
1610 	bool is_hole;
1611 
1612 	if (!maxblocks)
1613 		return 0;
1614 
	/* A non-creating request may be answered from the extent cache alone. */
1615 	if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
1616 		goto out;
1617 
1618 	map->m_bdev = inode->i_sb->s_bdev;
1619 	map->m_multidev_dio =
1620 		f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1621 
1622 	map->m_len = 0;
1623 	map->m_flags = 0;
1624 
1625 	/* it only supports block size == page size */
1626 	pgofs =	(pgoff_t)map->m_lblk;
	/* end is exclusive: the walk stops once pgofs reaches it */
1627 	end = pgofs + maxblocks;
1628 
	/*
	 * Each pass from next_dnode handles the block addresses of one dnode
	 * page; the mapping lock is taken and dropped once per pass when the
	 * request may create blocks.
	 */
1629 next_dnode:
1630 	if (map->m_may_create)
1631 		f2fs_map_lock(sbi, flag);
1632 
1633 	/* When reading holes, we need its node page */
1634 	set_new_dnode(&dn, inode, NULL, NULL, 0);
1635 	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1636 	if (err) {
1637 		if (flag == F2FS_GET_BLOCK_BMAP)
1638 			map->m_pblk = 0;
1639 		if (err == -ENOENT)
1640 			err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
1641 		goto unlock_out;
1642 	}
1643 
1644 	start_pgofs = pgofs;
1645 	prealloc = 0;
1646 	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1647 	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1648 
1649 next_block:
1650 	blkaddr = f2fs_data_blkaddr(&dn);
1651 	is_hole = !__is_valid_data_blkaddr(blkaddr);
1652 	if (!is_hole &&
1653 	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1654 		err = -EFSCORRUPTED;
1655 		f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
1656 		goto sync_out;
1657 	}
1658 
1659 	/* use out-place-update for direct IO under LFS mode */
1660 	if (map->m_may_create &&
1661 	    (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
1662 		if (unlikely(f2fs_cp_error(sbi))) {
1663 			err = -EIO;
1664 			goto sync_out;
1665 		}
1666 
1667 		switch (flag) {
1668 		case F2FS_GET_BLOCK_PRE_AIO:
			/*
			 * Only count NULL_ADDR slots here; they are reserved in
			 * one batch by f2fs_reserve_new_blocks() further down.
			 */
1669 			if (blkaddr == NULL_ADDR) {
1670 				prealloc++;
1671 				last_ofs_in_node = dn.ofs_in_node;
1672 			}
1673 			break;
1674 		case F2FS_GET_BLOCK_PRE_DIO:
1675 		case F2FS_GET_BLOCK_DIO:
1676 			err = __allocate_data_block(&dn, map->m_seg_type);
1677 			if (err)
1678 				goto sync_out;
1679 			if (flag == F2FS_GET_BLOCK_PRE_DIO)
1680 				file_need_truncate(inode);
1681 			set_inode_flag(inode, FI_APPEND_WRITE);
1682 			break;
1683 		default:
1684 			WARN_ON_ONCE(1);
1685 			err = -EIO;
1686 			goto sync_out;
1687 		}
1688 
1689 		blkaddr = dn.data_blkaddr;
1690 		if (is_hole)
1691 			map->m_flags |= F2FS_MAP_NEW;
1692 	} else if (is_hole) {
1693 		if (f2fs_compressed_file(inode) &&
1694 		    f2fs_sanity_check_cluster(&dn) &&
1695 		    (flag != F2FS_GET_BLOCK_FIEMAP ||
1696 		     IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
1697 			err = -EFSCORRUPTED;
1698 			f2fs_handle_error(sbi,
1699 					ERROR_CORRUPTED_CLUSTER);
1700 			goto sync_out;
1701 		}
1702 
		/* A hole ends the walk, except for a non-NULL_ADDR hole under FIEMAP. */
1703 		switch (flag) {
1704 		case F2FS_GET_BLOCK_PRECACHE:
1705 			goto sync_out;
1706 		case F2FS_GET_BLOCK_BMAP:
1707 			map->m_pblk = 0;
1708 			goto sync_out;
1709 		case F2FS_GET_BLOCK_FIEMAP:
1710 			if (blkaddr == NULL_ADDR) {
1711 				if (map->m_next_pgofs)
1712 					*map->m_next_pgofs = pgofs + 1;
1713 				goto sync_out;
1714 			}
1715 			break;
1716 		default:
1717 			/* for defragment case */
1718 			if (map->m_next_pgofs)
1719 				*map->m_next_pgofs = pgofs + 1;
1720 			goto sync_out;
1721 		}
1722 	}
1723 
	/* PRE_AIO builds no extent here; m_len is updated after the batch reserve. */
1724 	if (flag == F2FS_GET_BLOCK_PRE_AIO)
1725 		goto skip;
1726 
1727 	if (map->m_multidev_dio)
1728 		bidx = f2fs_target_device_index(sbi, blkaddr);
1729 
1730 	if (map->m_len == 0) {
1731 		/* reserved delalloc block should be mapped for fiemap. */
1732 		if (blkaddr == NEW_ADDR)
1733 			map->m_flags |= F2FS_MAP_DELALLOC;
1734 		map->m_flags |= F2FS_MAP_MAPPED;
1735 
1736 		map->m_pblk = blkaddr;
1737 		map->m_len = 1;
1738 
1739 		if (map->m_multidev_dio)
1740 			map->m_bdev = FDEV(bidx).bdev;
	/*
	 * Grow the extent when this block directly follows it on disk, when
	 * both the extent and this block are NEW_ADDR, or for PRE_DIO.
	 */
1741 	} else if ((map->m_pblk != NEW_ADDR &&
1742 			blkaddr == (map->m_pblk + ofs)) ||
1743 			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1744 			flag == F2FS_GET_BLOCK_PRE_DIO) {
1745 		if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1746 			goto sync_out;
1747 		ofs++;
1748 		map->m_len++;
1749 	} else {
1750 		goto sync_out;
1751 	}
1752 
1753 skip:
1754 	dn.ofs_in_node++;
1755 	pgofs++;
1756 
1757 	/* preallocate blocks in batch for one dnode page */
1758 	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1759 			(pgofs == end || dn.ofs_in_node == end_offset)) {
1760 
		/* rewind to the first slot visited in this dnode */
1761 		dn.ofs_in_node = ofs_in_node;
1762 		err = f2fs_reserve_new_blocks(&dn, prealloc);
1763 		if (err)
1764 			goto sync_out;
1765 
1766 		map->m_len += dn.ofs_in_node - ofs_in_node;
		/* stopping before the last counted slot means not all were reserved */
1767 		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1768 			err = -ENOSPC;
1769 			goto sync_out;
1770 		}
		/* mark this dnode as consumed so the checks below move on */
1771 		dn.ofs_in_node = end_offset;
1772 	}
1773 
1774 	if (pgofs >= end)
1775 		goto sync_out;
1776 	else if (dn.ofs_in_node < end_offset)
1777 		goto next_block;
1778 
	/* This dnode is exhausted but the range is not: go to the next dnode. */
1779 	if (flag == F2FS_GET_BLOCK_PRECACHE) {
1780 		if (map->m_flags & F2FS_MAP_MAPPED) {
1781 			unsigned int ofs = start_pgofs - map->m_lblk;
1782 
1783 			f2fs_update_read_extent_cache_range(&dn,
1784 				start_pgofs, map->m_pblk + ofs,
1785 				map->m_len - ofs);
1786 		}
1787 	}
1788 
1789 	f2fs_put_dnode(&dn);
1790 
1791 	if (map->m_may_create) {
1792 		f2fs_map_unlock(sbi, flag);
1793 		f2fs_balance_fs(sbi, dn.node_changed);
1794 	}
1795 	goto next_dnode;
1796 
1797 sync_out:
1798 
1799 	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1800 		/*
1801 		 * for hardware encryption, but to avoid potential issue
1802 		 * in future
1803 		 */
1804 		f2fs_wait_on_block_writeback_range(inode,
1805 						map->m_pblk, map->m_len);
1806 
1807 		if (map->m_multidev_dio) {
			/* keep the absolute address; m_pblk becomes device-relative */
1808 			block_t blk_addr = map->m_pblk;
1809 
1810 			bidx = f2fs_target_device_index(sbi, map->m_pblk);
1811 
1812 			map->m_bdev = FDEV(bidx).bdev;
1813 			map->m_pblk -= FDEV(bidx).start_blk;
1814 
1815 			if (map->m_may_create)
1816 				f2fs_update_device_state(sbi, inode->i_ino,
1817 							blk_addr, map->m_len);
1818 
1819 			f2fs_bug_on(sbi, blk_addr + map->m_len >
1820 						FDEV(bidx).end_blk + 1);
1821 		}
1822 	}
1823 
1824 	if (flag == F2FS_GET_BLOCK_PRECACHE) {
1825 		if (map->m_flags & F2FS_MAP_MAPPED) {
1826 			unsigned int ofs = start_pgofs - map->m_lblk;
1827 
1828 			f2fs_update_read_extent_cache_range(&dn,
1829 				start_pgofs, map->m_pblk + ofs,
1830 				map->m_len - ofs);
1831 		}
1832 		if (map->m_next_extent)
1833 			*map->m_next_extent = pgofs + 1;
1834 	}
1835 	f2fs_put_dnode(&dn);
	/* reached directly when the dnode lookup failed, so there is no dnode to put */
1836 unlock_out:
1837 	if (map->m_may_create) {
1838 		f2fs_map_unlock(sbi, flag);
1839 		f2fs_balance_fs(sbi, dn.node_changed);
1840 	}
1841 out:
1842 	trace_f2fs_map_blocks(inode, map, flag, err);
1843 	return err;
1844 }
1845 
1846 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1847 {
1848 	struct f2fs_map_blocks map;
1849 	block_t last_lblk;
1850 	int err;
1851 
1852 	if (pos + len > i_size_read(inode))
1853 		return false;
1854 
1855 	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1856 	map.m_next_pgofs = NULL;
1857 	map.m_next_extent = NULL;
1858 	map.m_seg_type = NO_CHECK_TYPE;
1859 	map.m_may_create = false;
1860 	last_lblk = F2FS_BLK_ALIGN(pos + len);
1861 
1862 	while (map.m_lblk < last_lblk) {
1863 		map.m_len = last_lblk - map.m_lblk;
1864 		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
1865 		if (err || map.m_len == 0)
1866 			return false;
1867 		map.m_lblk += map.m_len;
1868 	}
1869 	return true;
1870 }
1871 
1872 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1873 {
1874 	return (bytes >> inode->i_blkbits);
1875 }
1876 
1877 static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1878 {
1879 	return (blks << inode->i_blkbits);
1880 }
1881 
1882 static int f2fs_xattr_fiemap(struct inode *inode,
1883 				struct fiemap_extent_info *fieinfo)
1884 {
1885 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1886 	struct page *page;
1887 	struct node_info ni;
1888 	__u64 phys = 0, len;
1889 	__u32 flags;
1890 	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1891 	int err = 0;
1892 
1893 	if (f2fs_has_inline_xattr(inode)) {
1894 		int offset;
1895 
1896 		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1897 						inode->i_ino, false);
1898 		if (!page)
1899 			return -ENOMEM;
1900 
1901 		err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1902 		if (err) {
1903 			f2fs_put_page(page, 1);
1904 			return err;
1905 		}
1906 
1907 		phys = blks_to_bytes(inode, ni.blk_addr);
1908 		offset = offsetof(struct f2fs_inode, i_addr) +
1909 					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1910 					get_inline_xattr_addrs(inode));
1911 
1912 		phys += offset;
1913 		len = inline_xattr_size(inode);
1914 
1915 		f2fs_put_page(page, 1);
1916 
1917 		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1918 
1919 		if (!xnid)
1920 			flags |= FIEMAP_EXTENT_LAST;
1921 
1922 		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1923 		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1924 		if (err)
1925 			return err;
1926 	}
1927 
1928 	if (xnid) {
1929 		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1930 		if (!page)
1931 			return -ENOMEM;
1932 
1933 		err = f2fs_get_node_info(sbi, xnid, &ni, false);
1934 		if (err) {
1935 			f2fs_put_page(page, 1);
1936 			return err;
1937 		}
1938 
1939 		phys = blks_to_bytes(inode, ni.blk_addr);
1940 		len = inode->i_sb->s_blocksize;
1941 
1942 		f2fs_put_page(page, 1);
1943 
1944 		flags = FIEMAP_EXTENT_LAST;
1945 	}
1946 
1947 	if (phys) {
1948 		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1949 		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1950 	}
1951 
1952 	return (err < 0 ? err : 0);
1953 }
1954 
1955 static loff_t max_inode_blocks(struct inode *inode)
1956 {
1957 	loff_t result = ADDRS_PER_INODE(inode);
1958 	loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1959 
1960 	/* two direct node blocks */
1961 	result += (leaf_count * 2);
1962 
1963 	/* two indirect node blocks */
1964 	leaf_count *= NIDS_PER_BLOCK;
1965 	result += (leaf_count * 2);
1966 
1967 	/* one double indirect node block */
1968 	leaf_count *= NIDS_PER_BLOCK;
1969 	result += leaf_count;
1970 
1971 	return result;
1972 }
1973 
/*
 * Fill @fieinfo with the extents that back the byte range [@start,
 * @start + @len) of @inode, or with the xattr extents when
 * FIEMAP_FLAG_XATTR is requested.  Runs under inode_lock().
 *
 * Returns 0 on success or a negative errno (-EFBIG when @start is beyond the
 * maximum file size, -EINTR on a fatal signal).
 */
1974 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1975 		u64 start, u64 len)
1976 {
1977 	struct f2fs_map_blocks map;
1978 	sector_t start_blk, last_blk;
1979 	pgoff_t next_pgofs;
1980 	u64 logical = 0, phys = 0, size = 0;
1981 	u32 flags = 0;
1982 	int ret = 0;
1983 	bool compr_cluster = false, compr_appended;
1984 	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
1985 	unsigned int count_in_cluster = 0;
1986 	loff_t maxbytes;
1987 
1988 	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1989 		ret = f2fs_precache_extents(inode);
1990 		if (ret)
1991 			return ret;
1992 	}
1993 
1994 	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
1995 	if (ret)
1996 		return ret;
1997 
1998 	inode_lock(inode);
1999 
2000 	maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
2001 	if (start > maxbytes) {
2002 		ret = -EFBIG;
2003 		goto out;
2004 	}
2005 
	/* trim the request so it does not reach past maxbytes */
2006 	if (len > maxbytes || (maxbytes - len) < start)
2007 		len = maxbytes - start;
2008 
2009 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
2010 		ret = f2fs_xattr_fiemap(inode, fieinfo);
2011 		goto out;
2012 	}
2013 
	/* -EAGAIN from the inline helper falls through to the block walk below */
2014 	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
2015 		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
2016 		if (ret != -EAGAIN)
2017 			goto out;
2018 	}
2019 
	/* a request shorter than one block is widened to one block */
2020 	if (bytes_to_blks(inode, len) == 0)
2021 		len = blks_to_bytes(inode, 1);
2022 
2023 	start_blk = bytes_to_blks(inode, start);
2024 	last_blk = bytes_to_blks(inode, start + len - 1);
2025 
	/*
	 * Each iteration maps one run starting at start_blk.  The extent found
	 * is held in logical/phys/size/flags and only handed to
	 * fiemap_fill_next_extent() on a later iteration.
	 */
2026 next:
2027 	memset(&map, 0, sizeof(map));
2028 	map.m_lblk = start_blk;
2029 	map.m_len = bytes_to_blks(inode, len);
2030 	map.m_next_pgofs = &next_pgofs;
2031 	map.m_seg_type = NO_CHECK_TYPE;
2032 
	/*
	 * compr_cluster is set once a COMPRESS_ADDR mapping has been seen;
	 * look one block further and limit the lookup to what is left of the
	 * cluster.
	 */
2033 	if (compr_cluster) {
2034 		map.m_lblk += 1;
2035 		map.m_len = cluster_size - count_in_cluster;
2036 	}
2037 
2038 	ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
2039 	if (ret)
2040 		goto out;
2041 
2042 	/* HOLE */
2043 	if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
2044 		start_blk = next_pgofs;
2045 
2046 		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
2047 						max_inode_blocks(inode)))
2048 			goto prep_next;
2049 
		/* no addressable block remains: the pending extent is the last one */
2050 		flags |= FIEMAP_EXTENT_LAST;
2051 	}
2052 
2053 	compr_appended = false;
2054 	/* In a case of compressed cluster, append this to the last extent */
2055 	if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
2056 			!(map.m_flags & F2FS_MAP_FLAGS))) {
2057 		compr_appended = true;
2058 		goto skip_fill;
2059 	}
2060 
	/* emit the extent recorded by a previous iteration, if any */
2061 	if (size) {
2062 		flags |= FIEMAP_EXTENT_MERGED;
2063 		if (IS_ENCRYPTED(inode))
2064 			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
2065 
2066 		ret = fiemap_fill_next_extent(fieinfo, logical,
2067 				phys, size, flags);
2068 		trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
2069 		if (ret)
2070 			goto out;
2071 		size = 0;
2072 	}
2073 
2074 	if (start_blk > last_blk)
2075 		goto out;
2076 
2077 skip_fill:
2078 	if (map.m_pblk == COMPRESS_ADDR) {
2079 		compr_cluster = true;
2080 		count_in_cluster = 1;
2081 	} else if (compr_appended) {
		/* fold the rest of the cluster into the pending extent */
2082 		unsigned int appended_blks = cluster_size -
2083 						count_in_cluster + 1;
2084 		size += blks_to_bytes(inode, appended_blks);
2085 		start_blk += appended_blks;
2086 		compr_cluster = false;
2087 	} else {
		/* record this mapping as the new pending extent */
2088 		logical = blks_to_bytes(inode, start_blk);
2089 		phys = __is_valid_data_blkaddr(map.m_pblk) ?
2090 			blks_to_bytes(inode, map.m_pblk) : 0;
2091 		size = blks_to_bytes(inode, map.m_len);
2092 		flags = 0;
2093 
2094 		if (compr_cluster) {
2095 			flags = FIEMAP_EXTENT_ENCODED;
2096 			count_in_cluster += map.m_len;
2097 			if (count_in_cluster == cluster_size) {
2098 				compr_cluster = false;
				/* NOTE(review): presumably accounts for the COMPRESS_ADDR block - confirm */
2099 				size += blks_to_bytes(inode, 1);
2100 			}
2101 		} else if (map.m_flags & F2FS_MAP_DELALLOC) {
2102 			flags = FIEMAP_EXTENT_UNWRITTEN;
2103 		}
2104 
2105 		start_blk += bytes_to_blks(inode, size);
2106 	}
2107 
2108 prep_next:
2109 	cond_resched();
2110 	if (fatal_signal_pending(current))
2111 		ret = -EINTR;
2112 	else
2113 		goto next;
2114 out:
	/* a return value of 1 is reported to the caller as success */
2115 	if (ret == 1)
2116 		ret = 0;
2117 
2118 	inode_unlock(inode);
2119 	return ret;
2120 }
2121 
2122 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2123 {
2124 	if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
2125 		return inode->i_sb->s_maxbytes;
2126 
2127 	return i_size_read(inode);
2128 }
2129 
/*
 * Read one page of @inode, either by zero-filling it or by adding it to a
 * read bio.
 *
 * @inode:		file the page belongs to
 * @page:		page to fill; it is unlocked here only on the zero-fill path
 * @nr_pages:		bounds the block range requested from f2fs_map_blocks()
 *			and is passed on to f2fs_grab_read_bio()
 * @map:		mapping result carried over between calls and reused when
 *			it already covers this page's block
 * @bio_ret:		in/out: bio under construction; always written on return
 * @last_block_in_bio:	in/out: block address of the last page added to the bio
 * @is_readahead:	adds REQ_RAHEAD to a newly grabbed bio
 *
 * Returns 0 on success or a negative errno: the f2fs_map_blocks() or
 * f2fs_grab_read_bio() error, -EFSCORRUPTED for an invalid block address,
 * or -EIO when fs-verity rejects a zero-filled page.  On error the page is
 * neither unlocked nor marked uptodate by this function.
 */
2130 static int f2fs_read_single_page(struct inode *inode, struct page *page,
2131 					unsigned nr_pages,
2132 					struct f2fs_map_blocks *map,
2133 					struct bio **bio_ret,
2134 					sector_t *last_block_in_bio,
2135 					bool is_readahead)
2136 {
2137 	struct bio *bio = *bio_ret;
2138 	const unsigned blocksize = blks_to_bytes(inode, 1);
2139 	sector_t block_in_file;
2140 	sector_t last_block;
2141 	sector_t last_block_in_file;
2142 	sector_t block_nr;
2143 	int ret = 0;
2144 
	/*
	 * [block_in_file, last_block) is the window to map, clamped to the
	 * read limit rounded up to a whole block.
	 */
2145 	block_in_file = (sector_t)page_index(page);
2146 	last_block = block_in_file + nr_pages;
2147 	last_block_in_file = bytes_to_blks(inode,
2148 			f2fs_readpage_limit(inode) + blocksize - 1);
2149 	if (last_block > last_block_in_file)
2150 		last_block = last_block_in_file;
2151 
2152 	/* just zeroing out page which is beyond EOF */
2153 	if (block_in_file >= last_block)
2154 		goto zero_out;
2155 	/*
2156 	 * Map blocks using the previous result first.
2157 	 */
2158 	if ((map->m_flags & F2FS_MAP_MAPPED) &&
2159 			block_in_file > map->m_lblk &&
2160 			block_in_file < (map->m_lblk + map->m_len))
2161 		goto got_it;
2162 
2163 	/*
2164 	 * Then do more f2fs_map_blocks() calls until we are
2165 	 * done with this page.
2166 	 */
2167 	map->m_lblk = block_in_file;
2168 	map->m_len = last_block - block_in_file;
2169 
2170 	ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
2171 	if (ret)
2172 		goto out;
2173 got_it:
2174 	if ((map->m_flags & F2FS_MAP_MAPPED)) {
		/* translate the file block into its block address and check it */
2175 		block_nr = map->m_pblk + block_in_file - map->m_lblk;
2176 		SetPageMappedToDisk(page);
2177 
2178 		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2179 						DATA_GENERIC_ENHANCE_READ)) {
2180 			ret = -EFSCORRUPTED;
2181 			f2fs_handle_error(F2FS_I_SB(inode),
2182 						ERROR_INVALID_BLKADDR);
2183 			goto out;
2184 		}
2185 	} else {
		/*
		 * Unmapped block, or (via the goto above) a page beyond the
		 * read limit: serve zeroes and finish the page without I/O.
		 */
2186 zero_out:
2187 		zero_user_segment(page, 0, PAGE_SIZE);
2188 		if (f2fs_need_verity(inode, page->index) &&
2189 		    !fsverity_verify_page(page)) {
2190 			ret = -EIO;
2191 			goto out;
2192 		}
2193 		if (!PageUptodate(page))
2194 			SetPageUptodate(page);
2195 		unlock_page(page);
2196 		goto out;
2197 	}
2198 
2199 	/*
2200 	 * This page will go to BIO.  Do we need to send this
2201 	 * BIO off first?
2202 	 */
2203 	if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2204 				       *last_block_in_bio, block_nr) ||
2205 		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
		/* also entered from below when bio_add_page() cannot take the page */
2206 submit_and_realloc:
2207 		f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2208 		bio = NULL;
2209 	}
2210 	if (bio == NULL) {
2211 		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
2212 				is_readahead ? REQ_RAHEAD : 0, page->index,
2213 				false);
2214 		if (IS_ERR(bio)) {
2215 			ret = PTR_ERR(bio);
			/* never hand an ERR_PTR back through *bio_ret */
2216 			bio = NULL;
2217 			goto out;
2218 		}
2219 	}
2220 
2221 	/*
2222 	 * If the page is under writeback, we need to wait for
2223 	 * its completion to see the correct decrypted data.
2224 	 */
2225 	f2fs_wait_on_block_writeback(inode, block_nr);
2226 
2227 	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2228 		goto submit_and_realloc;
2229 
2230 	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2231 	f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
2232 							F2FS_BLKSIZE);
2233 	*last_block_in_bio = block_nr;
2234 out:
2235 	*bio_ret = bio;
2236 	return ret;
2237 }
2238 
2239 #ifdef CONFIG_F2FS_FS_COMPRESSION
2240 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2241 				unsigned nr_pages, sector_t *last_block_in_bio,
2242 				bool is_readahead, bool for_write)
2243 {
2244 	struct dnode_of_data dn;
2245 	struct inode *inode = cc->inode;
2246 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2247 	struct bio *bio = *bio_ret;
2248 	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2249 	sector_t last_block_in_file;
2250 	const unsigned blocksize = blks_to_bytes(inode, 1);
2251 	struct decompress_io_ctx *dic = NULL;
2252 	struct extent_info ei = {};
2253 	bool from_dnode = true;
2254 	int i;
2255 	int ret = 0;
2256 
2257 	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2258 
2259 	last_block_in_file = bytes_to_blks(inode,
2260 			f2fs_readpage_limit(inode) + blocksize - 1);
2261 
2262 	/* get rid of pages beyond EOF */
2263 	for (i = 0; i < cc->cluster_size; i++) {
2264 		struct page *page = cc->rpages[i];
2265 
2266 		if (!page)
2267 			continue;
2268 		if ((sector_t)page->index >= last_block_in_file) {
2269 			zero_user_segment(page, 0, PAGE_SIZE);
2270 			if (!PageUptodate(page))
2271 				SetPageUptodate(page);
2272 		} else if (!PageUptodate(page)) {
2273 			continue;
2274 		}
2275 		unlock_page(page);
2276 		if (for_write)
2277 			put_page(page);
2278 		cc->rpages[i] = NULL;
2279 		cc->nr_rpages--;
2280 	}
2281 
2282 	/* we are done since all pages are beyond EOF */
2283 	if (f2fs_cluster_is_empty(cc))
2284 		goto out;
2285 
2286 	if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
2287 		from_dnode = false;
2288 
2289 	if (!from_dnode)
2290 		goto skip_reading_dnode;
2291 
2292 	set_new_dnode(&dn, inode, NULL, NULL, 0);
2293 	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2294 	if (ret)
2295 		goto out;
2296 
2297 	if (unlikely(f2fs_cp_error(sbi))) {
2298 		ret = -EIO;
2299 		goto out_put_dnode;
2300 	}
2301 	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
2302 
2303 skip_reading_dnode:
2304 	for (i = 1; i < cc->cluster_size; i++) {
2305 		block_t blkaddr;
2306 
2307 		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2308 					dn.ofs_in_node + i) :
2309 					ei.blk + i - 1;
2310 
2311 		if (!__is_valid_data_blkaddr(blkaddr))
2312 			break;
2313 
2314 		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2315 			ret = -EFAULT;
2316 			goto out_put_dnode;
2317 		}
2318 		cc->nr_cpages++;
2319 
2320 		if (!from_dnode && i >= ei.c_len)
2321 			break;
2322 	}
2323 
2324 	/* nothing to decompress */
2325 	if (cc->nr_cpages == 0) {
2326 		ret = 0;
2327 		goto out_put_dnode;
2328 	}
2329 
2330 	dic = f2fs_alloc_dic(cc);
2331 	if (IS_ERR(dic)) {
2332 		ret = PTR_ERR(dic);
2333 		goto out_put_dnode;
2334 	}
2335 
2336 	for (i = 0; i < cc->nr_cpages; i++) {
2337 		struct page *page = dic->cpages[i];
2338 		block_t blkaddr;
2339 		struct bio_post_read_ctx *ctx;
2340 
2341 		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2342 					dn.ofs_in_node + i + 1) :
2343 					ei.blk + i;
2344 
2345 		f2fs_wait_on_block_writeback(inode, blkaddr);
2346 
2347 		if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
2348 			if (atomic_dec_and_test(&dic->remaining_pages))
2349 				f2fs_decompress_cluster(dic, true);
2350 			continue;
2351 		}
2352 
2353 		if (bio && (!page_is_mergeable(sbi, bio,
2354 					*last_block_in_bio, blkaddr) ||
2355 		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2356 submit_and_realloc:
2357 			f2fs_submit_read_bio(sbi, bio, DATA);
2358 			bio = NULL;
2359 		}
2360 
2361 		if (!bio) {
2362 			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
2363 					is_readahead ? REQ_RAHEAD : 0,
2364 					page->index, for_write);
2365 			if (IS_ERR(bio)) {
2366 				ret = PTR_ERR(bio);
2367 				f2fs_decompress_end_io(dic, ret, true);
2368 				f2fs_put_dnode(&dn);
2369 				*bio_ret = NULL;
2370 				return ret;
2371 			}
2372 		}
2373 
2374 		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2375 			goto submit_and_realloc;
2376 
2377 		ctx = get_post_read_ctx(bio);
2378 		ctx->enabled_steps |= STEP_DECOMPRESS;
2379 		refcount_inc(&dic->refcnt);
2380 
2381 		inc_page_count(sbi, F2FS_RD_DATA);
2382 		f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
2383 		*last_block_in_bio = blkaddr;
2384 	}
2385 
2386 	if (from_dnode)
2387 		f2fs_put_dnode(&dn);
2388 
2389 	*bio_ret = bio;
2390 	return 0;
2391 
2392 out_put_dnode:
2393 	if (from_dnode)
2394 		f2fs_put_dnode(&dn);
2395 out:
2396 	for (i = 0; i < cc->cluster_size; i++) {
2397 		if (cc->rpages[i]) {
2398 			ClearPageUptodate(cc->rpages[i]);
2399 			unlock_page(cc->rpages[i]);
2400 		}
2401 	}
2402 	*bio_ret = bio;
2403 	return ret;
2404 }
2405 #endif
2406 
2407 /*
2408  * This function was originally taken from fs/mpage.c, and customized for f2fs.
2409  * Major change was from block_size == page_size in f2fs by default.
2410  */
/*
 * Read a single @page (when @rac is NULL) or every page handed out by @rac.
 *
 * One in-progress bio and one cached f2fs_map_blocks result are shared
 * across all pages; whatever bio is left at the end is submitted here.
 * With CONFIG_F2FS_FS_COMPRESSION, pages of compressed clusters are gathered
 * in a compress_ctx and read through f2fs_read_multi_pages(); everything
 * else goes through f2fs_read_single_page().
 *
 * Returns the value left in ret by the last helper that was called.
 */
2411 static int f2fs_mpage_readpages(struct inode *inode,
2412 		struct readahead_control *rac, struct page *page)
2413 {
2414 	struct bio *bio = NULL;
2415 	sector_t last_block_in_bio = 0;
2416 	struct f2fs_map_blocks map;
2417 #ifdef CONFIG_F2FS_FS_COMPRESSION
2418 	struct compress_ctx cc = {
2419 		.inode = inode,
2420 		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2421 		.cluster_size = F2FS_I(inode)->i_cluster_size,
2422 		.cluster_idx = NULL_CLUSTER,
2423 		.rpages = NULL,
2424 		.cpages = NULL,
2425 		.nr_rpages = 0,
2426 		.nr_cpages = 0,
2427 	};
	/* index of the last cluster found to be not compressed */
2428 	pgoff_t nc_cluster_idx = NULL_CLUSTER;
2429 #endif
2430 	unsigned nr_pages = rac ? readahead_count(rac) : 1;
	/* nr_pages counts down in the loop; helpers get the initial count */
2431 	unsigned max_nr_pages = nr_pages;
2432 	int ret = 0;
2433 
2434 	map.m_pblk = 0;
2435 	map.m_lblk = 0;
2436 	map.m_len = 0;
2437 	map.m_flags = 0;
2438 	map.m_next_pgofs = NULL;
2439 	map.m_next_extent = NULL;
2440 	map.m_seg_type = NO_CHECK_TYPE;
2441 	map.m_may_create = false;
2442 
2443 	for (; nr_pages; nr_pages--) {
2444 		if (rac) {
2445 			page = readahead_page(rac);
2446 			prefetchw(&page->flags);
2447 		}
2448 
2449 #ifdef CONFIG_F2FS_FS_COMPRESSION
2450 		if (f2fs_compressed_file(inode)) {
2451 			/* the page cannot join the pending cluster: submit what was gathered */
2452 			if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
2453 				ret = f2fs_read_multi_pages(&cc, &bio,
2454 							max_nr_pages,
2455 							&last_block_in_bio,
2456 							rac != NULL, false);
2457 				f2fs_destroy_compress_ctx(&cc, false);
2458 				if (ret)
2459 					goto set_error_page;
2460 			}
2461 			if (cc.cluster_idx == NULL_CLUSTER) {
				/* same cluster as the last non-compressed one: skip the lookup */
2462 				if (nc_cluster_idx ==
2463 					page->index >> cc.log_cluster_size) {
2464 					goto read_single_page;
2465 				}
2466 
2467 				ret = f2fs_is_compressed_cluster(inode, page->index);
2468 				if (ret < 0)
2469 					goto set_error_page;
2470 				else if (!ret) {
2471 					nc_cluster_idx =
2472 						page->index >> cc.log_cluster_size;
2473 					goto read_single_page;
2474 				}
2475 
2476 				nc_cluster_idx = NULL_CLUSTER;
2477 			}
2478 			ret = f2fs_init_compress_ctx(&cc);
2479 			if (ret)
2480 				goto set_error_page;
2481 
2482 			f2fs_compress_ctx_add_page(&cc, page);
2483 
2484 			goto next_page;
2485 		}
2486 read_single_page:
2487 #endif
2488 
2489 		ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
2490 					&bio, &last_block_in_bio, rac);
2491 		if (ret) {
			/* failed page: zeroed and unlocked, not marked uptodate */
2492 #ifdef CONFIG_F2FS_FS_COMPRESSION
2493 set_error_page:
2494 #endif
2495 			zero_user_segment(page, 0, PAGE_SIZE);
2496 			unlock_page(page);
2497 		}
2498 #ifdef CONFIG_F2FS_FS_COMPRESSION
2499 next_page:
2500 #endif
2501 		if (rac)
2502 			put_page(page);
2503 
2504 #ifdef CONFIG_F2FS_FS_COMPRESSION
2505 		if (f2fs_compressed_file(inode)) {
2506 			/* last page: flush the cluster that is still pending */
2507 			if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
2508 				ret = f2fs_read_multi_pages(&cc, &bio,
2509 							max_nr_pages,
2510 							&last_block_in_bio,
2511 							rac != NULL, false);
2512 				f2fs_destroy_compress_ctx(&cc, false);
2513 			}
2514 		}
2515 #endif
2516 	}
2517 	if (bio)
2518 		f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
2519 	return ret;
2520 }
2521 
2522 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2523 {
2524 	struct page *page = &folio->page;
2525 	struct inode *inode = page_file_mapping(page)->host;
2526 	int ret = -EAGAIN;
2527 
2528 	trace_f2fs_readpage(page, DATA);
2529 
2530 	if (!f2fs_is_compress_backend_ready(inode)) {
2531 		unlock_page(page);
2532 		return -EOPNOTSUPP;
2533 	}
2534 
2535 	/* If the file has inline data, try to read it directly */
2536 	if (f2fs_has_inline_data(inode))
2537 		ret = f2fs_read_inline_data(inode, page);
2538 	if (ret == -EAGAIN)
2539 		ret = f2fs_mpage_readpages(inode, NULL, page);
2540 	return ret;
2541 }
2542 
2543 static void f2fs_readahead(struct readahead_control *rac)
2544 {
2545 	struct inode *inode = rac->mapping->host;
2546 
2547 	trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2548 
2549 	if (!f2fs_is_compress_backend_ready(inode))
2550 		return;
2551 
2552 	/* If the file has inline data, skip readahead */
2553 	if (f2fs_has_inline_data(inode))
2554 		return;
2555 
2556 	f2fs_mpage_readpages(inode, rac, NULL);
2557 }
2558 
2559 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2560 {
2561 	struct inode *inode = fio->page->mapping->host;
2562 	struct page *mpage, *page;
2563 	gfp_t gfp_flags = GFP_NOFS;
2564 
2565 	if (!f2fs_encrypted_file(inode))
2566 		return 0;
2567 
2568 	page = fio->compressed_page ? fio->compressed_page : fio->page;
2569 
2570 	/* wait for GCed page writeback via META_MAPPING */
2571 	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2572 
2573 	if (fscrypt_inode_uses_inline_crypto(inode))
2574 		return 0;
2575 
2576 retry_encrypt:
2577 	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2578 					PAGE_SIZE, 0, gfp_flags);
2579 	if (IS_ERR(fio->encrypted_page)) {
2580 		/* flush pending IOs and wait for a while in the ENOMEM case */
2581 		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2582 			f2fs_flush_merged_writes(fio->sbi);
2583 			memalloc_retry_wait(GFP_NOFS);
2584 			gfp_flags |= __GFP_NOFAIL;
2585 			goto retry_encrypt;
2586 		}
2587 		return PTR_ERR(fio->encrypted_page);
2588 	}
2589 
2590 	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2591 	if (mpage) {
2592 		if (PageUptodate(mpage))
2593 			memcpy(page_address(mpage),
2594 				page_address(fio->encrypted_page), PAGE_SIZE);
2595 		f2fs_put_page(mpage, 1);
2596 	}
2597 	return 0;
2598 }
2599 
2600 static inline bool check_inplace_update_policy(struct inode *inode,
2601 				struct f2fs_io_info *fio)
2602 {
2603 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2604 
2605 	if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
2606 	    is_inode_flag_set(inode, FI_OPU_WRITE))
2607 		return false;
2608 	if (IS_F2FS_IPU_FORCE(sbi))
2609 		return true;
2610 	if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
2611 		return true;
2612 	if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
2613 		return true;
2614 	if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
2615 	    utilization(sbi) > SM_I(sbi)->min_ipu_util)
2616 		return true;
2617 
2618 	/*
2619 	 * IPU for rewrite async pages
2620 	 */
2621 	if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
2622 	    !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
2623 		return true;
2624 
2625 	/* this is only set during fdatasync */
2626 	if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
2627 		return true;
2628 
2629 	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2630 			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2631 		return true;
2632 
2633 	return false;
2634 }
2635 
2636 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2637 {
2638 	/* swap file is migrating in aligned write mode */
2639 	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2640 		return false;
2641 
2642 	if (f2fs_is_pinned_file(inode))
2643 		return true;
2644 
2645 	/* if this is cold file, we should overwrite to avoid fragmentation */
2646 	if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
2647 		return true;
2648 
2649 	return check_inplace_update_policy(inode, fio);
2650 }
2651 
2652 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2653 {
2654 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2655 
2656 	/* The below cases were checked when setting it. */
2657 	if (f2fs_is_pinned_file(inode))
2658 		return false;
2659 	if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2660 		return true;
2661 	if (f2fs_lfs_mode(sbi))
2662 		return true;
2663 	if (S_ISDIR(inode->i_mode))
2664 		return true;
2665 	if (IS_NOQUOTA(inode))
2666 		return true;
2667 	if (f2fs_is_atomic_file(inode))
2668 		return true;
2669 
2670 	/* swap file is migrating in aligned write mode */
2671 	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2672 		return true;
2673 
2674 	if (is_inode_flag_set(inode, FI_OPU_WRITE))
2675 		return true;
2676 
2677 	if (fio) {
2678 		if (page_private_gcing(fio->page))
2679 			return true;
2680 		if (page_private_dummy(fio->page))
2681 			return true;
2682 		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2683 			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2684 			return true;
2685 	}
2686 	return false;
2687 }
2688 
2689 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2690 {
2691 	struct inode *inode = fio->page->mapping->host;
2692 
2693 	if (f2fs_should_update_outplace(inode, fio))
2694 		return false;
2695 
2696 	return f2fs_should_update_inplace(inode, fio);
2697 }
2698 
/*
 * Write the data page described by @fio, either in place (IPU) or out of
 * place (OPU).
 *
 * The old block address comes from the read extent cache when an in-place
 * update is wanted and the cache knows the block, otherwise from the dnode
 * (the dnode of the COW inode for atomic files).
 *
 * fio->need_lock drives the op lock (f2fs_trylock_op/f2fs_unlock_op):
 *   LOCK_REQ   - try-lock before the dnode lookup, unlock before returning;
 *   LOCK_RETRY - try-lock only once an out-of-place write is known to be
 *                needed, then continue as LOCK_REQ;
 *   LOCK_DONE  - no locking is done here.
 *
 * Returns 0 on success (also when the page turns out to be truncated),
 * -EAGAIN when the op lock could not be taken, -EFSCORRUPTED for an invalid
 * old block address, or the error of the dnode lookup, node info lookup,
 * encryption or in-place write.
 */
2699 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2700 {
2701 	struct page *page = fio->page;
2702 	struct inode *inode = page->mapping->host;
2703 	struct dnode_of_data dn;
2704 	struct node_info ni;
2705 	bool ipu_force = false;
2706 	int err = 0;
2707 
2708 	/* Use COW inode to make dnode_of_data for atomic write */
2709 	if (f2fs_is_atomic_file(inode))
2710 		set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
2711 	else
2712 		set_new_dnode(&dn, inode, NULL, NULL, 0);
2713 
	/* fast path: IPU wanted and the extent cache already has the old block */
2714 	if (need_inplace_update(fio) &&
2715 	    f2fs_lookup_read_extent_cache_block(inode, page->index,
2716 						&fio->old_blkaddr)) {
2717 		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2718 						DATA_GENERIC_ENHANCE)) {
2719 			f2fs_handle_error(fio->sbi,
2720 						ERROR_INVALID_BLKADDR);
2721 			return -EFSCORRUPTED;
2722 		}
2723 
2724 		ipu_force = true;
2725 		fio->need_lock = LOCK_DONE;
2726 		goto got_it;
2727 	}
2728 
2729 	/* Avoid a deadlock between page->lock and f2fs_lock_op: only try-lock */
2730 	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2731 		return -EAGAIN;
2732 
2733 	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
2734 	if (err)
2735 		goto out;
2736 
2737 	fio->old_blkaddr = dn.data_blkaddr;
2738 
2739 	/* This page is already truncated */
2740 	if (fio->old_blkaddr == NULL_ADDR) {
2741 		ClearPageUptodate(page);
2742 		clear_page_private_gcing(page);
2743 		goto out_writepage;
2744 	}
2745 got_it:
2746 	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2747 		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2748 						DATA_GENERIC_ENHANCE)) {
2749 		err = -EFSCORRUPTED;
2750 		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
2751 		goto out_writepage;
2752 	}
2753 
2754 	/*
2755 	 * If current allocation needs SSR,
2756 	 * it had better in-place writes for updated data.
2757 	 */
2758 	if (ipu_force ||
2759 		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
2760 					need_inplace_update(fio))) {
2761 		err = f2fs_encrypt_one_page(fio);
2762 		if (err)
2763 			goto out_writepage;
2764 
2765 		set_page_writeback(page);
		/* dnode and op lock are released before the in-place write is issued */
2766 		f2fs_put_dnode(&dn);
2767 		if (fio->need_lock == LOCK_REQ)
2768 			f2fs_unlock_op(fio->sbi);
2769 		err = f2fs_inplace_write_data(fio);
2770 		if (err) {
			/* undo: release the bounce page and end the writeback started above */
2771 			if (fscrypt_inode_uses_fs_layer_crypto(inode))
2772 				fscrypt_finalize_bounce_page(&fio->encrypted_page);
2773 			if (PageWriteback(page))
2774 				end_page_writeback(page);
2775 		} else {
2776 			set_inode_flag(inode, FI_UPDATE_WRITE);
2777 		}
2778 		trace_f2fs_do_write_data_page(fio->page, IPU);
2779 		return err;
2780 	}
2781 
	/* out-of-place write from here on: the deferred op lock is taken now */
2782 	if (fio->need_lock == LOCK_RETRY) {
2783 		if (!f2fs_trylock_op(fio->sbi)) {
2784 			err = -EAGAIN;
2785 			goto out_writepage;
2786 		}
2787 		fio->need_lock = LOCK_REQ;
2788 	}
2789 
2790 	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
2791 	if (err)
2792 		goto out_writepage;
2793 
2794 	fio->version = ni.version;
2795 
2796 	err = f2fs_encrypt_one_page(fio);
2797 	if (err)
2798 		goto out_writepage;
2799 
2800 	set_page_writeback(page);
2801 
2802 	if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
2803 		f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2804 
2805 	/* LFS mode write path */
2806 	f2fs_outplace_write_data(&dn, fio);
2807 	trace_f2fs_do_write_data_page(page, OPU);
2808 	set_inode_flag(inode, FI_APPEND_WRITE);
2809 	if (page->index == 0)
2810 		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
2811 out_writepage:
2812 	f2fs_put_dnode(&dn);
2813 out:
2814 	if (fio->need_lock == LOCK_REQ)
2815 		f2fs_unlock_op(fio->sbi);
2816 	return err;
2817 }
2818 
/*
 * Write back one data page; the page is passed in locked.
 *
 * @page:		page to write
 * @submitted:		if non-NULL, receives fio.submitted on the normal return
 *			path, except for reclaim writeback or after a
 *			checkpoint error
 * @bio:		in/out bio pointer stored in the fio; may be NULL
 * @last_block:		stored in the fio alongside @bio; may be NULL
 * @wbc:		writeback control of the caller
 * @io_type:		iostat type stored in the fio
 * @compr_blocks:	stored in the fio; when non-zero the page is written
 *			even if it lies at or beyond the last page of the file
 * @allow_balance:	permits the f2fs_balance_fs() call once the page has
 *			been unlocked
 *
 * Returns:
 *   0				page written or deliberately skipped; unlocked
 *   AOP_WRITEPAGE_ACTIVATE	page redirtied and returned still locked
 *   negative errno		page redirtied after a write error; unlocked
 */
2819 int f2fs_write_single_data_page(struct page *page, int *submitted,
2820 				struct bio **bio,
2821 				sector_t *last_block,
2822 				struct writeback_control *wbc,
2823 				enum iostat_type io_type,
2824 				int compr_blocks,
2825 				bool allow_balance)
2826 {
2827 	struct inode *inode = page->mapping->host;
2828 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2829 	loff_t i_size = i_size_read(inode);
2830 	const pgoff_t end_index = ((unsigned long long)i_size)
2831 							>> PAGE_SHIFT;
2832 	loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
2833 	unsigned offset = 0;
2834 	bool need_balance_fs = false;
2835 	bool quota_inode = IS_NOQUOTA(inode);
2836 	int err = 0;
2837 	struct f2fs_io_info fio = {
2838 		.sbi = sbi,
2839 		.ino = inode->i_ino,
2840 		.type = DATA,
2841 		.op = REQ_OP_WRITE,
2842 		.op_flags = wbc_to_write_flags(wbc),
2843 		.old_blkaddr = NULL_ADDR,
2844 		.page = page,
2845 		.encrypted_page = NULL,
2846 		.submitted = 0,
2847 		.compr_blocks = compr_blocks,
2848 		.need_lock = LOCK_RETRY,
2849 		.post_read = f2fs_post_read_required(inode) ? 1 : 0,
2850 		.io_type = io_type,
2851 		.io_wbc = wbc,
2852 		.bio = bio,
2853 		.last_block = last_block,
2854 	};
2855 
2856 	trace_f2fs_writepage(page, DATA);
2857 
2858 	/* we should bypass data pages to proceed the kworker jobs */
2859 	if (unlikely(f2fs_cp_error(sbi))) {
2860 		mapping_set_error(page->mapping, -EIO);
2861 		/*
2862 		 * don't drop any dirty dentry pages, to keep the latest
2863 		 * directory structure.
2864 		 */
2865 		if (S_ISDIR(inode->i_mode) &&
2866 				!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
2867 			goto redirty_out;
2868 
2869 		/* keep data pages in remount-ro mode */
2870 		if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
2871 			goto redirty_out;
2872 		goto out;
2873 	}
2874 
2875 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2876 		goto redirty_out;
2877 
	/* pages fully inside i_size, verity builds and compressed writes skip the EOF handling */
2878 	if (page->index < end_index ||
2879 			f2fs_verity_in_progress(inode) ||
2880 			compr_blocks)
2881 		goto write;
2882 
2883 	/*
2884 	 * If the offset is out-of-range of file size,
2885 	 * this page does not have to be written to disk.
2886 	 */
2887 	offset = i_size & (PAGE_SIZE - 1);
2888 	if ((page->index >= end_index + 1) || !offset)
2889 		goto out;
2890 
	/* page straddling EOF: zero the part past i_size before writing it */
2891 	zero_user_segment(page, offset, PAGE_SIZE);
2892 write:
2893 	if (f2fs_is_drop_cache(inode))
2894 		goto out;
2895 
2896 	/* Dentry/quota blocks are controlled by checkpoint */
2897 	if (S_ISDIR(inode->i_mode) || quota_inode) {
2898 		/*
2899 		 * We need to wait for node_write to avoid block allocation during
2900 		 * checkpoint. This can only happen to quota writes which can cause
2901 		 * the below discard race condition.
2902 		 */
2903 		if (quota_inode)
2904 			f2fs_down_read(&sbi->node_write);
2905 
2906 		fio.need_lock = LOCK_DONE;
2907 		err = f2fs_do_write_data_page(&fio);
2908 
2909 		if (quota_inode)
2910 			f2fs_up_read(&sbi->node_write);
2911 
2912 		goto done;
2913 	}
2914 
2915 	if (!wbc->for_reclaim)
2916 		need_balance_fs = true;
2917 	else if (has_not_enough_free_secs(sbi, 0, 0))
2918 		goto redirty_out;
2919 	else
2920 		set_inode_flag(inode, FI_HOT_DATA);
2921 
	/* -EAGAIN here means "not handled as inline data, do a normal write" */
2922 	err = -EAGAIN;
2923 	if (f2fs_has_inline_data(inode)) {
2924 		err = f2fs_write_inline_data(inode, page);
2925 		if (!err)
2926 			goto out;
2927 	}
2928 
2929 	if (err == -EAGAIN) {
		/* first attempt uses LOCK_RETRY; on -EAGAIN retry once with LOCK_REQ */
2930 		err = f2fs_do_write_data_page(&fio);
2931 		if (err == -EAGAIN) {
2932 			fio.need_lock = LOCK_REQ;
2933 			err = f2fs_do_write_data_page(&fio);
2934 		}
2935 	}
2936 
2937 	if (err) {
2938 		file_set_keep_isize(inode);
2939 	} else {
		/* last_disk_size only ever grows, up to the end of this page */
2940 		spin_lock(&F2FS_I(inode)->i_size_lock);
2941 		if (F2FS_I(inode)->last_disk_size < psize)
2942 			F2FS_I(inode)->last_disk_size = psize;
2943 		spin_unlock(&F2FS_I(inode)->i_size_lock);
2944 	}
2945 
2946 done:
	/* -ENOENT is not redirtied; it falls through to the normal exit */
2947 	if (err && err != -ENOENT)
2948 		goto redirty_out;
2949 
2950 out:
2951 	inode_dec_dirty_pages(inode);
2952 	if (err) {
2953 		ClearPageUptodate(page);
2954 		clear_page_private_gcing(page);
2955 	}
2956 
2957 	if (wbc->for_reclaim) {
2958 		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2959 		clear_inode_flag(inode, FI_HOT_DATA);
2960 		f2fs_remove_dirty_inode(inode);
		/* only the local pointer is cleared, so *submitted is not updated */
2961 		submitted = NULL;
2962 	}
2963 	unlock_page(page);
2964 	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2965 			!F2FS_I(inode)->wb_task && allow_balance)
2966 		f2fs_balance_fs(sbi, need_balance_fs);
2967 
2968 	if (unlikely(f2fs_cp_error(sbi))) {
2969 		f2fs_submit_merged_write(sbi, DATA);
2970 		if (bio && *bio)
2971 			f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2972 		submitted = NULL;
2973 	}
2974 
2975 	if (submitted)
2976 		*submitted = fio.submitted;
2977 
2978 	return 0;
2979 
2980 redirty_out:
2981 	redirty_page_for_writepage(wbc, page);
2982 	/*
2983 	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
2984 	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2985 	 * file_write_and_wait_range() will see EIO error, which is critical
2986 	 * to return value of fsync() followed by atomic_write failure to user.
2987 	 */
2988 	if (!err || wbc->for_reclaim)
2989 		return AOP_WRITEPAGE_ACTIVATE;
2990 	unlock_page(page);
2991 	return err;
2992 }
2993 
2994 static int f2fs_write_data_page(struct page *page,
2995 					struct writeback_control *wbc)
2996 {
2997 #ifdef CONFIG_F2FS_FS_COMPRESSION
2998 	struct inode *inode = page->mapping->host;
2999 
3000 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
3001 		goto out;
3002 
3003 	if (f2fs_compressed_file(inode)) {
3004 		if (f2fs_is_compressed_cluster(inode, page->index)) {
3005 			redirty_page_for_writepage(wbc, page);
3006 			return AOP_WRITEPAGE_ACTIVATE;
3007 		}
3008 	}
3009 out:
3010 #endif
3011 
3012 	return f2fs_write_single_data_page(page, NULL, NULL, NULL,
3013 						wbc, FS_DATA_IO, 0, true);
3014 }
3015 
3016 /*
3017  * This function was copied from write_cache_pages from mm/page-writeback.c.
3018  * The major change is making write step of cold data page separately from
3019  * warm/hot data page.
3020  */
/*
 * Walk the tagged folios of @mapping in the range selected by @wbc and write
 * them back.
 *
 * Pages are gathered in batches of up to F2FS_ONSTACK_PAGES into pages[]
 * (one folio reference per entry, dropped by release_pages() after the
 * batch).  For compressed files the pages are collected per cluster in a
 * compress_ctx and written through f2fs_write_multi_pages(); all other pages
 * go through f2fs_write_single_data_page().
 *
 * Returns 0 or the first error that stopped the walk.
 * AOP_WRITEPAGE_ACTIVATE and -EAGAIN from the per-page write are handled
 * inside the loop and are not returned from there.
 */
3021 static int f2fs_write_cache_pages(struct address_space *mapping,
3022 					struct writeback_control *wbc,
3023 					enum iostat_type io_type)
3024 {
3025 	int ret = 0;
3026 	int done = 0, retry = 0;
3027 	struct page *pages[F2FS_ONSTACK_PAGES];
3028 	struct folio_batch fbatch;
3029 	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
3030 	struct bio *bio = NULL;
3031 	sector_t last_block;
3032 #ifdef CONFIG_F2FS_FS_COMPRESSION
3033 	struct inode *inode = mapping->host;
3034 	struct compress_ctx cc = {
3035 		.inode = inode,
3036 		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
3037 		.cluster_size = F2FS_I(inode)->i_cluster_size,
3038 		.cluster_idx = NULL_CLUSTER,
3039 		.rpages = NULL,
3040 		.nr_rpages = 0,
3041 		.cpages = NULL,
3042 		.valid_nr_cpages = 0,
3043 		.rbuf = NULL,
3044 		.cbuf = NULL,
3045 		.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
3046 		.private = NULL,
3047 	};
3048 #endif
3049 	int nr_folios, p, idx;
3050 	int nr_pages;
3051 	pgoff_t index;
3052 	pgoff_t end;		/* Inclusive */
3053 	pgoff_t done_index;
3054 	int range_whole = 0;
3055 	xa_mark_t tag;
3056 	int nwritten = 0;
3057 	int submitted = 0;
3058 	int i;
3059 
3060 	folio_batch_init(&fbatch);
3061 
	/* few dirty pages (<= min_hot_blocks): flag the inode FI_HOT_DATA */
3062 	if (get_dirty_pages(mapping->host) <=
3063 				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
3064 		set_inode_flag(mapping->host, FI_HOT_DATA);
3065 	else
3066 		clear_inode_flag(mapping->host, FI_HOT_DATA);
3067 
3068 	if (wbc->range_cyclic) {
3069 		index = mapping->writeback_index; /* prev offset */
3070 		end = -1;
3071 	} else {
3072 		index = wbc->range_start >> PAGE_SHIFT;
3073 		end = wbc->range_end >> PAGE_SHIFT;
3074 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
3075 			range_whole = 1;
3076 	}
3077 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3078 		tag = PAGECACHE_TAG_TOWRITE;
3079 	else
3080 		tag = PAGECACHE_TAG_DIRTY;
3081 retry:
3082 	retry = 0;
3083 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3084 		tag_pages_for_writeback(mapping, index, end);
3085 	done_index = index;
3086 	while (!done && !retry && (index <= end)) {
3087 		nr_pages = 0;
		/* gather phase: fill pages[] from successive folio batches */
3088 again:
3089 		nr_folios = filemap_get_folios_tag(mapping, &index, end,
3090 				tag, &fbatch);
3091 		if (nr_folios == 0) {
3092 			if (nr_pages)
3093 				goto write;
3094 			break;
3095 		}
3096 
3097 		for (i = 0; i < nr_folios; i++) {
3098 			struct folio *folio = fbatch.folios[i];
3099 
3100 			idx = 0;
3101 			p = folio_nr_pages(folio);
3102 add_more:
3103 			pages[nr_pages] = folio_page(folio, idx);
3104 			folio_get(folio);
3105 			if (++nr_pages == F2FS_ONSTACK_PAGES) {
				/* pages[] is full: resume the lookup right after this page */
3106 				index = folio->index + idx + 1;
3107 				folio_batch_release(&fbatch);
3108 				goto write;
3109 			}
3110 			if (++idx < p)
3111 				goto add_more;
3112 		}
3113 		folio_batch_release(&fbatch);
3114 		goto again;
3115 write:
3116 		for (i = 0; i < nr_pages; i++) {
3117 			struct page *page = pages[i];
3118 			struct folio *folio = page_folio(page);
3119 			bool need_readd;
			/* re-entered for the same page after a full cluster was flushed */
3120 readd:
3121 			need_readd = false;
3122 #ifdef CONFIG_F2FS_FS_COMPRESSION
3123 			if (f2fs_compressed_file(inode)) {
3124 				void *fsdata = NULL;
3125 				struct page *pagep;
3126 				int ret2;
3127 
3128 				ret = f2fs_init_compress_ctx(&cc);
3129 				if (ret) {
3130 					done = 1;
3131 					break;
3132 				}
3133 
				/* page belongs to another cluster: write out the pending one first */
3134 				if (!f2fs_cluster_can_merge_page(&cc,
3135 								folio->index)) {
3136 					ret = f2fs_write_multi_pages(&cc,
3137 						&submitted, wbc, io_type);
3138 					if (!ret)
3139 						need_readd = true;
3140 					goto result;
3141 				}
3142 
3143 				if (unlikely(f2fs_cp_error(sbi)))
3144 					goto lock_folio;
3145 
3146 				if (!f2fs_cluster_is_empty(&cc))
3147 					goto lock_folio;
3148 
3149 				if (f2fs_all_cluster_page_ready(&cc,
3150 					pages, i, nr_pages, true))
3151 					goto lock_folio;
3152 
3153 				ret2 = f2fs_prepare_compress_overwrite(
3154 							inode, &pagep,
3155 							folio->index, &fsdata);
3156 				if (ret2 < 0) {
3157 					ret = ret2;
3158 					done = 1;
3159 					break;
3160 				} else if (ret2 &&
3161 					(!f2fs_compress_write_end(inode,
3162 						fsdata, folio->index, 1) ||
3163 					 !f2fs_all_cluster_page_ready(&cc,
3164 						pages, i, nr_pages,
3165 						false))) {
					/* restart the whole walk (see the retry handling below) */
3166 					retry = 1;
3167 					break;
3168 				}
3169 			}
3170 #endif
3171 			/* give a priority to WB_SYNC threads */
3172 			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3173 					wbc->sync_mode == WB_SYNC_NONE) {
3174 				done = 1;
3175 				break;
3176 			}
3177 #ifdef CONFIG_F2FS_FS_COMPRESSION
3178 lock_folio:
3179 #endif
3180 			done_index = folio->index;
3181 retry_write:
3182 			folio_lock(folio);
3183 
3184 			if (unlikely(folio->mapping != mapping)) {
3185 continue_unlock:
3186 				folio_unlock(folio);
3187 				continue;
3188 			}
3189 
3190 			if (!folio_test_dirty(folio)) {
3191 				/* someone wrote it for us */
3192 				goto continue_unlock;
3193 			}
3194 
3195 			if (folio_test_writeback(folio)) {
3196 				if (wbc->sync_mode == WB_SYNC_NONE)
3197 					goto continue_unlock;
3198 				f2fs_wait_on_page_writeback(&folio->page, DATA, true, true);
3199 			}
3200 
3201 			if (!folio_clear_dirty_for_io(folio))
3202 				goto continue_unlock;
3203 
3204 #ifdef CONFIG_F2FS_FS_COMPRESSION
3205 			if (f2fs_compressed_file(inode)) {
				/* extra reference for the page while it sits in the compress context */
3206 				folio_get(folio);
3207 				f2fs_compress_ctx_add_page(&cc, &folio->page);
3208 				continue;
3209 			}
3210 #endif
3211 			ret = f2fs_write_single_data_page(&folio->page,
3212 					&submitted, &bio, &last_block,
3213 					wbc, io_type, 0, true);
			/* AOP_WRITEPAGE_ACTIVATE returns the page still locked */
3214 			if (ret == AOP_WRITEPAGE_ACTIVATE)
3215 				folio_unlock(folio);
3216 #ifdef CONFIG_F2FS_FS_COMPRESSION
3217 result:
3218 #endif
3219 			nwritten += submitted;
3220 			wbc->nr_to_write -= submitted;
3221 
3222 			if (unlikely(ret)) {
3223 				/*
3224 				 * keep nr_to_write, since vfs uses this to
3225 				 * get # of written pages.
3226 				 */
3227 				if (ret == AOP_WRITEPAGE_ACTIVATE) {
3228 					ret = 0;
3229 					goto next;
3230 				} else if (ret == -EAGAIN) {
3231 					ret = 0;
					/* sync writeback waits and retries the page; otherwise move on */
3232 					if (wbc->sync_mode == WB_SYNC_ALL) {
3233 						f2fs_io_schedule_timeout(
3234 							DEFAULT_IO_TIMEOUT);
3235 						goto retry_write;
3236 					}
3237 					goto next;
3238 				}
3239 				done_index = folio->index +
3240 					folio_nr_pages(folio);
3241 				done = 1;
3242 				break;
3243 			}
3244 
3245 			if (wbc->nr_to_write <= 0 &&
3246 					wbc->sync_mode == WB_SYNC_NONE) {
3247 				done = 1;
3248 				break;
3249 			}
3250 next:
3251 			if (need_readd)
3252 				goto readd;
3253 		}
		/* drops the references taken while filling pages[] */
3254 		release_pages(pages, nr_pages);
3255 		cond_resched();
3256 	}
3257 #ifdef CONFIG_F2FS_FS_COMPRESSION
3258 	/* flush the pages remaining in the compress cluster */
3259 	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3260 		ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3261 		nwritten += submitted;
3262 		wbc->nr_to_write -= submitted;
3263 		if (ret) {
3264 			done = 1;
3265 			retry = 0;
3266 		}
3267 	}
3268 	if (f2fs_compressed_file(inode))
3269 		f2fs_destroy_compress_ctx(&cc, false);
3270 #endif
3271 	if (retry) {
		/* a retry rescans the whole file, not just the requested range */
3272 		index = 0;
3273 		end = -1;
3274 		goto retry;
3275 	}
3276 	if (wbc->range_cyclic && !done)
3277 		done_index = 0;
3278 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3279 		mapping->writeback_index = done_index;
3280 
3281 	if (nwritten)
3282 		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3283 								NULL, 0, DATA);
3284 	/* submit cached bio of IPU write */
3285 	if (bio)
3286 		f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3287 
3288 	return ret;
3289 }
3290 
3291 static inline bool __should_serialize_io(struct inode *inode,
3292 					struct writeback_control *wbc)
3293 {
3294 	/* to avoid deadlock in path of data flush */
3295 	if (F2FS_I(inode)->wb_task)
3296 		return false;
3297 
3298 	if (!S_ISREG(inode->i_mode))
3299 		return false;
3300 	if (IS_NOQUOTA(inode))
3301 		return false;
3302 
3303 	if (f2fs_need_compress_data(inode))
3304 		return true;
3305 	if (wbc->sync_mode != WB_SYNC_ALL)
3306 		return true;
3307 	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3308 		return true;
3309 	return false;
3310 }
3311 
3312 static int __f2fs_write_data_pages(struct address_space *mapping,
3313 						struct writeback_control *wbc,
3314 						enum iostat_type io_type)
3315 {
3316 	struct inode *inode = mapping->host;
3317 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3318 	struct blk_plug plug;
3319 	int ret;
3320 	bool locked = false;
3321 
3322 	/* deal with chardevs and other special file */
3323 	if (!mapping->a_ops->writepage)
3324 		return 0;
3325 
3326 	/* skip writing if there is no dirty page in this inode */
3327 	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3328 		return 0;
3329 
3330 	/* during POR, we don't need to trigger writepage at all. */
3331 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3332 		goto skip_write;
3333 
3334 	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3335 			wbc->sync_mode == WB_SYNC_NONE &&
3336 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3337 			f2fs_available_free_memory(sbi, DIRTY_DENTS))
3338 		goto skip_write;
3339 
3340 	/* skip writing in file defragment preparing stage */
3341 	if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3342 		goto skip_write;
3343 
3344 	trace_f2fs_writepages(mapping->host, wbc, DATA);
3345 
3346 	/* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3347 	if (wbc->sync_mode == WB_SYNC_ALL)
3348 		atomic_inc(&sbi->wb_sync_req[DATA]);
3349 	else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3350 		/* to avoid potential deadlock */
3351 		if (current->plug)
3352 			blk_finish_plug(current->plug);
3353 		goto skip_write;
3354 	}
3355 
3356 	if (__should_serialize_io(inode, wbc)) {
3357 		mutex_lock(&sbi->writepages);
3358 		locked = true;
3359 	}
3360 
3361 	blk_start_plug(&plug);
3362 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3363 	blk_finish_plug(&plug);
3364 
3365 	if (locked)
3366 		mutex_unlock(&sbi->writepages);
3367 
3368 	if (wbc->sync_mode == WB_SYNC_ALL)
3369 		atomic_dec(&sbi->wb_sync_req[DATA]);
3370 	/*
3371 	 * if some pages were truncated, we cannot guarantee its mapping->host
3372 	 * to detect pending bios.
3373 	 */
3374 
3375 	f2fs_remove_dirty_inode(inode);
3376 	return ret;
3377 
3378 skip_write:
3379 	wbc->pages_skipped += get_dirty_pages(inode);
3380 	trace_f2fs_writepages(mapping->host, wbc, DATA);
3381 	return 0;
3382 }
3383 
3384 static int f2fs_write_data_pages(struct address_space *mapping,
3385 			    struct writeback_control *wbc)
3386 {
3387 	struct inode *inode = mapping->host;
3388 
3389 	return __f2fs_write_data_pages(mapping, wbc,
3390 			F2FS_I(inode)->cp_task == current ?
3391 			FS_CP_DATA_IO : FS_DATA_IO);
3392 }
3393 
3394 void f2fs_write_failed(struct inode *inode, loff_t to)
3395 {
3396 	loff_t i_size = i_size_read(inode);
3397 
3398 	if (IS_NOQUOTA(inode))
3399 		return;
3400 
3401 	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3402 	if (to > i_size && !f2fs_verity_in_progress(inode)) {
3403 		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3404 		filemap_invalidate_lock(inode->i_mapping);
3405 
3406 		truncate_pagecache(inode, i_size);
3407 		f2fs_truncate_blocks(inode, i_size, true);
3408 
3409 		filemap_invalidate_unlock(inode->i_mapping);
3410 		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3411 	}
3412 }
3413 
3414 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3415 			struct page *page, loff_t pos, unsigned len,
3416 			block_t *blk_addr, bool *node_changed)
3417 {
3418 	struct inode *inode = page->mapping->host;
3419 	pgoff_t index = page->index;
3420 	struct dnode_of_data dn;
3421 	struct page *ipage;
3422 	bool locked = false;
3423 	int flag = F2FS_GET_BLOCK_PRE_AIO;
3424 	int err = 0;
3425 
3426 	/*
3427 	 * If a whole page is being written and we already preallocated all the
3428 	 * blocks, then there is no need to get a block address now.
3429 	 */
3430 	if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
3431 		return 0;
3432 
3433 	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
3434 	if (f2fs_has_inline_data(inode)) {
3435 		if (pos + len > MAX_INLINE_DATA(inode))
3436 			flag = F2FS_GET_BLOCK_DEFAULT;
3437 		f2fs_map_lock(sbi, flag);
3438 		locked = true;
3439 	} else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
3440 		f2fs_map_lock(sbi, flag);
3441 		locked = true;
3442 	}
3443 
3444 restart:
3445 	/* check inline_data */
3446 	ipage = f2fs_get_node_page(sbi, inode->i_ino);
3447 	if (IS_ERR(ipage)) {
3448 		err = PTR_ERR(ipage);
3449 		goto unlock_out;
3450 	}
3451 
3452 	set_new_dnode(&dn, inode, ipage, ipage, 0);
3453 
3454 	if (f2fs_has_inline_data(inode)) {
3455 		if (pos + len <= MAX_INLINE_DATA(inode)) {
3456 			f2fs_do_read_inline_data(page, ipage);
3457 			set_inode_flag(inode, FI_DATA_EXIST);
3458 			if (inode->i_nlink)
3459 				set_page_private_inline(ipage);
3460 			goto out;
3461 		}
3462 		err = f2fs_convert_inline_page(&dn, page);
3463 		if (err || dn.data_blkaddr != NULL_ADDR)
3464 			goto out;
3465 	}
3466 
3467 	if (!f2fs_lookup_read_extent_cache_block(inode, index,
3468 						 &dn.data_blkaddr)) {
3469 		if (locked) {
3470 			err = f2fs_reserve_block(&dn, index);
3471 			goto out;
3472 		}
3473 
3474 		/* hole case */
3475 		err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3476 		if (!err && dn.data_blkaddr != NULL_ADDR)
3477 			goto out;
3478 		f2fs_put_dnode(&dn);
3479 		f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3480 		WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3481 		locked = true;
3482 		goto restart;
3483 	}
3484 out:
3485 	if (!err) {
3486 		/* convert_inline_page can make node_changed */
3487 		*blk_addr = dn.data_blkaddr;
3488 		*node_changed = dn.node_changed;
3489 	}
3490 	f2fs_put_dnode(&dn);
3491 unlock_out:
3492 	if (locked)
3493 		f2fs_map_unlock(sbi, flag);
3494 	return err;
3495 }
3496 
3497 static int __find_data_block(struct inode *inode, pgoff_t index,
3498 				block_t *blk_addr)
3499 {
3500 	struct dnode_of_data dn;
3501 	struct page *ipage;
3502 	int err = 0;
3503 
3504 	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
3505 	if (IS_ERR(ipage))
3506 		return PTR_ERR(ipage);
3507 
3508 	set_new_dnode(&dn, inode, ipage, ipage, 0);
3509 
3510 	if (!f2fs_lookup_read_extent_cache_block(inode, index,
3511 						 &dn.data_blkaddr)) {
3512 		/* hole case */
3513 		err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3514 		if (err) {
3515 			dn.data_blkaddr = NULL_ADDR;
3516 			err = 0;
3517 		}
3518 	}
3519 	*blk_addr = dn.data_blkaddr;
3520 	f2fs_put_dnode(&dn);
3521 	return err;
3522 }
3523 
3524 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3525 				block_t *blk_addr, bool *node_changed)
3526 {
3527 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3528 	struct dnode_of_data dn;
3529 	struct page *ipage;
3530 	int err = 0;
3531 
3532 	f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3533 
3534 	ipage = f2fs_get_node_page(sbi, inode->i_ino);
3535 	if (IS_ERR(ipage)) {
3536 		err = PTR_ERR(ipage);
3537 		goto unlock_out;
3538 	}
3539 	set_new_dnode(&dn, inode, ipage, ipage, 0);
3540 
3541 	if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
3542 						&dn.data_blkaddr))
3543 		err = f2fs_reserve_block(&dn, index);
3544 
3545 	*blk_addr = dn.data_blkaddr;
3546 	*node_changed = dn.node_changed;
3547 	f2fs_put_dnode(&dn);
3548 
3549 unlock_out:
3550 	f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3551 	return err;
3552 }
3553 
3554 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
3555 			struct page *page, loff_t pos, unsigned int len,
3556 			block_t *blk_addr, bool *node_changed, bool *use_cow)
3557 {
3558 	struct inode *inode = page->mapping->host;
3559 	struct inode *cow_inode = F2FS_I(inode)->cow_inode;
3560 	pgoff_t index = page->index;
3561 	int err = 0;
3562 	block_t ori_blk_addr = NULL_ADDR;
3563 
3564 	/* If pos is beyond the end of file, reserve a new block in COW inode */
3565 	if ((pos & PAGE_MASK) >= i_size_read(inode))
3566 		goto reserve_block;
3567 
3568 	/* Look for the block in COW inode first */
3569 	err = __find_data_block(cow_inode, index, blk_addr);
3570 	if (err) {
3571 		return err;
3572 	} else if (*blk_addr != NULL_ADDR) {
3573 		*use_cow = true;
3574 		return 0;
3575 	}
3576 
3577 	if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
3578 		goto reserve_block;
3579 
3580 	/* Look for the block in the original inode */
3581 	err = __find_data_block(inode, index, &ori_blk_addr);
3582 	if (err)
3583 		return err;
3584 
3585 reserve_block:
3586 	/* Finally, we should reserve a new block in COW inode for the update */
3587 	err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
3588 	if (err)
3589 		return err;
3590 	inc_atomic_write_cnt(inode);
3591 
3592 	if (ori_blk_addr != NULL_ADDR)
3593 		*blk_addr = ori_blk_addr;
3594 	return 0;
3595 }
3596 
3597 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
3598 		loff_t pos, unsigned len, struct page **pagep, void **fsdata)
3599 {
3600 	struct inode *inode = mapping->host;
3601 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3602 	struct page *page = NULL;
3603 	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
3604 	bool need_balance = false;
3605 	bool use_cow = false;
3606 	block_t blkaddr = NULL_ADDR;
3607 	int err = 0;
3608 
3609 	trace_f2fs_write_begin(inode, pos, len);
3610 
3611 	if (!f2fs_is_checkpoint_ready(sbi)) {
3612 		err = -ENOSPC;
3613 		goto fail;
3614 	}
3615 
3616 	/*
3617 	 * We should check this at this moment to avoid deadlock on inode page
3618 	 * and #0 page. The locking rule for inline_data conversion should be:
3619 	 * lock_page(page #0) -> lock_page(inode_page)
3620 	 */
3621 	if (index != 0) {
3622 		err = f2fs_convert_inline_inode(inode);
3623 		if (err)
3624 			goto fail;
3625 	}
3626 
3627 #ifdef CONFIG_F2FS_FS_COMPRESSION
3628 	if (f2fs_compressed_file(inode)) {
3629 		int ret;
3630 
3631 		*fsdata = NULL;
3632 
3633 		if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
3634 			goto repeat;
3635 
3636 		ret = f2fs_prepare_compress_overwrite(inode, pagep,
3637 							index, fsdata);
3638 		if (ret < 0) {
3639 			err = ret;
3640 			goto fail;
3641 		} else if (ret) {
3642 			return 0;
3643 		}
3644 	}
3645 #endif
3646 
3647 repeat:
3648 	/*
3649 	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
3650 	 * wait_for_stable_page. Will wait that below with our IO control.
3651 	 */
3652 	page = f2fs_pagecache_get_page(mapping, index,
3653 				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3654 	if (!page) {
3655 		err = -ENOMEM;
3656 		goto fail;
3657 	}
3658 
3659 	/* TODO: cluster can be compressed due to race with .writepage */
3660 
3661 	*pagep = page;
3662 
3663 	if (f2fs_is_atomic_file(inode))
3664 		err = prepare_atomic_write_begin(sbi, page, pos, len,
3665 					&blkaddr, &need_balance, &use_cow);
3666 	else
3667 		err = prepare_write_begin(sbi, page, pos, len,
3668 					&blkaddr, &need_balance);
3669 	if (err)
3670 		goto fail;
3671 
3672 	if (need_balance && !IS_NOQUOTA(inode) &&
3673 			has_not_enough_free_secs(sbi, 0, 0)) {
3674 		unlock_page(page);
3675 		f2fs_balance_fs(sbi, true);
3676 		lock_page(page);
3677 		if (page->mapping != mapping) {
3678 			/* The page got truncated from under us */
3679 			f2fs_put_page(page, 1);
3680 			goto repeat;
3681 		}
3682 	}
3683 
3684 	f2fs_wait_on_page_writeback(page, DATA, false, true);
3685 
3686 	if (len == PAGE_SIZE || PageUptodate(page))
3687 		return 0;
3688 
3689 	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
3690 	    !f2fs_verity_in_progress(inode)) {
3691 		zero_user_segment(page, len, PAGE_SIZE);
3692 		return 0;
3693 	}
3694 
3695 	if (blkaddr == NEW_ADDR) {
3696 		zero_user_segment(page, 0, PAGE_SIZE);
3697 		SetPageUptodate(page);
3698 	} else {
3699 		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
3700 				DATA_GENERIC_ENHANCE_READ)) {
3701 			err = -EFSCORRUPTED;
3702 			f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
3703 			goto fail;
3704 		}
3705 		err = f2fs_submit_page_read(use_cow ?
3706 				F2FS_I(inode)->cow_inode : inode, page,
3707 				blkaddr, 0, true);
3708 		if (err)
3709 			goto fail;
3710 
3711 		lock_page(page);
3712 		if (unlikely(page->mapping != mapping)) {
3713 			f2fs_put_page(page, 1);
3714 			goto repeat;
3715 		}
3716 		if (unlikely(!PageUptodate(page))) {
3717 			err = -EIO;
3718 			goto fail;
3719 		}
3720 	}
3721 	return 0;
3722 
3723 fail:
3724 	f2fs_put_page(page, 1);
3725 	f2fs_write_failed(inode, pos + len);
3726 	return err;
3727 }
3728 
3729 static int f2fs_write_end(struct file *file,
3730 			struct address_space *mapping,
3731 			loff_t pos, unsigned len, unsigned copied,
3732 			struct page *page, void *fsdata)
3733 {
3734 	struct inode *inode = page->mapping->host;
3735 
3736 	trace_f2fs_write_end(inode, pos, len, copied);
3737 
3738 	/*
3739 	 * This should be come from len == PAGE_SIZE, and we expect copied
3740 	 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
3741 	 * let generic_perform_write() try to copy data again through copied=0.
3742 	 */
3743 	if (!PageUptodate(page)) {
3744 		if (unlikely(copied != len))
3745 			copied = 0;
3746 		else
3747 			SetPageUptodate(page);
3748 	}
3749 
3750 #ifdef CONFIG_F2FS_FS_COMPRESSION
3751 	/* overwrite compressed file */
3752 	if (f2fs_compressed_file(inode) && fsdata) {
3753 		f2fs_compress_write_end(inode, fsdata, page->index, copied);
3754 		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3755 
3756 		if (pos + copied > i_size_read(inode) &&
3757 				!f2fs_verity_in_progress(inode))
3758 			f2fs_i_size_write(inode, pos + copied);
3759 		return copied;
3760 	}
3761 #endif
3762 
3763 	if (!copied)
3764 		goto unlock_out;
3765 
3766 	set_page_dirty(page);
3767 
3768 	if (pos + copied > i_size_read(inode) &&
3769 	    !f2fs_verity_in_progress(inode)) {
3770 		f2fs_i_size_write(inode, pos + copied);
3771 		if (f2fs_is_atomic_file(inode))
3772 			f2fs_i_size_write(F2FS_I(inode)->cow_inode,
3773 					pos + copied);
3774 	}
3775 unlock_out:
3776 	f2fs_put_page(page, 1);
3777 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3778 	return copied;
3779 }
3780 
3781 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
3782 {
3783 	struct inode *inode = folio->mapping->host;
3784 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3785 
3786 	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3787 				(offset || length != folio_size(folio)))
3788 		return;
3789 
3790 	if (folio_test_dirty(folio)) {
3791 		if (inode->i_ino == F2FS_META_INO(sbi)) {
3792 			dec_page_count(sbi, F2FS_DIRTY_META);
3793 		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3794 			dec_page_count(sbi, F2FS_DIRTY_NODES);
3795 		} else {
3796 			inode_dec_dirty_pages(inode);
3797 			f2fs_remove_dirty_inode(inode);
3798 		}
3799 	}
3800 	clear_page_private_all(&folio->page);
3801 }
3802 
3803 bool f2fs_release_folio(struct folio *folio, gfp_t wait)
3804 {
3805 	/* If this is dirty folio, keep private data */
3806 	if (folio_test_dirty(folio))
3807 		return false;
3808 
3809 	clear_page_private_all(&folio->page);
3810 	return true;
3811 }
3812 
3813 static bool f2fs_dirty_data_folio(struct address_space *mapping,
3814 		struct folio *folio)
3815 {
3816 	struct inode *inode = mapping->host;
3817 
3818 	trace_f2fs_set_page_dirty(&folio->page, DATA);
3819 
3820 	if (!folio_test_uptodate(folio))
3821 		folio_mark_uptodate(folio);
3822 	BUG_ON(folio_test_swapcache(folio));
3823 
3824 	if (filemap_dirty_folio(mapping, folio)) {
3825 		f2fs_update_dirty_folio(inode, folio);
3826 		return true;
3827 	}
3828 	return false;
3829 }
3830 
3831 
3832 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3833 {
3834 #ifdef CONFIG_F2FS_FS_COMPRESSION
3835 	struct dnode_of_data dn;
3836 	sector_t start_idx, blknr = 0;
3837 	int ret;
3838 
3839 	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3840 
3841 	set_new_dnode(&dn, inode, NULL, NULL, 0);
3842 	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3843 	if (ret)
3844 		return 0;
3845 
3846 	if (dn.data_blkaddr != COMPRESS_ADDR) {
3847 		dn.ofs_in_node += block - start_idx;
3848 		blknr = f2fs_data_blkaddr(&dn);
3849 		if (!__is_valid_data_blkaddr(blknr))
3850 			blknr = 0;
3851 	}
3852 
3853 	f2fs_put_dnode(&dn);
3854 	return blknr;
3855 #else
3856 	return 0;
3857 #endif
3858 }
3859 
3860 
3861 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3862 {
3863 	struct inode *inode = mapping->host;
3864 	sector_t blknr = 0;
3865 
3866 	if (f2fs_has_inline_data(inode))
3867 		goto out;
3868 
3869 	/* make sure allocating whole blocks */
3870 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3871 		filemap_write_and_wait(mapping);
3872 
3873 	/* Block number less than F2FS MAX BLOCKS */
3874 	if (unlikely(block >= max_file_blocks(inode)))
3875 		goto out;
3876 
3877 	if (f2fs_compressed_file(inode)) {
3878 		blknr = f2fs_bmap_compress(inode, block);
3879 	} else {
3880 		struct f2fs_map_blocks map;
3881 
3882 		memset(&map, 0, sizeof(map));
3883 		map.m_lblk = block;
3884 		map.m_len = 1;
3885 		map.m_next_pgofs = NULL;
3886 		map.m_seg_type = NO_CHECK_TYPE;
3887 
3888 		if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
3889 			blknr = map.m_pblk;
3890 	}
3891 out:
3892 	trace_f2fs_bmap(inode, block, blknr);
3893 	return blknr;
3894 }
3895 
3896 #ifdef CONFIG_SWAP
3897 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3898 							unsigned int blkcnt)
3899 {
3900 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3901 	unsigned int blkofs;
3902 	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3903 	unsigned int secidx = start_blk / blk_per_sec;
3904 	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
3905 	int ret = 0;
3906 
3907 	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3908 	filemap_invalidate_lock(inode->i_mapping);
3909 
3910 	set_inode_flag(inode, FI_ALIGNED_WRITE);
3911 	set_inode_flag(inode, FI_OPU_WRITE);
3912 
3913 	for (; secidx < end_sec; secidx++) {
3914 		f2fs_down_write(&sbi->pin_sem);
3915 
3916 		f2fs_lock_op(sbi);
3917 		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3918 		f2fs_unlock_op(sbi);
3919 
3920 		set_inode_flag(inode, FI_SKIP_WRITES);
3921 
3922 		for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
3923 			struct page *page;
3924 			unsigned int blkidx = secidx * blk_per_sec + blkofs;
3925 
3926 			page = f2fs_get_lock_data_page(inode, blkidx, true);
3927 			if (IS_ERR(page)) {
3928 				f2fs_up_write(&sbi->pin_sem);
3929 				ret = PTR_ERR(page);
3930 				goto done;
3931 			}
3932 
3933 			set_page_dirty(page);
3934 			f2fs_put_page(page, 1);
3935 		}
3936 
3937 		clear_inode_flag(inode, FI_SKIP_WRITES);
3938 
3939 		ret = filemap_fdatawrite(inode->i_mapping);
3940 
3941 		f2fs_up_write(&sbi->pin_sem);
3942 
3943 		if (ret)
3944 			break;
3945 	}
3946 
3947 done:
3948 	clear_inode_flag(inode, FI_SKIP_WRITES);
3949 	clear_inode_flag(inode, FI_OPU_WRITE);
3950 	clear_inode_flag(inode, FI_ALIGNED_WRITE);
3951 
3952 	filemap_invalidate_unlock(inode->i_mapping);
3953 	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3954 
3955 	return ret;
3956 }
3957 
3958 static int check_swap_activate(struct swap_info_struct *sis,
3959 				struct file *swap_file, sector_t *span)
3960 {
3961 	struct address_space *mapping = swap_file->f_mapping;
3962 	struct inode *inode = mapping->host;
3963 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3964 	sector_t cur_lblock;
3965 	sector_t last_lblock;
3966 	sector_t pblock;
3967 	sector_t lowest_pblock = -1;
3968 	sector_t highest_pblock = 0;
3969 	int nr_extents = 0;
3970 	unsigned long nr_pblocks;
3971 	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
3972 	unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
3973 	unsigned int not_aligned = 0;
3974 	int ret = 0;
3975 
3976 	/*
3977 	 * Map all the blocks into the extent list.  This code doesn't try
3978 	 * to be very smart.
3979 	 */
3980 	cur_lblock = 0;
3981 	last_lblock = bytes_to_blks(inode, i_size_read(inode));
3982 
3983 	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
3984 		struct f2fs_map_blocks map;
3985 retry:
3986 		cond_resched();
3987 
3988 		memset(&map, 0, sizeof(map));
3989 		map.m_lblk = cur_lblock;
3990 		map.m_len = last_lblock - cur_lblock;
3991 		map.m_next_pgofs = NULL;
3992 		map.m_next_extent = NULL;
3993 		map.m_seg_type = NO_CHECK_TYPE;
3994 		map.m_may_create = false;
3995 
3996 		ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
3997 		if (ret)
3998 			goto out;
3999 
4000 		/* hole */
4001 		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
4002 			f2fs_err(sbi, "Swapfile has holes");
4003 			ret = -EINVAL;
4004 			goto out;
4005 		}
4006 
4007 		pblock = map.m_pblk;
4008 		nr_pblocks = map.m_len;
4009 
4010 		if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
4011 				nr_pblocks & sec_blks_mask) {
4012 			not_aligned++;
4013 
4014 			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
4015 			if (cur_lblock + nr_pblocks > sis->max)
4016 				nr_pblocks -= blks_per_sec;
4017 
4018 			if (!nr_pblocks) {
4019 				/* this extent is last one */
4020 				nr_pblocks = map.m_len;
4021 				f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
4022 				goto next;
4023 			}
4024 
4025 			ret = f2fs_migrate_blocks(inode, cur_lblock,
4026 							nr_pblocks);
4027 			if (ret)
4028 				goto out;
4029 			goto retry;
4030 		}
4031 next:
4032 		if (cur_lblock + nr_pblocks >= sis->max)
4033 			nr_pblocks = sis->max - cur_lblock;
4034 
4035 		if (cur_lblock) {	/* exclude the header page */
4036 			if (pblock < lowest_pblock)
4037 				lowest_pblock = pblock;
4038 			if (pblock + nr_pblocks - 1 > highest_pblock)
4039 				highest_pblock = pblock + nr_pblocks - 1;
4040 		}
4041 
4042 		/*
4043 		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
4044 		 */
4045 		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
4046 		if (ret < 0)
4047 			goto out;
4048 		nr_extents += ret;
4049 		cur_lblock += nr_pblocks;
4050 	}
4051 	ret = nr_extents;
4052 	*span = 1 + highest_pblock - lowest_pblock;
4053 	if (cur_lblock == 0)
4054 		cur_lblock = 1;	/* force Empty message */
4055 	sis->max = cur_lblock;
4056 	sis->pages = cur_lblock - 1;
4057 	sis->highest_bit = cur_lblock - 1;
4058 out:
4059 	if (not_aligned)
4060 		f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
4061 			  not_aligned, blks_per_sec * F2FS_BLKSIZE);
4062 	return ret;
4063 }
4064 
4065 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4066 				sector_t *span)
4067 {
4068 	struct inode *inode = file_inode(file);
4069 	int ret;
4070 
4071 	if (!S_ISREG(inode->i_mode))
4072 		return -EINVAL;
4073 
4074 	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
4075 		return -EROFS;
4076 
4077 	if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
4078 		f2fs_err(F2FS_I_SB(inode),
4079 			"Swapfile not supported in LFS mode");
4080 		return -EINVAL;
4081 	}
4082 
4083 	ret = f2fs_convert_inline_inode(inode);
4084 	if (ret)
4085 		return ret;
4086 
4087 	if (!f2fs_disable_compressed_file(inode))
4088 		return -EINVAL;
4089 
4090 	f2fs_precache_extents(inode);
4091 
4092 	ret = check_swap_activate(sis, file, span);
4093 	if (ret < 0)
4094 		return ret;
4095 
4096 	stat_inc_swapfile_inode(inode);
4097 	set_inode_flag(inode, FI_PIN_FILE);
4098 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
4099 	return ret;
4100 }
4101 
4102 static void f2fs_swap_deactivate(struct file *file)
4103 {
4104 	struct inode *inode = file_inode(file);
4105 
4106 	stat_dec_swapfile_inode(inode);
4107 	clear_inode_flag(inode, FI_PIN_FILE);
4108 }
4109 #else
4110 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4111 				sector_t *span)
4112 {
4113 	return -EOPNOTSUPP;
4114 }
4115 
/* Stub used when CONFIG_SWAP is disabled: there is nothing to deactivate. */
static void f2fs_swap_deactivate(struct file *file)
{
	/* intentionally empty */
}
4119 #endif
4120 
4121 const struct address_space_operations f2fs_dblock_aops = {
4122 	.read_folio	= f2fs_read_data_folio,
4123 	.readahead	= f2fs_readahead,
4124 	.writepage	= f2fs_write_data_page,
4125 	.writepages	= f2fs_write_data_pages,
4126 	.write_begin	= f2fs_write_begin,
4127 	.write_end	= f2fs_write_end,
4128 	.dirty_folio	= f2fs_dirty_data_folio,
4129 	.migrate_folio	= filemap_migrate_folio,
4130 	.invalidate_folio = f2fs_invalidate_folio,
4131 	.release_folio	= f2fs_release_folio,
4132 	.bmap		= f2fs_bmap,
4133 	.swap_activate  = f2fs_swap_activate,
4134 	.swap_deactivate = f2fs_swap_deactivate,
4135 };
4136 
4137 void f2fs_clear_page_cache_dirty_tag(struct page *page)
4138 {
4139 	struct address_space *mapping = page_mapping(page);
4140 	unsigned long flags;
4141 
4142 	xa_lock_irqsave(&mapping->i_pages, flags);
4143 	__xa_clear_mark(&mapping->i_pages, page_index(page),
4144 						PAGECACHE_TAG_DIRTY);
4145 	xa_unlock_irqrestore(&mapping->i_pages, flags);
4146 }
4147 
4148 int __init f2fs_init_post_read_processing(void)
4149 {
4150 	bio_post_read_ctx_cache =
4151 		kmem_cache_create("f2fs_bio_post_read_ctx",
4152 				  sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4153 	if (!bio_post_read_ctx_cache)
4154 		goto fail;
4155 	bio_post_read_ctx_pool =
4156 		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4157 					 bio_post_read_ctx_cache);
4158 	if (!bio_post_read_ctx_pool)
4159 		goto fail_free_cache;
4160 	return 0;
4161 
4162 fail_free_cache:
4163 	kmem_cache_destroy(bio_post_read_ctx_cache);
4164 fail:
4165 	return -ENOMEM;
4166 }
4167 
4168 void f2fs_destroy_post_read_processing(void)
4169 {
4170 	mempool_destroy(bio_post_read_ctx_pool);
4171 	kmem_cache_destroy(bio_post_read_ctx_cache);
4172 }
4173 
4174 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4175 {
4176 	if (!f2fs_sb_has_encrypt(sbi) &&
4177 		!f2fs_sb_has_verity(sbi) &&
4178 		!f2fs_sb_has_compression(sbi))
4179 		return 0;
4180 
4181 	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4182 						 WQ_UNBOUND | WQ_HIGHPRI,
4183 						 num_online_cpus());
4184 	return sbi->post_read_wq ? 0 : -ENOMEM;
4185 }
4186 
4187 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4188 {
4189 	if (sbi->post_read_wq)
4190 		destroy_workqueue(sbi->post_read_wq);
4191 }
4192 
4193 int __init f2fs_init_bio_entry_cache(void)
4194 {
4195 	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4196 			sizeof(struct bio_entry));
4197 	return bio_entry_slab ? 0 : -ENOMEM;
4198 }
4199 
4200 void f2fs_destroy_bio_entry_cache(void)
4201 {
4202 	kmem_cache_destroy(bio_entry_slab);
4203 }
4204 
4205 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
4206 			    unsigned int flags, struct iomap *iomap,
4207 			    struct iomap *srcmap)
4208 {
4209 	struct f2fs_map_blocks map = {};
4210 	pgoff_t next_pgofs = 0;
4211 	int err;
4212 
4213 	map.m_lblk = bytes_to_blks(inode, offset);
4214 	map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
4215 	map.m_next_pgofs = &next_pgofs;
4216 	map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
4217 	if (flags & IOMAP_WRITE)
4218 		map.m_may_create = true;
4219 
4220 	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
4221 	if (err)
4222 		return err;
4223 
4224 	iomap->offset = blks_to_bytes(inode, map.m_lblk);
4225 
4226 	/*
4227 	 * When inline encryption is enabled, sometimes I/O to an encrypted file
4228 	 * has to be broken up to guarantee DUN contiguity.  Handle this by
4229 	 * limiting the length of the mapping returned.
4230 	 */
4231 	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
4232 
4233 	/*
4234 	 * We should never see delalloc or compressed extents here based on
4235 	 * prior flushing and checks.
4236 	 */
4237 	if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
4238 		return -EINVAL;
4239 	if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
4240 		return -EINVAL;
4241 
4242 	if (map.m_pblk != NULL_ADDR) {
4243 		iomap->length = blks_to_bytes(inode, map.m_len);
4244 		iomap->type = IOMAP_MAPPED;
4245 		iomap->flags |= IOMAP_F_MERGED;
4246 		iomap->bdev = map.m_bdev;
4247 		iomap->addr = blks_to_bytes(inode, map.m_pblk);
4248 	} else {
4249 		if (flags & IOMAP_WRITE)
4250 			return -ENOTBLK;
4251 		iomap->length = blks_to_bytes(inode, next_pgofs) -
4252 				iomap->offset;
4253 		iomap->type = IOMAP_HOLE;
4254 		iomap->addr = IOMAP_NULL_ADDR;
4255 	}
4256 
4257 	if (map.m_flags & F2FS_MAP_NEW)
4258 		iomap->flags |= IOMAP_F_NEW;
4259 	if ((inode->i_state & I_DIRTY_DATASYNC) ||
4260 	    offset + length > i_size_read(inode))
4261 		iomap->flags |= IOMAP_F_DIRTY;
4262 
4263 	return 0;
4264 }
4265 
4266 const struct iomap_ops f2fs_iomap_ops = {
4267 	.iomap_begin	= f2fs_iomap_begin,
4268 };
4269