/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct block_device *bdev, int npages)
{
	struct bio *bio;

	/* No failure on bio allocation */
	bio = bio_alloc(GFP_NOIO, npages);
	bio->bi_bdev = bdev;
	bio->bi_private = NULL;
	return bio;
}

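/*
 * Completion handler for read bios: mark each page uptodate on success, or
 * clear uptodate and set the error flag on failure, then unlock it.
 */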
static void f2fs_read_end_io(struct bio *bio, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;

	do {
		struct page *page = bvec->bv_page;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
}

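/*
 * Completion handler for write bios: on error, mark the page and its mapping
 * with EIO, flag the checkpoint as erroneous and force read-only mode. In
 * all cases end page writeback, drop the in-flight writeback count, and wake
 * up any waiter on cp_wait once no writeback pages remain.
 */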
static void f2fs_write_end_io(struct bio *bio, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb);

	do {
		struct page *page = bvec->bv_page;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (!uptodate) {
			SetPageError(page);
			set_bit(AS_EIO, &page->mapping->flags);
			set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
			sbi->sb->s_flags |= MS_RDONLY;
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
	} while (bvec >= bio->bi_io_vec);

	if (bio->bi_private)
		complete(bio->bi_private);

	if (!get_pages(sbi, F2FS_WRITEBACK) &&
			!list_empty(&sbi->cp_wait.task_list))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

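/*
 * Submit the bio merged so far in @io, if any. The request flags are adjusted
 * for the page type: REQ_META for metadata, READ_SYNC/WRITE_SYNC when @sync
 * is set, and flush/FUA for META_FLUSH, which is also waited on so that
 * checkpoint metadata reaches the disk before we continue. Callers in this
 * file hold io->io_mutex.
 */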
static void __submit_merged_bio(struct f2fs_sb_info *sbi,
				struct f2fs_bio_info *io,
				enum page_type type, bool sync, int rw)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);

	if (!io->bio)
		return;

	if (btype == META)
		rw |= REQ_META;

	if (is_read_io(rw)) {
		if (sync)
			rw |= READ_SYNC;
		submit_bio(rw, io->bio);
		trace_f2fs_submit_read_bio(sbi->sb, rw, type, io->bio);
		io->bio = NULL;
		return;
	}

	if (sync)
		rw |= WRITE_SYNC;
	if (type >= META_FLUSH)
		rw |= WRITE_FLUSH_FUA;

	/*
	 * META_FLUSH is issued only by the checkpoint procedure, and we must
	 * wait for this metadata bio to complete to keep the FS consistent.
	 */
	if (type == META_FLUSH) {
		DECLARE_COMPLETION_ONSTACK(wait);
		io->bio->bi_private = &wait;
		submit_bio(rw, io->bio);
		wait_for_completion(&wait);
	} else {
		submit_bio(rw, io->bio);
	}
	trace_f2fs_submit_write_bio(sbi->sb, rw, btype, io->bio);
	io->bio = NULL;
}

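/*
 * Flush the bio currently merged for the given page type, picking the read
 * or write context according to @rw and serializing on io_mutex.
 */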
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
				enum page_type type, bool sync, int rw)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

	mutex_lock(&io->io_mutex);
	__submit_merged_bio(sbi, io, type, sync, rw);
	mutex_unlock(&io->io_mutex);
}

/*
 * Fill the locked page with data located at the given block address.
 * The page is unlocked once the IO completes.
 */
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
					block_t blk_addr, int rw)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	struct bio *bio;

	trace_f2fs_submit_page_bio(page, blk_addr, rw);

	/* Allocate a new bio */
	bio = __bio_alloc(bdev, 1);

	/* Initialize the bio */
	bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
	bio->bi_end_io = is_read_io(rw) ? f2fs_read_end_io : f2fs_write_end_io;

	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
		bio_put(bio);
		f2fs_put_page(page, 1);
		return -EFAULT;
	}

	submit_bio(rw, bio);
	return 0;
}

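/*
 * Add @page to the per-type merged bio. A new bio is allocated when there is
 * none or when @blk_addr is not contiguous with the last merged block; the
 * previously merged bio is submitted first in that case.
 */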
void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
			block_t blk_addr, enum page_type type, int rw)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct block_device *bdev = sbi->sb->s_bdev;
	struct f2fs_bio_info *io;
	int bio_blocks;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

	verify_block_addr(sbi, blk_addr);

	mutex_lock(&io->io_mutex);

	if (!is_read_io(rw))
		inc_page_count(sbi, F2FS_WRITEBACK);

	if (io->bio && io->last_block_in_bio != blk_addr - 1)
		__submit_merged_bio(sbi, io, type, true, rw);
alloc_new:
	if (io->bio == NULL) {
		bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
		io->bio = __bio_alloc(bdev, bio_blocks);
		io->bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
		io->bio->bi_end_io = is_read_io(rw) ? f2fs_read_end_io :
							f2fs_write_end_io;
		/*
		 * The end_io will be assigned at the submission phase.
		 * Until then, let bio_add_page() merge consecutive IOs as much
		 * as possible.
		 */
	}

	if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
							PAGE_CACHE_SIZE) {
		__submit_merged_bio(sbi, io, type, true, rw);
		goto alloc_new;
	}

	io->last_block_in_bio = blk_addr;

	mutex_unlock(&io->io_mutex);
	trace_f2fs_submit_page_mbio(page, rw, type, blk_addr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
{
	struct f2fs_node *rn;
	__le32 *addr_array;
	struct page *node_page = dn->node_page;
	unsigned int ofs_in_node = dn->ofs_in_node;

	f2fs_wait_on_page_writeback(node_page, NODE, false);

	rn = F2FS_NODE(node_page);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[ofs_in_node] = cpu_to_le32(new_addr);
	set_page_dirty(node_page);
}

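/*
 * Reserve a new data block for @dn: fail when allocation is disallowed or no
 * space is left, otherwise charge the valid block count and record NEW_ADDR
 * in the node page.
 */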
int reserve_new_block(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);

	if (is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))
		return -EPERM;
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

	__set_data_blkaddr(dn, NEW_ADDR);
	dn->data_blkaddr = NEW_ADDR;
	sync_inode_page(dn);
	return 0;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;
	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);

	if (need_put)
		f2fs_put_dnode(dn);
	return err;
}

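/*
 * Look up @pgofs in the single in-memory extent of this inode. On a hit,
 * map @bh_result to the cached block address and report how many blocks
 * are contiguous from that offset.
 */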
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
					struct buffer_head *bh_result)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	pgoff_t start_fofs, end_fofs;
	block_t start_blkaddr;

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return 0;

	read_lock(&fi->ext.ext_lock);
	if (fi->ext.len == 0) {
		read_unlock(&fi->ext.ext_lock);
		return 0;
	}

	stat_inc_total_hit(inode->i_sb);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;

	if (pgofs >= start_fofs && pgofs <= end_fofs) {
		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
		size_t count;

		clear_buffer_new(bh_result);
		map_bh(bh_result, inode->i_sb,
				start_blkaddr + pgofs - start_fofs);
		count = end_fofs - pgofs + 1;
		if (count < (UINT_MAX >> blkbits))
			bh_result->b_size = (count << blkbits);
		else
			bh_result->b_size = UINT_MAX;

		stat_inc_read_hit(inode->i_sb);
		read_unlock(&fi->ext.ext_lock);
		return 1;
	}
	read_unlock(&fi->ext.ext_lock);
	return 0;
}

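/*
 * Record the new block address in the node page and keep the single cached
 * extent consistent: start a new extent, grow it by front/back merge, shrink
 * the larger half on a split, or drop it entirely once it becomes too short.
 */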
void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
{
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
	pgoff_t fofs, start_fofs, end_fofs;
	block_t start_blkaddr, end_blkaddr;
	int need_update = true;

	f2fs_bug_on(blk_addr == NEW_ADDR);
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;

	/* Update the page address in the parent node */
	__set_data_blkaddr(dn, blk_addr);

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return;

	write_lock(&fi->ext.ext_lock);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;
	end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;

	/* Drop and initialize the matched extent */
	if (fi->ext.len == 1 && fofs == start_fofs)
		fi->ext.len = 0;

	/* Initial extent */
	if (fi->ext.len == 0) {
		if (blk_addr != NULL_ADDR) {
			fi->ext.fofs = fofs;
			fi->ext.blk_addr = blk_addr;
			fi->ext.len = 1;
		}
		goto end_update;
	}

	/* Front merge */
	if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
		fi->ext.fofs--;
		fi->ext.blk_addr--;
		fi->ext.len++;
		goto end_update;
	}

	/* Back merge */
	if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
		fi->ext.len++;
		goto end_update;
	}

	/* Split the existing extent */
	if (fi->ext.len > 1 &&
		fofs >= start_fofs && fofs <= end_fofs) {
		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
			fi->ext.len = fofs - start_fofs;
		} else {
			fi->ext.fofs = fofs + 1;
			fi->ext.blk_addr = start_blkaddr +
					fofs - start_fofs + 1;
			fi->ext.len -= fofs - start_fofs + 1;
		}
	} else {
		need_update = false;
	}

	/* Finally, if the extent is very fragmented, let's drop the cache. */
	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
		fi->ext.len = 0;
		set_inode_flag(fi, FI_NO_EXTENT);
		need_update = true;
	}
end_update:
	write_unlock(&fi->ext.ext_lock);
	if (need_update)
		sync_inode_page(dn);
	return;
}

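/*
 * Find the data page at @index without locking it on return. If the page is
 * not already cached and uptodate, read it from its on-disk block address,
 * synchronously when @sync is set or as readahead otherwise.
 */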
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		return ERR_PTR(err);
	f2fs_put_dnode(&dn);

	if (dn.data_blkaddr == NULL_ADDR)
		return ERR_PTR(-ENOENT);

	/* Reserved by fallocate(): no cached page, blkaddr is NEW_ADDR */
	if (dn.data_blkaddr == NEW_ADDR)
		return ERR_PTR(-EINVAL);

	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
					sync ? READ_SYNC : READA);
	if (err)
		return ERR_PTR(err);

	if (sync) {
		wait_on_page_locked(page);
		if (!PageUptodate(page)) {
			f2fs_put_page(page, 0);
			return ERR_PTR(-EIO);
		}
	}
	return page;
}

/*
 * If this function accesses a hole, it returns an error, because the callers
 * (functions in dir.c and GC) need to know whether the page exists or not.
 */
struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

repeat:
	page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
	if (!page)
		return ERR_PTR(-ENOMEM);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	f2fs_put_dnode(&dn);

	if (dn.data_blkaddr == NULL_ADDR) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-ENOENT);
	}

	if (PageUptodate(page))
		return page;

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr remains NEW_ADDR.
	 * See f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		return page;
	}

	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
	if (err)
		return ERR_PTR(err);

	lock_page(page);
	if (!PageUptodate(page)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	if (page->mapping != mapping) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, the caller should grab and release a mutex by calling mutex_lock_op()
 * and mutex_unlock_op().
 * Note that npage is set only by make_empty_dir.
 */
struct page *get_new_data_page(struct inode *inode,
		struct page *npage, pgoff_t index, bool new_i_size)
{
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, npage, npage, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err)
		return ERR_PTR(err);

repeat:
	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (PageUptodate(page))
		return page;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
								READ_SYNC);
		if (err)
			return ERR_PTR(err);
		lock_page(page);
		if (!PageUptodate(page)) {
			f2fs_put_page(page, 1);
			return ERR_PTR(-EIO);
		}
		if (page->mapping != mapping) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	if (new_i_size &&
		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
		mark_inode_dirty_sync(inode);
	}
	return page;
}

/*
 * This function should be used by the data read flow only, since it does not
 * check the "create" flag that indicates block allocation.
 * The reason for this special functionality is to exploit the VFS readahead
 * mechanism.
 */
static int get_data_block_ro(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
	unsigned maxblocks = bh_result->b_size >> blkbits;
	struct dnode_of_data dn;
	pgoff_t pgofs;
	int err;

	/* Get the page offset from the block offset (iblock) */
	pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));

	if (check_extent_cache(inode, pgofs, bh_result)) {
		trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
		return 0;
	}

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
	if (err) {
		trace_f2fs_get_data_block(inode, iblock, bh_result, err);
		return (err == -ENOENT) ? 0 : err;
	}

	/* It does not support data allocation */
	f2fs_bug_on(create);

	if (dn.data_blkaddr != NEW_ADDR && dn.data_blkaddr != NULL_ADDR) {
		int i;
		unsigned int end_offset;

		end_offset = IS_INODE(dn.node_page) ?
				ADDRS_PER_INODE(F2FS_I(inode)) :
				ADDRS_PER_BLOCK;

		clear_buffer_new(bh_result);

		/* Give more consecutive addresses for the readahead */
		for (i = 0; i < end_offset - dn.ofs_in_node; i++)
			if (((datablock_addr(dn.node_page,
							dn.ofs_in_node + i))
				!= (dn.data_blkaddr + i)) || maxblocks == i)
				break;
		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
		bh_result->b_size = (((size_t)i) << blkbits);
	}
	f2fs_put_dnode(&dn);
	trace_f2fs_get_data_block(inode, iblock, bh_result, 0);
	return 0;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	return mpage_readpage(page, get_data_block_ro);
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, get_data_block_ro);
}

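/*
 * Write one dirty data page: locate its current block address through the
 * dnode, then either rewrite it in place (SSR and not cold data) or allocate
 * a new block and update the extent cache with the new address.
 */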
int do_write_data_page(struct page *page)
{
	struct inode *inode = page->mapping->host;
	block_t old_blk_addr, new_blk_addr;
	struct dnode_of_data dn;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		return err;

	old_blk_addr = dn.data_blkaddr;

	/* This page is already truncated */
	if (old_blk_addr == NULL_ADDR)
		goto out_writepage;

	set_page_writeback(page);

	/*
	 * If the current allocation needs SSR, it is better to do in-place
	 * writes for the updated data.
	 */
	if (unlikely(old_blk_addr != NEW_ADDR &&
			!is_cold_data(page) &&
			need_inplace_update(inode))) {
		rewrite_data_page(F2FS_SB(inode->i_sb), page,
						old_blk_addr);
	} else {
		write_data_page(inode, page, &dn,
				old_blk_addr, &new_blk_addr);
		update_extent_cache(new_blk_addr, &dn);
	}
out_writepage:
	f2fs_put_dnode(&dn);
	return err;
}

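/*
 * ->writepage for data: skip pages fully beyond i_size, zero the tail of a
 * partial last page, write dentry pages under checkpoint control, and write
 * regular data under f2fs_lock_op(), rebalancing free segments afterwards.
 */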
static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_CACHE_SHIFT;
	unsigned offset;
	bool need_balance_fs = false;
	int err = 0;

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out of range of the file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_CACHE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset) {
		if (S_ISDIR(inode->i_mode)) {
			dec_page_count(sbi, F2FS_DIRTY_DENTS);
			inode_dec_dirty_dents(inode);
		}
		goto out;
	}

	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
write:
	if (unlikely(sbi->por_doing)) {
		err = AOP_WRITEPAGE_ACTIVATE;
		goto redirty_out;
	}

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		dec_page_count(sbi, F2FS_DIRTY_DENTS);
		inode_dec_dirty_dents(inode);
		err = do_write_data_page(page);
	} else {
		f2fs_lock_op(sbi);
		err = do_write_data_page(page);
		f2fs_unlock_op(sbi);
		need_balance_fs = true;
	}
	if (err == -ENOENT)
		goto out;
	else if (err)
		goto redirty_out;

	if (wbc->for_reclaim)
		f2fs_submit_merged_bio(sbi, DATA, true, WRITE);

	clear_cold_data(page);
out:
	unlock_page(page);
	if (need_balance_fs)
		f2fs_balance_fs(sbi);
	return 0;

redirty_out:
	wbc->pages_skipped++;
	set_page_dirty(page);
	return err;
}

#define MAX_DESIRED_PAGES_WP	4096

static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

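/*
 * ->writepages for data: bump small nr_to_write requests up to
 * MAX_DESIRED_PAGES_WP, serialize non-directory writeback with the
 * writepages mutex, and flush the merged DATA bio when done.
 */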
static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	bool locked = false;
	int ret;
	long excess_nrtw = 0, desired_nrtw;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) {
		desired_nrtw = MAX_DESIRED_PAGES_WP;
		excess_nrtw = desired_nrtw - wbc->nr_to_write;
		wbc->nr_to_write = desired_nrtw;
	}

	if (!S_ISDIR(inode->i_mode)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}
	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
	if (locked)
		mutex_unlock(&sbi->writepages);
	f2fs_submit_merged_bio(sbi, DATA, wbc->sync_mode == WB_SYNC_ALL, WRITE);

	remove_dirty_dir_inode(inode);

	wbc->nr_to_write -= excess_nrtw;
	return ret;
}

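/*
 * ->write_begin: reserve a block for the target index under f2fs_lock_op(),
 * then bring the page uptodate, either by zeroing it (new or beyond-EOF
 * blocks) or by reading the existing block, before the copy-in starts.
 */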
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	struct page *page;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	int err = 0;

	f2fs_balance_fs(sbi);
repeat:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page)
		return -ENOMEM;
	*pagep = page;

	f2fs_lock_op(sbi);
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	f2fs_unlock_op(sbi);

	if (err) {
		f2fs_put_page(page, 1);
		return err;
	}

	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
		return 0;

	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
		unsigned end = start + len;

		/* Reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
		goto out;
	}

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
							READ_SYNC);
		if (err)
			return err;
		lock_page(page);
		if (!PageUptodate(page)) {
			f2fs_put_page(page, 1);
			return -EIO;
		}
		if (page->mapping != mapping) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}
out:
	SetPageUptodate(page);
	clear_cold_data(page);
	return 0;
}

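/*
 * ->write_end: mark the page uptodate and dirty, extend i_size when the copy
 * ended past the old end of file, and release the page reference.
 */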
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	SetPageUptodate(page);
	set_page_dirty(page);

	if (pos + copied > i_size_read(inode)) {
		i_size_write(inode, pos + copied);
		mark_inode_dirty(inode);
		update_inode_page(inode);
	}

	f2fs_put_page(page, 1);
	return copied;
}

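/*
 * Direct IO: only reads are served here; direct writes return 0 so the VFS
 * falls back to the buffered write path.
 */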
static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	if (rw == WRITE)
		return 0;

	/* Needs synchronization with the cleaner */
	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
						  get_data_block_ro);
}

static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
				      unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
	if (S_ISDIR(inode->i_mode) && PageDirty(page)) {
		dec_page_count(sbi, F2FS_DIRTY_DENTS);
		inode_dec_dirty_dents(inode);
	}
	ClearPagePrivate(page);
}

static int f2fs_release_data_page(struct page *page, gfp_t wait)
{
	ClearPagePrivate(page);
	return 1;
}

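/*
 * ->set_page_dirty for data: dirty the page without buffer heads and, for
 * directory data, register the inode on the dirty directory list via
 * set_dirty_dir_page().
 */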
static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	SetPageUptodate(page);
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		set_dirty_dir_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	return generic_block_bmap(mapping, block, get_data_block_ro);
}

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_data_page,
	.releasepage	= f2fs_release_data_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
};