xref: /openbmc/linux/fs/f2fs/data.c (revision 588b48ca)
1 /*
2  * fs/f2fs/data.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/buffer_head.h>
14 #include <linux/mpage.h>
15 #include <linux/aio.h>
16 #include <linux/writeback.h>
17 #include <linux/backing-dev.h>
18 #include <linux/blkdev.h>
19 #include <linux/bio.h>
20 #include <linux/prefetch.h>
21 
22 #include "f2fs.h"
23 #include "node.h"
24 #include "segment.h"
25 #include <trace/events/f2fs.h>
26 
27 static void f2fs_read_end_io(struct bio *bio, int err)
28 {
29 	struct bio_vec *bvec;
30 	int i;
31 
32 	bio_for_each_segment_all(bvec, bio, i) {
33 		struct page *page = bvec->bv_page;
34 
35 		if (!err) {
36 			SetPageUptodate(page);
37 		} else {
38 			ClearPageUptodate(page);
39 			SetPageError(page);
40 		}
41 		unlock_page(page);
42 	}
43 	bio_put(bio);
44 }
45 
46 static void f2fs_write_end_io(struct bio *bio, int err)
47 {
48 	struct f2fs_sb_info *sbi = bio->bi_private;
49 	struct bio_vec *bvec;
50 	int i;
51 
52 	bio_for_each_segment_all(bvec, bio, i) {
53 		struct page *page = bvec->bv_page;
54 
55 		if (unlikely(err)) {
56 			SetPageError(page);
57 			set_bit(AS_EIO, &page->mapping->flags);
58 			f2fs_stop_checkpoint(sbi);
59 		}
60 		end_page_writeback(page);
61 		dec_page_count(sbi, F2FS_WRITEBACK);
62 	}
63 
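	/* wake up the waiter of a META_FLUSH bio; see __submit_merged_bio() */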
64 	if (sbi->wait_io) {
65 		complete(sbi->wait_io);
66 		sbi->wait_io = NULL;
67 	}
68 
69 	if (!get_pages(sbi, F2FS_WRITEBACK) &&
70 			!list_empty(&sbi->cp_wait.task_list))
71 		wake_up(&sbi->cp_wait);
72 
73 	bio_put(bio);
74 }
75 
76 /*
77  * Low-level block read/write IO operations.
78  */
79 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
80 				int npages, bool is_read)
81 {
82 	struct bio *bio;
83 
84 	/* No failure on bio allocation */
85 	bio = bio_alloc(GFP_NOIO, npages);
86 
87 	bio->bi_bdev = sbi->sb->s_bdev;
88 	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
89 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
90 	bio->bi_private = sbi;
91 
92 	return bio;
93 }
94 
95 static void __submit_merged_bio(struct f2fs_bio_info *io)
96 {
97 	struct f2fs_io_info *fio = &io->fio;
98 	int rw;
99 
100 	if (!io->bio)
101 		return;
102 
103 	rw = fio->rw;
104 
105 	if (is_read_io(rw)) {
106 		trace_f2fs_submit_read_bio(io->sbi->sb, rw,
107 						fio->type, io->bio);
108 		submit_bio(rw, io->bio);
109 	} else {
110 		trace_f2fs_submit_write_bio(io->sbi->sb, rw,
111 						fio->type, io->bio);
112 		/*
113 		 * META_FLUSH comes only from the checkpoint procedure, and we
114 		 * should wait for this metadata bio to complete for FS consistency.
115 		 */
116 		if (fio->type == META_FLUSH) {
117 			DECLARE_COMPLETION_ONSTACK(wait);
118 			io->sbi->wait_io = &wait;
119 			submit_bio(rw, io->bio);
120 			wait_for_completion(&wait);
121 		} else {
122 			submit_bio(rw, io->bio);
123 		}
124 	}
125 
126 	io->bio = NULL;
127 }
128 
129 void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
130 				enum page_type type, int rw)
131 {
132 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
133 	struct f2fs_bio_info *io;
134 
135 	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
136 
137 	down_write(&io->io_rwsem);
138 
139 	/* change META to META_FLUSH in the checkpoint procedure */
140 	if (type >= META_FLUSH) {
141 		io->fio.type = META_FLUSH;
142 		if (test_opt(sbi, NOBARRIER))
143 			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
144 		else
145 			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
146 	}
147 	__submit_merged_bio(io);
148 	up_write(&io->io_rwsem);
149 }
150 
151 /*
152  * Fill the locked page with data located at the given block address.
153  * The page is unlocked by the read end_io when the I/O completes.
154  */
155 int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
156 					block_t blk_addr, int rw)
157 {
158 	struct bio *bio;
159 
160 	trace_f2fs_submit_page_bio(page, blk_addr, rw);
161 
162 	/* Allocate a new bio */
163 	bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));
164 
165 	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
166 		bio_put(bio);
167 		f2fs_put_page(page, 1);
168 		return -EFAULT;
169 	}
170 
171 	submit_bio(rw, bio);
172 	return 0;
173 }
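
/*
 * A minimal usage sketch (based on find_data_page() and get_lock_data_page()
 * below) for a single synchronous page read:
 *
 *	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
 *	if (err)
 *		return ERR_PTR(err);
 *	lock_page(page);
 *	if (unlikely(!PageUptodate(page)))
 *		...treat as -EIO...
 *
 * Note that if this function fails, it has already put the page itself, so
 * the caller must not unlock or release it again.
 */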
174 
175 void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
176 			block_t blk_addr, struct f2fs_io_info *fio)
177 {
178 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
179 	struct f2fs_bio_info *io;
180 	bool is_read = is_read_io(fio->rw);
181 
182 	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
183 
184 	verify_block_addr(sbi, blk_addr);
185 
186 	down_write(&io->io_rwsem);
187 
188 	if (!is_read)
189 		inc_page_count(sbi, F2FS_WRITEBACK);
190 
191 	if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
192 						io->fio.rw != fio->rw))
193 		__submit_merged_bio(io);
194 alloc_new:
195 	if (io->bio == NULL) {
196 		int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));
197 
198 		io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
199 		io->fio = *fio;
200 	}
201 
202 	if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
203 							PAGE_CACHE_SIZE) {
204 		__submit_merged_bio(io);
205 		goto alloc_new;
206 	}
207 
208 	io->last_block_in_bio = blk_addr;
209 
210 	up_write(&io->io_rwsem);
211 	trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
212 }
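
/*
 * A rough sketch of the merging behaviour above: pages are appended to the
 * per-type io->bio as long as blk_addr is contiguous with the previous page
 * and fio->rw matches, so writing blocks 100, 101 and 102 back to back ends
 * up in a single bio; any discontinuity, rw change or full bio submits the
 * pending bio first and a new one is allocated.
 */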
213 
214 /*
215  * Lock ordering for the change of data block address:
216  * ->data_page
217  *  ->node_page
218  *    update block addresses in the node page
219  */
220 static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
221 {
222 	struct f2fs_node *rn;
223 	__le32 *addr_array;
224 	struct page *node_page = dn->node_page;
225 	unsigned int ofs_in_node = dn->ofs_in_node;
226 
227 	f2fs_wait_on_page_writeback(node_page, NODE);
228 
229 	rn = F2FS_NODE(node_page);
230 
231 	/* Get physical address of data block */
232 	addr_array = blkaddr_in_node(rn);
233 	addr_array[ofs_in_node] = cpu_to_le32(new_addr);
234 	set_page_dirty(node_page);
235 }
236 
237 int reserve_new_block(struct dnode_of_data *dn)
238 {
239 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
240 
241 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
242 		return -EPERM;
243 	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
244 		return -ENOSPC;
245 
246 	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
247 
248 	__set_data_blkaddr(dn, NEW_ADDR);
249 	dn->data_blkaddr = NEW_ADDR;
250 	mark_inode_dirty(dn->inode);
251 	sync_inode_page(dn);
252 	return 0;
253 }
254 
255 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
256 {
257 	bool need_put = dn->inode_page ? false : true;
258 	int err;
259 
260 	/* if inode_page exists, index should be zero */
261 	f2fs_bug_on(!need_put && index);
262 
263 	err = get_dnode_of_data(dn, index, ALLOC_NODE);
264 	if (err)
265 		return err;
266 
267 	if (dn->data_blkaddr == NULL_ADDR)
268 		err = reserve_new_block(dn);
269 	if (err || need_put)
270 		f2fs_put_dnode(dn);
271 	return err;
272 }
273 
274 static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
275 					struct buffer_head *bh_result)
276 {
277 	struct f2fs_inode_info *fi = F2FS_I(inode);
278 	pgoff_t start_fofs, end_fofs;
279 	block_t start_blkaddr;
280 
281 	if (is_inode_flag_set(fi, FI_NO_EXTENT))
282 		return 0;
283 
284 	read_lock(&fi->ext.ext_lock);
285 	if (fi->ext.len == 0) {
286 		read_unlock(&fi->ext.ext_lock);
287 		return 0;
288 	}
289 
290 	stat_inc_total_hit(inode->i_sb);
291 
292 	start_fofs = fi->ext.fofs;
293 	end_fofs = fi->ext.fofs + fi->ext.len - 1;
294 	start_blkaddr = fi->ext.blk_addr;
295 
296 	if (pgofs >= start_fofs && pgofs <= end_fofs) {
297 		unsigned int blkbits = inode->i_sb->s_blocksize_bits;
298 		size_t count;
299 
300 		clear_buffer_new(bh_result);
301 		map_bh(bh_result, inode->i_sb,
302 				start_blkaddr + pgofs - start_fofs);
303 		count = end_fofs - pgofs + 1;
304 		if (count < (UINT_MAX >> blkbits))
305 			bh_result->b_size = (count << blkbits);
306 		else
307 			bh_result->b_size = UINT_MAX;
308 
309 		stat_inc_read_hit(inode->i_sb);
310 		read_unlock(&fi->ext.ext_lock);
311 		return 1;
312 	}
313 	read_unlock(&fi->ext.ext_lock);
314 	return 0;
315 }
316 
317 void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
318 {
319 	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
320 	pgoff_t fofs, start_fofs, end_fofs;
321 	block_t start_blkaddr, end_blkaddr;
322 	int need_update = true;
323 
324 	f2fs_bug_on(blk_addr == NEW_ADDR);
325 	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
326 							dn->ofs_in_node;
327 
328 	/* Update the page address in the parent node */
329 	__set_data_blkaddr(dn, blk_addr);
330 
331 	if (is_inode_flag_set(fi, FI_NO_EXTENT))
332 		return;
333 
334 	write_lock(&fi->ext.ext_lock);
335 
336 	start_fofs = fi->ext.fofs;
337 	end_fofs = fi->ext.fofs + fi->ext.len - 1;
338 	start_blkaddr = fi->ext.blk_addr;
339 	end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
340 
341 	/* Drop and initialize the matched extent */
342 	if (fi->ext.len == 1 && fofs == start_fofs)
343 		fi->ext.len = 0;
344 
345 	/* Initial extent */
346 	if (fi->ext.len == 0) {
347 		if (blk_addr != NULL_ADDR) {
348 			fi->ext.fofs = fofs;
349 			fi->ext.blk_addr = blk_addr;
350 			fi->ext.len = 1;
351 		}
352 		goto end_update;
353 	}
354 
355 	/* Front merge */
356 	if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
357 		fi->ext.fofs--;
358 		fi->ext.blk_addr--;
359 		fi->ext.len++;
360 		goto end_update;
361 	}
362 
363 	/* Back merge */
364 	if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
365 		fi->ext.len++;
366 		goto end_update;
367 	}
368 
369 	/* Split the existing extent */
370 	if (fi->ext.len > 1 &&
371 		fofs >= start_fofs && fofs <= end_fofs) {
372 		if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
373 			fi->ext.len = fofs - start_fofs;
374 		} else {
375 			fi->ext.fofs = fofs + 1;
376 			fi->ext.blk_addr = start_blkaddr +
377 					fofs - start_fofs + 1;
378 			fi->ext.len -= fofs - start_fofs + 1;
379 		}
380 	} else {
381 		need_update = false;
382 	}
383 
384 	/* Finally, if the extent is very fragmented, let's drop the cache. */
385 	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
386 		fi->ext.len = 0;
387 		set_inode_flag(fi, FI_NO_EXTENT);
388 		need_update = true;
389 	}
390 end_update:
391 	write_unlock(&fi->ext.ext_lock);
392 	if (need_update)
393 		sync_inode_page(dn);
394 	return;
395 }
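
/*
 * Worked example of the merge cases above, starting from an extent
 * ext = {fofs = 8, blk_addr = 100, len = 4} (file offsets 8..11 mapped to
 * blocks 100..103):
 *  - writing offset 7 to block 99 front-merges into {7, 99, 5},
 *  - writing offset 12 to block 104 back-merges into {8, 100, 5},
 *  - writing offset 9 to some unrelated block splits the extent and keeps
 *    only the larger remaining piece; if len then drops below
 *    F2FS_MIN_EXTENT_LEN, the cache is dropped and FI_NO_EXTENT is set.
 */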
396 
397 struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
398 {
399 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
400 	struct address_space *mapping = inode->i_mapping;
401 	struct dnode_of_data dn;
402 	struct page *page;
403 	int err;
404 
405 	page = find_get_page(mapping, index);
406 	if (page && PageUptodate(page))
407 		return page;
408 	f2fs_put_page(page, 0);
409 
410 	set_new_dnode(&dn, inode, NULL, NULL, 0);
411 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
412 	if (err)
413 		return ERR_PTR(err);
414 	f2fs_put_dnode(&dn);
415 
416 	if (dn.data_blkaddr == NULL_ADDR)
417 		return ERR_PTR(-ENOENT);
418 
419 	/* A block preallocated by fallocate() has NEW_ADDR but no cached page */
420 	if (unlikely(dn.data_blkaddr == NEW_ADDR))
421 		return ERR_PTR(-EINVAL);
422 
423 	page = grab_cache_page(mapping, index);
424 	if (!page)
425 		return ERR_PTR(-ENOMEM);
426 
427 	if (PageUptodate(page)) {
428 		unlock_page(page);
429 		return page;
430 	}
431 
432 	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
433 					sync ? READ_SYNC : READA);
434 	if (err)
435 		return ERR_PTR(err);
436 
437 	if (sync) {
438 		wait_on_page_locked(page);
439 		if (unlikely(!PageUptodate(page))) {
440 			f2fs_put_page(page, 0);
441 			return ERR_PTR(-EIO);
442 		}
443 	}
444 	return page;
445 }
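
/*
 * For callers: with sync == true the read is issued as READ_SYNC and the
 * returned page is uptodate (or ERR_PTR(-EIO) is returned); with sync ==
 * false the read is issued as READA and the caller must lock the page and
 * check PageUptodate() itself before using its contents.
 */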
446 
447 /*
448  * If it tries to access a hole, return an error, because the callers
449  * (functions in dir.c and GC) should be able to know whether this page
450  * exists or not.
451  */
452 struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
453 {
454 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
455 	struct address_space *mapping = inode->i_mapping;
456 	struct dnode_of_data dn;
457 	struct page *page;
458 	int err;
459 
460 repeat:
461 	page = grab_cache_page(mapping, index);
462 	if (!page)
463 		return ERR_PTR(-ENOMEM);
464 
465 	set_new_dnode(&dn, inode, NULL, NULL, 0);
466 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
467 	if (err) {
468 		f2fs_put_page(page, 1);
469 		return ERR_PTR(err);
470 	}
471 	f2fs_put_dnode(&dn);
472 
473 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
474 		f2fs_put_page(page, 1);
475 		return ERR_PTR(-ENOENT);
476 	}
477 
478 	if (PageUptodate(page))
479 		return page;
480 
481 	/*
482 	 * A new dentry page was allocated but could not be written, since its
483 	 * new inode page could not be allocated due to -ENOSPC.
484 	 * In such a case, its blkaddr remains NEW_ADDR.
485 	 * See f2fs_add_link -> get_new_data_page -> init_inode_metadata.
486 	 */
487 	if (dn.data_blkaddr == NEW_ADDR) {
488 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
489 		SetPageUptodate(page);
490 		return page;
491 	}
492 
493 	err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC);
494 	if (err)
495 		return ERR_PTR(err);
496 
497 	lock_page(page);
498 	if (unlikely(!PageUptodate(page))) {
499 		f2fs_put_page(page, 1);
500 		return ERR_PTR(-EIO);
501 	}
502 	if (unlikely(page->mapping != mapping)) {
503 		f2fs_put_page(page, 1);
504 		goto repeat;
505 	}
506 	return page;
507 }
508 
509 /*
510  * The caller ensures that this data page has never been allocated.
511  * A new zero-filled data page is allocated in the page cache.
512  *
513  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
514  * f2fs_unlock_op().
515  * Note that ipage is set only by make_empty_dir.
516  */
517 struct page *get_new_data_page(struct inode *inode,
518 		struct page *ipage, pgoff_t index, bool new_i_size)
519 {
520 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
521 	struct address_space *mapping = inode->i_mapping;
522 	struct page *page;
523 	struct dnode_of_data dn;
524 	int err;
525 
526 	set_new_dnode(&dn, inode, ipage, NULL, 0);
527 	err = f2fs_reserve_block(&dn, index);
528 	if (err)
529 		return ERR_PTR(err);
530 repeat:
531 	page = grab_cache_page(mapping, index);
532 	if (!page) {
533 		err = -ENOMEM;
534 		goto put_err;
535 	}
536 
537 	if (PageUptodate(page))
538 		return page;
539 
540 	if (dn.data_blkaddr == NEW_ADDR) {
541 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
542 		SetPageUptodate(page);
543 	} else {
544 		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
545 								READ_SYNC);
546 		if (err)
547 			goto put_err;
548 
549 		lock_page(page);
550 		if (unlikely(!PageUptodate(page))) {
551 			f2fs_put_page(page, 1);
552 			err = -EIO;
553 			goto put_err;
554 		}
555 		if (unlikely(page->mapping != mapping)) {
556 			f2fs_put_page(page, 1);
557 			goto repeat;
558 		}
559 	}
560 
561 	if (new_i_size &&
562 		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
563 		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
564 		/* Only the directory inode sets new_i_size */
565 		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
566 	}
567 	return page;
568 
569 put_err:
570 	f2fs_put_dnode(&dn);
571 	return ERR_PTR(err);
572 }
573 
574 static int __allocate_data_block(struct dnode_of_data *dn)
575 {
576 	struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb);
577 	struct f2fs_summary sum;
578 	block_t new_blkaddr;
579 	struct node_info ni;
580 	int type;
581 
582 	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
583 		return -EPERM;
584 	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
585 		return -ENOSPC;
586 
587 	__set_data_blkaddr(dn, NEW_ADDR);
588 	dn->data_blkaddr = NEW_ADDR;
589 
590 	get_node_info(sbi, dn->nid, &ni);
591 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
592 
593 	type = CURSEG_WARM_DATA;
594 
595 	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);
596 
597 	/* direct IO doesn't use the extent cache, in order to maximize performance */
598 	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
599 	update_extent_cache(new_blkaddr, dn);
600 	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
601 
602 	dn->data_blkaddr = new_blkaddr;
603 	return 0;
604 }
605 
606 /*
607  * get_data_block() now supports readahead/bmap/rw direct_IO with a mapped bh.
608  * If original data blocks are allocated, then give them to blockdev.
609  * Otherwise,
610  *     a. preallocate requested block addresses
611  *     b. do not use extent cache for better performance
612  *     c. give the block addresses to blockdev
613  */
614 static int __get_data_block(struct inode *inode, sector_t iblock,
615 			struct buffer_head *bh_result, int create, bool fiemap)
616 {
617 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
618 	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
619 	unsigned maxblocks = bh_result->b_size >> blkbits;
620 	struct dnode_of_data dn;
621 	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
622 	pgoff_t pgofs, end_offset;
623 	int err = 0, ofs = 1;
624 	bool allocated = false;
625 
626 	/* Get the page offset from the block offset (iblock) */
627 	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
628 
629 	if (check_extent_cache(inode, pgofs, bh_result))
630 		goto out;
631 
632 	if (create) {
633 		f2fs_balance_fs(sbi);
634 		f2fs_lock_op(sbi);
635 	}
636 
637 	/* When reading holes, we need its node page */
638 	set_new_dnode(&dn, inode, NULL, NULL, 0);
639 	err = get_dnode_of_data(&dn, pgofs, mode);
640 	if (err) {
641 		if (err == -ENOENT)
642 			err = 0;
643 		goto unlock_out;
644 	}
645 	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
646 		goto put_out;
647 
648 	if (dn.data_blkaddr != NULL_ADDR) {
649 		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
650 	} else if (create) {
651 		err = __allocate_data_block(&dn);
652 		if (err)
653 			goto put_out;
654 		allocated = true;
655 		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
656 	} else {
657 		goto put_out;
658 	}
659 
660 	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
661 	bh_result->b_size = (((size_t)1) << blkbits);
662 	dn.ofs_in_node++;
663 	pgofs++;
664 
665 get_next:
666 	if (dn.ofs_in_node >= end_offset) {
667 		if (allocated)
668 			sync_inode_page(&dn);
669 		allocated = false;
670 		f2fs_put_dnode(&dn);
671 
672 		set_new_dnode(&dn, inode, NULL, NULL, 0);
673 		err = get_dnode_of_data(&dn, pgofs, mode);
674 		if (err) {
675 			if (err == -ENOENT)
676 				err = 0;
677 			goto unlock_out;
678 		}
679 		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
680 			goto put_out;
681 
682 		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
683 	}
684 
685 	if (maxblocks > (bh_result->b_size >> blkbits)) {
686 		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
687 		if (blkaddr == NULL_ADDR && create) {
688 			err = __allocate_data_block(&dn);
689 			if (err)
690 				goto sync_out;
691 			allocated = true;
692 			blkaddr = dn.data_blkaddr;
693 		}
694 		/* Give more consecutive addresses for readahead */
695 		if (blkaddr == (bh_result->b_blocknr + ofs)) {
696 			ofs++;
697 			dn.ofs_in_node++;
698 			pgofs++;
699 			bh_result->b_size += (((size_t)1) << blkbits);
700 			goto get_next;
701 		}
702 	}
703 sync_out:
704 	if (allocated)
705 		sync_inode_page(&dn);
706 put_out:
707 	f2fs_put_dnode(&dn);
708 unlock_out:
709 	if (create)
710 		f2fs_unlock_op(sbi);
711 out:
712 	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
713 	return err;
714 }
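
/*
 * This follows the usual buffer_head mapping convention: the caller sets
 * bh->b_size to the number of bytes it is interested in, and on return
 * b_blocknr/b_size describe the longest contiguous mapped run found,
 * e.g. (hypothetical caller):
 *
 *	bh.b_size = 16 << inode->i_blkbits;
 *	err = get_data_block(inode, iblock, &bh, 0);
 *	if (!err && buffer_mapped(&bh))
 *		nblocks = bh.b_size >> inode->i_blkbits;
 */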
715 
716 static int get_data_block(struct inode *inode, sector_t iblock,
717 			struct buffer_head *bh_result, int create)
718 {
719 	return __get_data_block(inode, iblock, bh_result, create, false);
720 }
721 
722 static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
723 			struct buffer_head *bh_result, int create)
724 {
725 	return __get_data_block(inode, iblock, bh_result, create, true);
726 }
727 
728 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
729 		u64 start, u64 len)
730 {
731 	return generic_block_fiemap(inode, fieinfo,
732 				start, len, get_data_block_fiemap);
733 }
734 
735 static int f2fs_read_data_page(struct file *file, struct page *page)
736 {
737 	struct inode *inode = page->mapping->host;
738 	int ret;
739 
740 	trace_f2fs_readpage(page, DATA);
741 
742 	/* If the file has inline data, try to read it directly */
743 	if (f2fs_has_inline_data(inode))
744 		ret = f2fs_read_inline_data(inode, page);
745 	else
746 		ret = mpage_readpage(page, get_data_block);
747 
748 	return ret;
749 }
750 
751 static int f2fs_read_data_pages(struct file *file,
752 			struct address_space *mapping,
753 			struct list_head *pages, unsigned nr_pages)
754 {
755 	struct inode *inode = file->f_mapping->host;
756 
757 	/* If the file has inline data, skip readpages */
758 	if (f2fs_has_inline_data(inode))
759 		return 0;
760 
761 	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
762 }
763 
764 int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
765 {
766 	struct inode *inode = page->mapping->host;
767 	block_t old_blkaddr, new_blkaddr;
768 	struct dnode_of_data dn;
769 	int err = 0;
770 
771 	set_new_dnode(&dn, inode, NULL, NULL, 0);
772 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
773 	if (err)
774 		return err;
775 
776 	old_blkaddr = dn.data_blkaddr;
777 
778 	/* This page is already truncated */
779 	if (old_blkaddr == NULL_ADDR)
780 		goto out_writepage;
781 
782 	set_page_writeback(page);
783 
784 	/*
785 	 * If the current allocation needs SSR,
786 	 * it is better to do in-place writes for the updated data.
787 	 */
788 	if (unlikely(old_blkaddr != NEW_ADDR &&
789 			!is_cold_data(page) &&
790 			need_inplace_update(inode))) {
791 		rewrite_data_page(page, old_blkaddr, fio);
792 		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
793 	} else {
794 		write_data_page(page, &dn, &new_blkaddr, fio);
795 		update_extent_cache(new_blkaddr, &dn);
796 		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
797 	}
798 out_writepage:
799 	f2fs_put_dnode(&dn);
800 	return err;
801 }
802 
803 static int f2fs_write_data_page(struct page *page,
804 					struct writeback_control *wbc)
805 {
806 	struct inode *inode = page->mapping->host;
807 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
808 	loff_t i_size = i_size_read(inode);
809 	const pgoff_t end_index = ((unsigned long long) i_size)
810 							>> PAGE_CACHE_SHIFT;
811 	unsigned offset = 0;
812 	bool need_balance_fs = false;
813 	int err = 0;
814 	struct f2fs_io_info fio = {
815 		.type = DATA,
816 		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
817 	};
818 
819 	trace_f2fs_writepage(page, DATA);
820 
821 	if (page->index < end_index)
822 		goto write;
823 
824 	/*
825 	 * If the offset is beyond the end of the file,
826 	 * this page does not have to be written to disk.
827 	 */
828 	offset = i_size & (PAGE_CACHE_SIZE - 1);
829 	if ((page->index >= end_index + 1) || !offset)
830 		goto out;
831 
832 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
833 write:
834 	if (unlikely(sbi->por_doing))
835 		goto redirty_out;
836 
837 	/* Dentry blocks are controlled by checkpoint */
838 	if (S_ISDIR(inode->i_mode)) {
839 		err = do_write_data_page(page, &fio);
840 		goto done;
841 	}
842 
843 	if (!wbc->for_reclaim)
844 		need_balance_fs = true;
845 	else if (has_not_enough_free_secs(sbi, 0))
846 		goto redirty_out;
847 
848 	f2fs_lock_op(sbi);
849 	if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
850 		err = f2fs_write_inline_data(inode, page, offset);
851 	else
852 		err = do_write_data_page(page, &fio);
853 	f2fs_unlock_op(sbi);
854 done:
855 	if (err && err != -ENOENT)
856 		goto redirty_out;
857 
858 	clear_cold_data(page);
859 out:
860 	inode_dec_dirty_dents(inode);
861 	unlock_page(page);
862 	if (need_balance_fs)
863 		f2fs_balance_fs(sbi);
864 	if (wbc->for_reclaim)
865 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
866 	return 0;
867 
868 redirty_out:
869 	redirty_page_for_writepage(wbc, page);
870 	return AOP_WRITEPAGE_ACTIVATE;
871 }
872 
873 static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
874 			void *data)
875 {
876 	struct address_space *mapping = data;
877 	int ret = mapping->a_ops->writepage(page, wbc);
878 	mapping_set_error(mapping, ret);
879 	return ret;
880 }
881 
882 static int f2fs_write_data_pages(struct address_space *mapping,
883 			    struct writeback_control *wbc)
884 {
885 	struct inode *inode = mapping->host;
886 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
887 	bool locked = false;
888 	int ret;
889 	long diff;
890 
891 	trace_f2fs_writepages(mapping->host, wbc, DATA);
892 
893 	/* deal with chardevs and other special files */
894 	if (!mapping->a_ops->writepage)
895 		return 0;
896 
897 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
898 			get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) &&
899 			available_free_memory(sbi, DIRTY_DENTS))
900 		goto skip_write;
901 
902 	diff = nr_pages_to_write(sbi, DATA, wbc);
903 
904 	if (!S_ISDIR(inode->i_mode)) {
905 		mutex_lock(&sbi->writepages);
906 		locked = true;
907 	}
908 	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
909 	if (locked)
910 		mutex_unlock(&sbi->writepages);
911 
912 	f2fs_submit_merged_bio(sbi, DATA, WRITE);
913 
914 	remove_dirty_dir_inode(inode);
915 
916 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
917 	return ret;
918 
919 skip_write:
920 	wbc->pages_skipped += get_dirty_dents(inode);
921 	return 0;
922 }
923 
924 static void f2fs_write_failed(struct address_space *mapping, loff_t to)
925 {
926 	struct inode *inode = mapping->host;
927 
928 	if (to > inode->i_size) {
929 		truncate_pagecache(inode, inode->i_size);
930 		truncate_blocks(inode, inode->i_size);
931 	}
932 }
933 
934 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
935 		loff_t pos, unsigned len, unsigned flags,
936 		struct page **pagep, void **fsdata)
937 {
938 	struct inode *inode = mapping->host;
939 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
940 	struct page *page;
941 	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
942 	struct dnode_of_data dn;
943 	int err = 0;
944 
945 	trace_f2fs_write_begin(inode, pos, len, flags);
946 
947 	f2fs_balance_fs(sbi);
948 repeat:
949 	err = f2fs_convert_inline_data(inode, pos + len);
950 	if (err)
951 		goto fail;
952 
953 	page = grab_cache_page_write_begin(mapping, index, flags);
954 	if (!page) {
955 		err = -ENOMEM;
956 		goto fail;
957 	}
958 
959 	/* to avoid latency during memory pressure */
960 	unlock_page(page);
961 
962 	*pagep = page;
963 
964 	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
965 		goto inline_data;
966 
967 	f2fs_lock_op(sbi);
968 	set_new_dnode(&dn, inode, NULL, NULL, 0);
969 	err = f2fs_reserve_block(&dn, index);
970 	f2fs_unlock_op(sbi);
971 	if (err) {
972 		f2fs_put_page(page, 0);
973 		goto fail;
974 	}
975 inline_data:
976 	lock_page(page);
977 	if (unlikely(page->mapping != mapping)) {
978 		f2fs_put_page(page, 1);
979 		goto repeat;
980 	}
981 
982 	f2fs_wait_on_page_writeback(page, DATA);
983 
984 	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
985 		return 0;
986 
987 	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
988 		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
989 		unsigned end = start + len;
990 
991 		/* Reading beyond i_size is simple: memset to zero */
992 		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
993 		goto out;
994 	}
995 
996 	if (dn.data_blkaddr == NEW_ADDR) {
997 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
998 	} else {
999 		if (f2fs_has_inline_data(inode)) {
1000 			err = f2fs_read_inline_data(inode, page);
1001 			if (err) {
1002 				page_cache_release(page);
1003 				goto fail;
1004 			}
1005 		} else {
1006 			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
1007 							READ_SYNC);
1008 			if (err)
1009 				goto fail;
1010 		}
1011 
1012 		lock_page(page);
1013 		if (unlikely(!PageUptodate(page))) {
1014 			f2fs_put_page(page, 1);
1015 			err = -EIO;
1016 			goto fail;
1017 		}
1018 		if (unlikely(page->mapping != mapping)) {
1019 			f2fs_put_page(page, 1);
1020 			goto repeat;
1021 		}
1022 	}
1023 out:
1024 	SetPageUptodate(page);
1025 	clear_cold_data(page);
1026 	return 0;
1027 fail:
1028 	f2fs_write_failed(mapping, pos + len);
1029 	return err;
1030 }
1031 
1032 static int f2fs_write_end(struct file *file,
1033 			struct address_space *mapping,
1034 			loff_t pos, unsigned len, unsigned copied,
1035 			struct page *page, void *fsdata)
1036 {
1037 	struct inode *inode = page->mapping->host;
1038 
1039 	trace_f2fs_write_end(inode, pos, len, copied);
1040 
1041 	set_page_dirty(page);
1042 
1043 	if (pos + copied > i_size_read(inode)) {
1044 		i_size_write(inode, pos + copied);
1045 		mark_inode_dirty(inode);
1046 		update_inode_page(inode);
1047 	}
1048 
1049 	f2fs_put_page(page, 1);
1050 	return copied;
1051 }
1052 
1053 static int check_direct_IO(struct inode *inode, int rw,
1054 		struct iov_iter *iter, loff_t offset)
1055 {
1056 	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
1057 
1058 	if (rw == READ)
1059 		return 0;
1060 
1061 	if (offset & blocksize_mask)
1062 		return -EINVAL;
1063 
1064 	if (iov_iter_alignment(iter) & blocksize_mask)
1065 		return -EINVAL;
1066 
1067 	return 0;
1068 }
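
/*
 * Worked example of the alignment check above, assuming a 4KB block size
 * (blocksize_mask == 0xfff): a direct write at offset 8192 with 4KB-aligned
 * iovecs passes, while one at offset 4100, or with a misaligned user buffer,
 * returns -EINVAL; reads are never rejected here.
 */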
1069 
1070 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
1071 		struct iov_iter *iter, loff_t offset)
1072 {
1073 	struct file *file = iocb->ki_filp;
1074 	struct address_space *mapping = file->f_mapping;
1075 	struct inode *inode = mapping->host;
1076 	size_t count = iov_iter_count(iter);
1077 	int err;
1078 
1079 	/* Let buffer I/O handle the inline data case. */
1080 	if (f2fs_has_inline_data(inode))
1081 		return 0;
1082 
1083 	if (check_direct_IO(inode, rw, iter, offset))
1084 		return 0;
1085 
1086 	/* clear fsync mark to recover these blocks */
1087 	fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino);
1088 
1089 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
1090 
1091 	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
1092 	if (err < 0 && (rw & WRITE))
1093 		f2fs_write_failed(mapping, offset + count);
1094 
1095 	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
1096 
1097 	return err;
1098 }
1099 
1100 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
1101 				      unsigned int length)
1102 {
1103 	struct inode *inode = page->mapping->host;
1104 	if (PageDirty(page))
1105 		inode_dec_dirty_dents(inode);
1106 	ClearPagePrivate(page);
1107 }
1108 
1109 static int f2fs_release_data_page(struct page *page, gfp_t wait)
1110 {
1111 	ClearPagePrivate(page);
1112 	return 1;
1113 }
1114 
1115 static int f2fs_set_data_page_dirty(struct page *page)
1116 {
1117 	struct address_space *mapping = page->mapping;
1118 	struct inode *inode = mapping->host;
1119 
1120 	trace_f2fs_set_page_dirty(page, DATA);
1121 
1122 	SetPageUptodate(page);
1123 	mark_inode_dirty(inode);
1124 
1125 	if (!PageDirty(page)) {
1126 		__set_page_dirty_nobuffers(page);
1127 		set_dirty_dir_page(inode, page);
1128 		return 1;
1129 	}
1130 	return 0;
1131 }
1132 
1133 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
1134 {
1135 	struct inode *inode = mapping->host;
1136 
1137 	if (f2fs_has_inline_data(inode))
1138 		return 0;
1139 
1140 	return generic_block_bmap(mapping, block, get_data_block);
1141 }
1142 
1143 const struct address_space_operations f2fs_dblock_aops = {
1144 	.readpage	= f2fs_read_data_page,
1145 	.readpages	= f2fs_read_data_pages,
1146 	.writepage	= f2fs_write_data_page,
1147 	.writepages	= f2fs_write_data_pages,
1148 	.write_begin	= f2fs_write_begin,
1149 	.write_end	= f2fs_write_end,
1150 	.set_page_dirty	= f2fs_set_data_page_dirty,
1151 	.invalidatepage	= f2fs_invalidate_data_page,
1152 	.releasepage	= f2fs_release_data_page,
1153 	.direct_IO	= f2fs_direct_IO,
1154 	.bmap		= f2fs_bmap,
1155 };
1156