xref: /openbmc/linux/fs/f2fs/file.c (revision 4354994f097d068a894aa1a0860da54571df3582)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/file.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/stat.h>
11 #include <linux/buffer_head.h>
12 #include <linux/writeback.h>
13 #include <linux/blkdev.h>
14 #include <linux/falloc.h>
15 #include <linux/types.h>
16 #include <linux/compat.h>
17 #include <linux/uaccess.h>
18 #include <linux/mount.h>
19 #include <linux/pagevec.h>
20 #include <linux/uio.h>
21 #include <linux/uuid.h>
22 #include <linux/file.h>
23 
24 #include "f2fs.h"
25 #include "node.h"
26 #include "segment.h"
27 #include "xattr.h"
28 #include "acl.h"
29 #include "gc.h"
30 #include "trace.h"
31 #include <trace/events/f2fs.h>
32 
33 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
34 {
35 	struct inode *inode = file_inode(vmf->vma->vm_file);
36 	vm_fault_t ret;
37 
38 	down_read(&F2FS_I(inode)->i_mmap_sem);
39 	ret = filemap_fault(vmf);
40 	up_read(&F2FS_I(inode)->i_mmap_sem);
41 
42 	return ret;
43 }
44 
/*
 * Handler installed as .page_mkwrite in f2fs_file_vm_ops.
 *
 * Under i_mmap_sem (read) and the page lock it verifies that the page still
 * belongs to this inode, lies within i_size and is uptodate, then calls
 * f2fs_get_block() for the page index, waits for pending writeback, zeroes
 * the part of the page past i_size and marks the page dirty.
 *
 * The errno collected in 'err' is converted by block_page_mkwrite_return().
 * The two error paths that run after lock_page() unlock the page again; all
 * other paths that took the page lock return with the page still locked.
 */
static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn = { .node_changed = false };
	int err;

	/*
	 * Bail out before sb_start_pagefault(); the 'err' label is placed
	 * after sb_end_pagefault() so the pair stays balanced.
	 */
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto err;
	}

	sb_start_pagefault(inode->i_sb);

	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));

	file_update_time(vmf->vma->vm_file);
	down_read(&F2FS_I(inode)->i_mmap_sem);
	lock_page(page);
	/* re-validate the page now that it is locked */
	if (unlikely(page->mapping != inode->i_mapping ||
			page_offset(page) > i_size_read(inode) ||
			!PageUptodate(page))) {
		unlock_page(page);
		err = -EFAULT;
		goto out_sem;
	}

	/* block allocation */
	__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_block(&dn, page->index);
	/* dn.data_blkaddr and dn.node_changed are still read further down */
	f2fs_put_dnode(&dn);
	__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
	if (err) {
		unlock_page(page);
		goto out_sem;
	}

	/* fill the page */
	f2fs_wait_on_page_writeback(page, DATA, false);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);

	/*
	 * check to see if the page is mapped already (no holes);
	 * err is 0 at this point and the page stays locked
	 */
	if (PageMappedToDisk(page))
		goto out_sem;

	/* page is wholly or partially inside EOF */
	if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
						i_size_read(inode)) {
		loff_t offset;

		/* zero from the in-page offset of i_size to the page end */
		offset = i_size_read(inode) & ~PAGE_MASK;
		zero_user_segment(page, offset, PAGE_SIZE);
	}
	set_page_dirty(page);
	if (!PageUptodate(page))
		SetPageUptodate(page);

	f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);

	trace_f2fs_vm_page_mkwrite(page, DATA);
out_sem:
	up_read(&F2FS_I(inode)->i_mmap_sem);

	f2fs_balance_fs(sbi, dn.node_changed);

	sb_end_pagefault(inode->i_sb);
	f2fs_update_time(sbi, REQ_TIME);
err:
	return block_page_mkwrite_return(err);
}
121 
/*
 * VM operations installed on a vma by f2fs_file_mmap(): f2fs wrappers for
 * fault and page_mkwrite, the generic filemap_map_pages for map_pages.
 */
static const struct vm_operations_struct f2fs_file_vm_ops = {
	.fault		= f2fs_filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= f2fs_vm_page_mkwrite,
};
127 
128 static int get_parent_ino(struct inode *inode, nid_t *pino)
129 {
130 	struct dentry *dentry;
131 
132 	inode = igrab(inode);
133 	dentry = d_find_any_alias(inode);
134 	iput(inode);
135 	if (!dentry)
136 		return 0;
137 
138 	*pino = parent_ino(dentry);
139 	dput(dentry);
140 	return 1;
141 }
142 
143 static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
144 {
145 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
146 	enum cp_reason_type cp_reason = CP_NO_NEEDED;
147 
148 	if (!S_ISREG(inode->i_mode))
149 		cp_reason = CP_NON_REGULAR;
150 	else if (inode->i_nlink != 1)
151 		cp_reason = CP_HARDLINK;
152 	else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
153 		cp_reason = CP_SB_NEED_CP;
154 	else if (file_wrong_pino(inode))
155 		cp_reason = CP_WRONG_PINO;
156 	else if (!f2fs_space_for_roll_forward(sbi))
157 		cp_reason = CP_NO_SPC_ROLL;
158 	else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
159 		cp_reason = CP_NODE_NEED_CP;
160 	else if (test_opt(sbi, FASTBOOT))
161 		cp_reason = CP_FASTBOOT_MODE;
162 	else if (F2FS_OPTION(sbi).active_logs == 2)
163 		cp_reason = CP_SPEC_LOG_NUM;
164 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
165 		f2fs_need_dentry_mark(sbi, inode->i_ino) &&
166 		f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
167 							TRANS_DIR_INO))
168 		cp_reason = CP_RECOVER_DIR;
169 
170 	return cp_reason;
171 }
172 
173 static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
174 {
175 	struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
176 	bool ret = false;
177 	/* But we need to avoid that there are some inode updates */
178 	if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
179 		ret = true;
180 	f2fs_put_page(i, 0);
181 	return ret;
182 }
183 
184 static void try_to_fix_pino(struct inode *inode)
185 {
186 	struct f2fs_inode_info *fi = F2FS_I(inode);
187 	nid_t pino;
188 
189 	down_write(&fi->i_sem);
190 	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
191 			get_parent_ino(inode, &pino)) {
192 		f2fs_i_pino_write(inode, pino);
193 		file_got_pino(inode);
194 	}
195 	up_write(&fi->i_sem);
196 }
197 
/*
 * Common worker behind f2fs_sync_file().
 *
 * @file:     file being synced
 * @start:    first byte of the range passed to file_write_and_wait_range()
 * @end:      last byte of that range
 * @datasync: non-zero when only data needs syncing (fdatasync)
 * @atomic:   when true, do not wait for node page writeback and do not
 *            issue the flush at flush_out
 *
 * Returns 0 on success, also when the filesystem is read-only or has
 * checkpointing disabled, otherwise a negative errno.
 */
static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
						int datasync, bool atomic)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	int ret = 0;
	enum cp_reason_type cp_reason = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	unsigned int seq_id = 0;

	if (unlikely(f2fs_readonly(inode->i_sb) ||
				is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	/* if fdatasync is triggered, let's do in-place-update */
	if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(inode, FI_NEED_IPU);
	ret = file_write_and_wait_range(file, start, end);
	clear_inode_flag(inode, FI_NEED_IPU);

	if (ret) {
		trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
		return ret;
	}

	/* if the inode is dirty, let's recover all the time */
	if (!f2fs_skip_inode_update(inode, datasync)) {
		f2fs_write_inode(inode, NULL);
		goto go_write;
	}

	/*
	 * if there is no written data, don't waste time to write recovery info.
	 */
	if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
			!f2fs_exist_written_data(sbi, ino, APPEND_INO)) {

		/* it may call write_inode just prior to fsync */
		if (need_inode_page_update(sbi, ino))
			goto go_write;

		/* updated-in-place data only needs the flush step */
		if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
				f2fs_exist_written_data(sbi, ino, UPDATE_INO))
			goto flush_out;
		goto out;
	}
go_write:
	/*
	 * Both of fdatasync() and fsync() are able to be recovered from
	 * sudden-power-off.
	 */
	down_read(&F2FS_I(inode)->i_sem);
	cp_reason = need_do_checkpoint(inode);
	up_read(&F2FS_I(inode)->i_sem);

	if (cp_reason) {
		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		/*
		 * We've secured consistency through sync_fs. Following pino
		 * will be used only for fsynced inodes after checkpoint.
		 */
		try_to_fix_pino(inode);
		clear_inode_flag(inode, FI_APPEND_WRITE);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		goto out;
	}
sync_nodes:
	/* wb_sync_req[NODE] is raised only around the node page sync */
	atomic_inc(&sbi->wb_sync_req[NODE]);
	ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
	atomic_dec(&sbi->wb_sync_req[NODE]);
	if (ret)
		goto out;

	/* if cp_error was enabled, we should avoid infinite loop */
	if (unlikely(f2fs_cp_error(sbi))) {
		ret = -EIO;
		goto out;
	}

	/* inode block changed again: write it and redo the node sync */
	if (f2fs_need_inode_block_update(sbi, ino)) {
		f2fs_mark_inode_dirty_sync(inode, true);
		f2fs_write_inode(inode, NULL);
		goto sync_nodes;
	}

	/*
	 * If it's atomic_write, it's just fine to keep write ordering. So
	 * here we don't need to wait for node write completion, since we use
	 * node chain which serializes node blocks. If one of node writes are
	 * reordered, we can see simply broken chain, resulting in stopping
	 * roll-forward recovery. It means we'll recover all or none node blocks
	 * given fsync mark.
	 */
	if (!atomic) {
		ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
		if (ret)
			goto out;
	}

	/* once recovery info is written, don't need to track this */
	f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
	clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
	if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
		ret = f2fs_issue_flush(sbi, inode->i_ino);
	/* drop the update/flush tracking only when no error was recorded */
	if (!ret) {
		f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
		clear_inode_flag(inode, FI_UPDATE_WRITE);
		f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
	}
	f2fs_update_time(sbi, REQ_TIME);
out:
	trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
	f2fs_trace_ios(NULL, 1);
	return ret;
}
323 
324 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
325 {
326 	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
327 		return -EIO;
328 	return f2fs_do_sync_file(file, start, end, datasync, false);
329 }
330 
331 static pgoff_t __get_first_dirty_index(struct address_space *mapping,
332 						pgoff_t pgofs, int whence)
333 {
334 	struct page *page;
335 	int nr_pages;
336 
337 	if (whence != SEEK_DATA)
338 		return 0;
339 
340 	/* find first dirty page index */
341 	nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY,
342 				      1, &page);
343 	if (!nr_pages)
344 		return ULONG_MAX;
345 	pgofs = page->index;
346 	put_page(page);
347 	return pgofs;
348 }
349 
350 static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
351 				pgoff_t dirty, pgoff_t pgofs, int whence)
352 {
353 	switch (whence) {
354 	case SEEK_DATA:
355 		if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
356 			is_valid_data_blkaddr(sbi, blkaddr))
357 			return true;
358 		break;
359 	case SEEK_HOLE:
360 		if (blkaddr == NULL_ADDR)
361 			return true;
362 		break;
363 	}
364 	return false;
365 }
366 
/*
 * SEEK_DATA / SEEK_HOLE worker called from f2fs_llseek().
 *
 * Holding inode_lock(), walks the inode's block addresses starting at the
 * page that contains @offset until __found_offset() reports a match for
 * @whence. On a match the position is set and returned via vfs_setpos().
 *
 * Returns -ENXIO when @offset is at or beyond i_size, when SEEK_DATA finds
 * nothing before i_size, when a dnode lookup fails with an error other than
 * -ENOENT, or when a block address fails the validity check.
 */
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = inode->i_sb->s_maxbytes;
	struct dnode_of_data dn;
	pgoff_t pgofs, end_offset, dirty;
	loff_t data_ofs = offset;
	loff_t isize;
	int err = 0;

	inode_lock(inode);

	isize = i_size_read(inode);
	if (offset >= isize)
		goto fail;

	/*
	 * handle inline data case: SEEK_DATA keeps @offset, SEEK_HOLE
	 * reports i_size
	 */
	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
		if (whence == SEEK_HOLE)
			data_ofs = isize;
		goto found;
	}

	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);

	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);

	/* outer loop: one iteration per dnode; data_ofs tracks pgofs */
	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
		if (err && err != -ENOENT) {
			goto fail;
		} else if (err == -ENOENT) {
			/* direct node does not exists */
			if (whence == SEEK_DATA) {
				pgofs = f2fs_get_next_page_offset(&dn, pgofs);
				continue;
			} else {
				goto found;
			}
		}

		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

		/* find data/hole in dnode block */
		for (; dn.ofs_in_node < end_offset;
				dn.ofs_in_node++, pgofs++,
				data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
			block_t blkaddr;

			blkaddr = datablock_addr(dn.inode,
					dn.node_page, dn.ofs_in_node);

			/* an out-of-range block address aborts the seek */
			if (__is_valid_data_blkaddr(blkaddr) &&
				!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						blkaddr, DATA_GENERIC)) {
				f2fs_put_dnode(&dn);
				goto fail;
			}

			if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
							pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}
		}
		f2fs_put_dnode(&dn);
	}

	/* ran past i_size: SEEK_HOLE falls through and is clamped below */
	if (whence == SEEK_DATA)
		goto fail;
found:
	if (whence == SEEK_HOLE && data_ofs > isize)
		data_ofs = isize;
	inode_unlock(inode);
	return vfs_setpos(file, data_ofs, maxbytes);
fail:
	inode_unlock(inode);
	return -ENXIO;
}
447 
448 static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
449 {
450 	struct inode *inode = file->f_mapping->host;
451 	loff_t maxbytes = inode->i_sb->s_maxbytes;
452 
453 	switch (whence) {
454 	case SEEK_SET:
455 	case SEEK_CUR:
456 	case SEEK_END:
457 		return generic_file_llseek_size(file, offset, whence,
458 						maxbytes, i_size_read(inode));
459 	case SEEK_DATA:
460 	case SEEK_HOLE:
461 		if (offset < 0)
462 			return -ENXIO;
463 		return f2fs_seek_block(file, offset, whence);
464 	}
465 
466 	return -EINVAL;
467 }
468 
469 static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
470 {
471 	struct inode *inode = file_inode(file);
472 	int err;
473 
474 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
475 		return -EIO;
476 
477 	/* we don't need to use inline_data strictly */
478 	err = f2fs_convert_inline_inode(inode);
479 	if (err)
480 		return err;
481 
482 	file_accessed(file);
483 	vma->vm_ops = &f2fs_file_vm_ops;
484 	return 0;
485 }
486 
487 static int f2fs_file_open(struct inode *inode, struct file *filp)
488 {
489 	int err = fscrypt_file_open(inode, filp);
490 
491 	if (err)
492 		return err;
493 
494 	filp->f_mode |= FMODE_NOWAIT;
495 
496 	return dquot_file_open(inode, filp);
497 }
498 
/*
 * Clear @count block address slots in the node page referenced by @dn,
 * starting at dn->ofs_in_node.
 *
 * Every slot that is not NULL_ADDR is rewritten to NULL_ADDR. Blocks that
 * pass the address validity check are invalidated and counted; the count is
 * subtracted from the inode's valid blocks and the extent cache is updated
 * for the whole range. dn->ofs_in_node is restored to its entry value
 * before returning.
 */
void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_node *raw_node;
	int nr_free = 0, ofs = dn->ofs_in_node, len = count;
	__le32 *addr;
	int base = 0;

	/* in an inode block with extra attributes the addresses start later */
	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	raw_node = F2FS_NODE(dn->node_page);
	addr = blkaddr_in_node(raw_node) + base + ofs;

	/* dn->ofs_in_node advances with addr; f2fs_set_data_blkaddr(dn) is
	 * called with it pointing at the current slot */
	for (; count > 0; count--, addr++, dn->ofs_in_node++) {
		block_t blkaddr = le32_to_cpu(*addr);

		if (blkaddr == NULL_ADDR)
			continue;

		dn->data_blkaddr = NULL_ADDR;
		f2fs_set_data_blkaddr(dn);

		/* slot is cleared, but an out-of-range address is not freed */
		if (__is_valid_data_blkaddr(blkaddr) &&
			!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
			continue;

		f2fs_invalidate_blocks(sbi, blkaddr);
		if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
			clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
		nr_free++;
	}

	if (nr_free) {
		pgoff_t fofs;
		/*
		 * once we invalidate valid blkaddr in range [ofs, ofs + count],
		 * we will invalidate all blkaddr in the whole range.
		 */
		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
							dn->inode) + ofs;
		/* 'len' is the original count; 'count' is 0 after the loop */
		f2fs_update_extent_cache_range(dn, fofs, 0, len);
		dec_valid_block_count(sbi, dn->inode, nr_free);
	}
	dn->ofs_in_node = ofs;

	f2fs_update_time(sbi, REQ_TIME);
	trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
					 dn->ofs_in_node, nr_free);
}
549 
550 void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
551 {
552 	f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
553 }
554 
555 static int truncate_partial_data_page(struct inode *inode, u64 from,
556 								bool cache_only)
557 {
558 	loff_t offset = from & (PAGE_SIZE - 1);
559 	pgoff_t index = from >> PAGE_SHIFT;
560 	struct address_space *mapping = inode->i_mapping;
561 	struct page *page;
562 
563 	if (!offset && !cache_only)
564 		return 0;
565 
566 	if (cache_only) {
567 		page = find_lock_page(mapping, index);
568 		if (page && PageUptodate(page))
569 			goto truncate_out;
570 		f2fs_put_page(page, 1);
571 		return 0;
572 	}
573 
574 	page = f2fs_get_lock_data_page(inode, index, true);
575 	if (IS_ERR(page))
576 		return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
577 truncate_out:
578 	f2fs_wait_on_page_writeback(page, DATA, true);
579 	zero_user(page, offset, PAGE_SIZE - offset);
580 
581 	/* An encrypted inode should have a key and truncate the last page. */
582 	f2fs_bug_on(F2FS_I_SB(inode), cache_only && f2fs_encrypted_inode(inode));
583 	if (!cache_only)
584 		set_page_dirty(page);
585 	f2fs_put_page(page, 1);
586 	return 0;
587 }
588 
/*
 * Free the blocks of @inode from byte offset @from onward.
 *
 * @lock: when true, the node/block freeing is wrapped in
 *        f2fs_lock_op() / f2fs_unlock_op().
 *
 * For an inode with inline data only f2fs_truncate_inline_inode() runs and
 * the final partial-page zeroing is done in cache-only mode.
 *
 * Returns 0 on success or a negative errno.
 */
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	pgoff_t free_from;
	int count = 0, err = 0;
	struct page *ipage;
	bool truncate_page = false;

	trace_f2fs_truncate_blocks_enter(inode, from);

	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);

	/* nothing to free at block level; the lock was not taken yet */
	if (free_from >= sbi->max_file_blocks)
		goto free_partial;

	if (lock)
		f2fs_lock_op(sbi);

	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		f2fs_truncate_inline_inode(inode, ipage, from);
		f2fs_put_page(ipage, 1);
		/* passed below as cache_only to truncate_partial_data_page() */
		truncate_page = true;
		goto out;
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
	if (err) {
		if (err == -ENOENT)
			goto free_next;
		goto out;
	}

	count = ADDRS_PER_PAGE(dn.node_page, inode);

	count -= dn.ofs_in_node;
	f2fs_bug_on(sbi, count < 0);

	/* free the rest of this dnode here; later nodes are handled below */
	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
		f2fs_truncate_data_blocks_range(&dn, count);
		free_from += count;
	}

	f2fs_put_dnode(&dn);
free_next:
	err = f2fs_truncate_inode_blocks(inode, free_from);
out:
	if (lock)
		f2fs_unlock_op(sbi);
free_partial:
	/* lastly zero out the first data page */
	if (!err)
		err = truncate_partial_data_page(inode, from, truncate_page);

	trace_f2fs_truncate_blocks_exit(inode, err);
	return err;
}
653 
654 int f2fs_truncate(struct inode *inode)
655 {
656 	int err;
657 
658 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
659 		return -EIO;
660 
661 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
662 				S_ISLNK(inode->i_mode)))
663 		return 0;
664 
665 	trace_f2fs_truncate(inode);
666 
667 	if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
668 		f2fs_show_injection_info(FAULT_TRUNCATE);
669 		return -EIO;
670 	}
671 
672 	/* we should check inline_data size */
673 	if (!f2fs_may_inline_data(inode)) {
674 		err = f2fs_convert_inline_inode(inode);
675 		if (err)
676 			return err;
677 	}
678 
679 	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
680 	if (err)
681 		return err;
682 
683 	inode->i_mtime = inode->i_ctime = current_time(inode);
684 	f2fs_mark_inode_dirty_sync(inode, false);
685 	return 0;
686 }
687 
688 int f2fs_getattr(const struct path *path, struct kstat *stat,
689 		 u32 request_mask, unsigned int query_flags)
690 {
691 	struct inode *inode = d_inode(path->dentry);
692 	struct f2fs_inode_info *fi = F2FS_I(inode);
693 	struct f2fs_inode *ri;
694 	unsigned int flags;
695 
696 	if (f2fs_has_extra_attr(inode) &&
697 			f2fs_sb_has_inode_crtime(inode->i_sb) &&
698 			F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
699 		stat->result_mask |= STATX_BTIME;
700 		stat->btime.tv_sec = fi->i_crtime.tv_sec;
701 		stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
702 	}
703 
704 	flags = fi->i_flags & F2FS_FL_USER_VISIBLE;
705 	if (flags & F2FS_APPEND_FL)
706 		stat->attributes |= STATX_ATTR_APPEND;
707 	if (flags & F2FS_COMPR_FL)
708 		stat->attributes |= STATX_ATTR_COMPRESSED;
709 	if (f2fs_encrypted_inode(inode))
710 		stat->attributes |= STATX_ATTR_ENCRYPTED;
711 	if (flags & F2FS_IMMUTABLE_FL)
712 		stat->attributes |= STATX_ATTR_IMMUTABLE;
713 	if (flags & F2FS_NODUMP_FL)
714 		stat->attributes |= STATX_ATTR_NODUMP;
715 
716 	stat->attributes_mask |= (STATX_ATTR_APPEND |
717 				  STATX_ATTR_COMPRESSED |
718 				  STATX_ATTR_ENCRYPTED |
719 				  STATX_ATTR_IMMUTABLE |
720 				  STATX_ATTR_NODUMP);
721 
722 	generic_fillattr(inode, stat);
723 
724 	/* we need to show initial sectors used for inline_data/dentries */
725 	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
726 					f2fs_has_inline_dentry(inode))
727 		stat->blocks += (stat->size + 511) >> 9;
728 
729 	return 0;
730 }
731 
#ifdef CONFIG_F2FS_FS_POSIX_ACL
/*
 * Copy the fields selected by attr->ia_valid from @attr into @inode.
 *
 * Timestamps are truncated to the superblock's time granularity. The new
 * mode is handed to set_acl_inode() instead of being assigned to i_mode
 * in this function; S_ISGID is stripped from it when the caller is neither
 * in the inode's group nor has CAP_FSETID.
 */
static void __setattr_copy(struct inode *inode, const struct iattr *attr)
{
	unsigned int ia_valid = attr->ia_valid;

	if (ia_valid & ATTR_UID)
		inode->i_uid = attr->ia_uid;
	if (ia_valid & ATTR_GID)
		inode->i_gid = attr->ia_gid;
	if (ia_valid & ATTR_ATIME)
		inode->i_atime = timespec64_trunc(attr->ia_atime,
						  inode->i_sb->s_time_gran);
	if (ia_valid & ATTR_MTIME)
		inode->i_mtime = timespec64_trunc(attr->ia_mtime,
						  inode->i_sb->s_time_gran);
	if (ia_valid & ATTR_CTIME)
		inode->i_ctime = timespec64_trunc(attr->ia_ctime,
						  inode->i_sb->s_time_gran);
	if (ia_valid & ATTR_MODE) {
		umode_t mode = attr->ia_mode;

		if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
			mode &= ~S_ISGID;
		set_acl_inode(inode, mode);
	}
}
#else
/* without POSIX ACL support the name is an alias for setattr_copy */
#define __setattr_copy setattr_copy
#endif
761 
/*
 * Apply the attribute changes described by @attr to the inode behind
 * @dentry.
 *
 * Handles, in order: permission/encryption checks, quota initialization
 * and transfer on owner change, size change, copying the remaining
 * attributes, and the ACL update on mode change.
 *
 * Returns 0 on success or a negative errno. The value returned at the end
 * is whatever posix_acl_chmod() produced when ATTR_MODE was set.
 */
int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int err;
	bool size_changed = false;

	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;

	err = setattr_prepare(dentry, attr);
	if (err)
		return err;

	err = fscrypt_prepare_setattr(dentry, attr);
	if (err)
		return err;

	if (is_quota_modification(inode, attr)) {
		err = dquot_initialize(inode);
		if (err)
			return err;
	}
	/* transfer quota only when the uid or gid really changes */
	if ((attr->ia_valid & ATTR_UID &&
		!uid_eq(attr->ia_uid, inode->i_uid)) ||
		(attr->ia_valid & ATTR_GID &&
		!gid_eq(attr->ia_gid, inode->i_gid))) {
		err = dquot_transfer(inode, attr);
		if (err)
			return err;
	}

	if (attr->ia_valid & ATTR_SIZE) {
		/* sampled before truncate_setsize() changes i_size */
		bool to_smaller = (attr->ia_size <= i_size_read(inode));

		/* i_gc_rwsem[WRITE] first, then i_mmap_sem; released in reverse */
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_setsize(inode, attr->ia_size);

		if (to_smaller)
			err = f2fs_truncate(inode);
		/*
		 * do not trim all blocks after i_size if target size is
		 * larger than i_size.
		 */
		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

		if (err)
			return err;

		if (!to_smaller) {
			/* should convert inline inode here */
			if (!f2fs_may_inline_data(inode)) {
				err = f2fs_convert_inline_inode(inode);
				if (err)
					return err;
			}
			inode->i_mtime = inode->i_ctime = current_time(inode);
		}

		down_write(&F2FS_I(inode)->i_sem);
		F2FS_I(inode)->last_disk_size = i_size_read(inode);
		up_write(&F2FS_I(inode)->i_sem);

		size_changed = true;
	}

	__setattr_copy(inode, attr);

	if (attr->ia_valid & ATTR_MODE) {
		err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode));
		/* take i_mode from i_acl_mode on error or when FI_ACL_MODE is set */
		if (err || is_inode_flag_set(inode, FI_ACL_MODE)) {
			inode->i_mode = F2FS_I(inode)->i_acl_mode;
			clear_inode_flag(inode, FI_ACL_MODE);
		}
	}

	/* file size may changed here */
	f2fs_mark_inode_dirty_sync(inode, size_changed);

	/* inode change will produce dirty node pages flushed by checkpoint */
	f2fs_balance_fs(F2FS_I_SB(inode), true);

	return err;
}
848 
/*
 * Inode operations table wiring up the f2fs attribute, ACL and fiemap
 * handlers; listxattr is present only with CONFIG_F2FS_FS_XATTR.
 */
const struct inode_operations f2fs_file_inode_operations = {
	.getattr	= f2fs_getattr,
	.setattr	= f2fs_setattr,
	.get_acl	= f2fs_get_acl,
	.set_acl	= f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
	.listxattr	= f2fs_listxattr,
#endif
	.fiemap		= f2fs_fiemap,
};
859 
860 static int fill_zero(struct inode *inode, pgoff_t index,
861 					loff_t start, loff_t len)
862 {
863 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
864 	struct page *page;
865 
866 	if (!len)
867 		return 0;
868 
869 	f2fs_balance_fs(sbi, true);
870 
871 	f2fs_lock_op(sbi);
872 	page = f2fs_get_new_data_page(inode, NULL, index, false);
873 	f2fs_unlock_op(sbi);
874 
875 	if (IS_ERR(page))
876 		return PTR_ERR(page);
877 
878 	f2fs_wait_on_page_writeback(page, DATA, true);
879 	zero_user(page, start, len);
880 	set_page_dirty(page);
881 	f2fs_put_page(page, 1);
882 	return 0;
883 }
884 
885 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
886 {
887 	int err;
888 
889 	while (pg_start < pg_end) {
890 		struct dnode_of_data dn;
891 		pgoff_t end_offset, count;
892 
893 		set_new_dnode(&dn, inode, NULL, NULL, 0);
894 		err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
895 		if (err) {
896 			if (err == -ENOENT) {
897 				pg_start = f2fs_get_next_page_offset(&dn,
898 								pg_start);
899 				continue;
900 			}
901 			return err;
902 		}
903 
904 		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
905 		count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
906 
907 		f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
908 
909 		f2fs_truncate_data_blocks_range(&dn, count);
910 		f2fs_put_dnode(&dn);
911 
912 		pg_start += count;
913 	}
914 	return 0;
915 }
916 
917 static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
918 {
919 	pgoff_t pg_start, pg_end;
920 	loff_t off_start, off_end;
921 	int ret;
922 
923 	ret = f2fs_convert_inline_inode(inode);
924 	if (ret)
925 		return ret;
926 
927 	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
928 	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
929 
930 	off_start = offset & (PAGE_SIZE - 1);
931 	off_end = (offset + len) & (PAGE_SIZE - 1);
932 
933 	if (pg_start == pg_end) {
934 		ret = fill_zero(inode, pg_start, off_start,
935 						off_end - off_start);
936 		if (ret)
937 			return ret;
938 	} else {
939 		if (off_start) {
940 			ret = fill_zero(inode, pg_start++, off_start,
941 						PAGE_SIZE - off_start);
942 			if (ret)
943 				return ret;
944 		}
945 		if (off_end) {
946 			ret = fill_zero(inode, pg_end, 0, off_end);
947 			if (ret)
948 				return ret;
949 		}
950 
951 		if (pg_start < pg_end) {
952 			struct address_space *mapping = inode->i_mapping;
953 			loff_t blk_start, blk_end;
954 			struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
955 
956 			f2fs_balance_fs(sbi, true);
957 
958 			blk_start = (loff_t)pg_start << PAGE_SHIFT;
959 			blk_end = (loff_t)pg_end << PAGE_SHIFT;
960 
961 			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
962 			down_write(&F2FS_I(inode)->i_mmap_sem);
963 
964 			truncate_inode_pages_range(mapping, blk_start,
965 					blk_end - 1);
966 
967 			f2fs_lock_op(sbi);
968 			ret = f2fs_truncate_hole(inode, pg_start, pg_end);
969 			f2fs_unlock_op(sbi);
970 
971 			up_write(&F2FS_I(inode)->i_mmap_sem);
972 			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
973 		}
974 	}
975 
976 	return ret;
977 }
978 
/*
 * Read @len block addresses of @inode, starting at page index @off, into
 * @blkaddr[].
 *
 * For every address for which f2fs_is_checkpointed_data() is false, the
 * on-node address is replaced with NULL_ADDR and the matching @do_replace[]
 * entry is set to 1. Ranges whose dnode is missing are skipped, leaving
 * their @blkaddr/@do_replace entries untouched.
 *
 * Returns 0 on success, -ENOTSUPP if a non-checkpointed block is met while
 * the LFS option is set, -ENOENT if the lookup fails with -ENOENT and
 * dn.max_level is 0, or any other error from f2fs_get_dnode_of_data().
 */
static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
				int *do_replace, pgoff_t off, pgoff_t len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	int ret, done, i;

next_dnode:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
	if (ret && ret != -ENOENT) {
		return ret;
	} else if (ret == -ENOENT) {
		if (dn.max_level == 0)
			return -ENOENT;
		/* missing dnode: skip its slots in both output arrays */
		done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len);
		blkaddr += done;
		do_replace += done;
		goto next;
	}

	/* number of slots to read from this dnode, capped by @len */
	done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
							dn.ofs_in_node, len);
	for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
		*blkaddr = datablock_addr(dn.inode,
					dn.node_page, dn.ofs_in_node);
		if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {

			if (test_opt(sbi, LFS)) {
				f2fs_put_dnode(&dn);
				return -ENOTSUPP;
			}

			/* do not invalidate this block address */
			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			*do_replace = 1;
		}
	}
	f2fs_put_dnode(&dn);
next:
	len -= done;
	off += done;
	if (len)
		goto next_dnode;
	return 0;
}
1025 
1026 static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
1027 				int *do_replace, pgoff_t off, int len)
1028 {
1029 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1030 	struct dnode_of_data dn;
1031 	int ret, i;
1032 
1033 	for (i = 0; i < len; i++, do_replace++, blkaddr++) {
1034 		if (*do_replace == 0)
1035 			continue;
1036 
1037 		set_new_dnode(&dn, inode, NULL, NULL, 0);
1038 		ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
1039 		if (ret) {
1040 			dec_valid_block_count(sbi, inode, 1);
1041 			f2fs_invalidate_blocks(sbi, *blkaddr);
1042 		} else {
1043 			f2fs_update_data_blkaddr(&dn, *blkaddr);
1044 		}
1045 		f2fs_put_dnode(&dn);
1046 	}
1047 	return 0;
1048 }
1049 
1050 static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
1051 			block_t *blkaddr, int *do_replace,
1052 			pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
1053 {
1054 	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
1055 	pgoff_t i = 0;
1056 	int ret;
1057 
1058 	while (i < len) {
1059 		if (blkaddr[i] == NULL_ADDR && !full) {
1060 			i++;
1061 			continue;
1062 		}
1063 
1064 		if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
1065 			struct dnode_of_data dn;
1066 			struct node_info ni;
1067 			size_t new_size;
1068 			pgoff_t ilen;
1069 
1070 			set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
1071 			ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
1072 			if (ret)
1073 				return ret;
1074 
1075 			ret = f2fs_get_node_info(sbi, dn.nid, &ni);
1076 			if (ret) {
1077 				f2fs_put_dnode(&dn);
1078 				return ret;
1079 			}
1080 
1081 			ilen = min((pgoff_t)
1082 				ADDRS_PER_PAGE(dn.node_page, dst_inode) -
1083 						dn.ofs_in_node, len - i);
1084 			do {
1085 				dn.data_blkaddr = datablock_addr(dn.inode,
1086 						dn.node_page, dn.ofs_in_node);
1087 				f2fs_truncate_data_blocks_range(&dn, 1);
1088 
1089 				if (do_replace[i]) {
1090 					f2fs_i_blocks_write(src_inode,
1091 							1, false, false);
1092 					f2fs_i_blocks_write(dst_inode,
1093 							1, true, false);
1094 					f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
1095 					blkaddr[i], ni.version, true, false);
1096 
1097 					do_replace[i] = 0;
1098 				}
1099 				dn.ofs_in_node++;
1100 				i++;
1101 				new_size = (dst + i) << PAGE_SHIFT;
1102 				if (dst_inode->i_size < new_size)
1103 					f2fs_i_size_write(dst_inode, new_size);
1104 			} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
1105 
1106 			f2fs_put_dnode(&dn);
1107 		} else {
1108 			struct page *psrc, *pdst;
1109 
1110 			psrc = f2fs_get_lock_data_page(src_inode,
1111 							src + i, true);
1112 			if (IS_ERR(psrc))
1113 				return PTR_ERR(psrc);
1114 			pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
1115 								true);
1116 			if (IS_ERR(pdst)) {
1117 				f2fs_put_page(psrc, 1);
1118 				return PTR_ERR(pdst);
1119 			}
1120 			f2fs_copy_page(psrc, pdst);
1121 			set_page_dirty(pdst);
1122 			f2fs_put_page(pdst, 1);
1123 			f2fs_put_page(psrc, 1);
1124 
1125 			ret = f2fs_truncate_hole(src_inode,
1126 						src + i, src + i + 1);
1127 			if (ret)
1128 				return ret;
1129 			i++;
1130 		}
1131 	}
1132 	return 0;
1133 }
1134 
1135 static int __exchange_data_block(struct inode *src_inode,
1136 			struct inode *dst_inode, pgoff_t src, pgoff_t dst,
1137 			pgoff_t len, bool full)
1138 {
1139 	block_t *src_blkaddr;
1140 	int *do_replace;
1141 	pgoff_t olen;
1142 	int ret;
1143 
1144 	while (len) {
1145 		olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
1146 
1147 		src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1148 					array_size(olen, sizeof(block_t)),
1149 					GFP_KERNEL);
1150 		if (!src_blkaddr)
1151 			return -ENOMEM;
1152 
1153 		do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
1154 					array_size(olen, sizeof(int)),
1155 					GFP_KERNEL);
1156 		if (!do_replace) {
1157 			kvfree(src_blkaddr);
1158 			return -ENOMEM;
1159 		}
1160 
1161 		ret = __read_out_blkaddrs(src_inode, src_blkaddr,
1162 					do_replace, src, olen);
1163 		if (ret)
1164 			goto roll_back;
1165 
1166 		ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
1167 					do_replace, src, dst, olen, full);
1168 		if (ret)
1169 			goto roll_back;
1170 
1171 		src += olen;
1172 		dst += olen;
1173 		len -= olen;
1174 
1175 		kvfree(src_blkaddr);
1176 		kvfree(do_replace);
1177 	}
1178 	return 0;
1179 
1180 roll_back:
1181 	__roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
1182 	kvfree(src_blkaddr);
1183 	kvfree(do_replace);
1184 	return ret;
1185 }
1186 
1187 static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
1188 {
1189 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1190 	pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
1191 	pgoff_t start = offset >> PAGE_SHIFT;
1192 	pgoff_t end = (offset + len) >> PAGE_SHIFT;
1193 	int ret;
1194 
1195 	f2fs_balance_fs(sbi, true);
1196 
1197 	/* avoid gc operation during block exchange */
1198 	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1199 	down_write(&F2FS_I(inode)->i_mmap_sem);
1200 
1201 	f2fs_lock_op(sbi);
1202 	f2fs_drop_extent_tree(inode);
1203 	truncate_pagecache(inode, offset);
1204 	ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
1205 	f2fs_unlock_op(sbi);
1206 
1207 	up_write(&F2FS_I(inode)->i_mmap_sem);
1208 	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1209 	return ret;
1210 }
1211 
1212 static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
1213 {
1214 	loff_t new_size;
1215 	int ret;
1216 
1217 	if (offset + len >= i_size_read(inode))
1218 		return -EINVAL;
1219 
1220 	/* collapse range should be aligned to block size of f2fs. */
1221 	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1222 		return -EINVAL;
1223 
1224 	ret = f2fs_convert_inline_inode(inode);
1225 	if (ret)
1226 		return ret;
1227 
1228 	/* write out all dirty pages from offset */
1229 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1230 	if (ret)
1231 		return ret;
1232 
1233 	ret = f2fs_do_collapse(inode, offset, len);
1234 	if (ret)
1235 		return ret;
1236 
1237 	/* write out all moved pages, if possible */
1238 	down_write(&F2FS_I(inode)->i_mmap_sem);
1239 	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1240 	truncate_pagecache(inode, offset);
1241 
1242 	new_size = i_size_read(inode) - len;
1243 	truncate_pagecache(inode, new_size);
1244 
1245 	ret = f2fs_truncate_blocks(inode, new_size, true);
1246 	up_write(&F2FS_I(inode)->i_mmap_sem);
1247 	if (!ret)
1248 		f2fs_i_size_write(inode, new_size);
1249 	return ret;
1250 }
1251 
1252 static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
1253 								pgoff_t end)
1254 {
1255 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1256 	pgoff_t index = start;
1257 	unsigned int ofs_in_node = dn->ofs_in_node;
1258 	blkcnt_t count = 0;
1259 	int ret;
1260 
1261 	for (; index < end; index++, dn->ofs_in_node++) {
1262 		if (datablock_addr(dn->inode, dn->node_page,
1263 					dn->ofs_in_node) == NULL_ADDR)
1264 			count++;
1265 	}
1266 
1267 	dn->ofs_in_node = ofs_in_node;
1268 	ret = f2fs_reserve_new_blocks(dn, count);
1269 	if (ret)
1270 		return ret;
1271 
1272 	dn->ofs_in_node = ofs_in_node;
1273 	for (index = start; index < end; index++, dn->ofs_in_node++) {
1274 		dn->data_blkaddr = datablock_addr(dn->inode,
1275 					dn->node_page, dn->ofs_in_node);
1276 		/*
1277 		 * f2fs_reserve_new_blocks will not guarantee entire block
1278 		 * allocation.
1279 		 */
1280 		if (dn->data_blkaddr == NULL_ADDR) {
1281 			ret = -ENOSPC;
1282 			break;
1283 		}
1284 		if (dn->data_blkaddr != NEW_ADDR) {
1285 			f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
1286 			dn->data_blkaddr = NEW_ADDR;
1287 			f2fs_set_data_blkaddr(dn);
1288 		}
1289 	}
1290 
1291 	f2fs_update_extent_cache_range(dn, start, 0, index - start);
1292 
1293 	return ret;
1294 }
1295 
1296 static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
1297 								int mode)
1298 {
1299 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1300 	struct address_space *mapping = inode->i_mapping;
1301 	pgoff_t index, pg_start, pg_end;
1302 	loff_t new_size = i_size_read(inode);
1303 	loff_t off_start, off_end;
1304 	int ret = 0;
1305 
1306 	ret = inode_newsize_ok(inode, (len + offset));
1307 	if (ret)
1308 		return ret;
1309 
1310 	ret = f2fs_convert_inline_inode(inode);
1311 	if (ret)
1312 		return ret;
1313 
1314 	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
1315 	if (ret)
1316 		return ret;
1317 
1318 	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
1319 	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
1320 
1321 	off_start = offset & (PAGE_SIZE - 1);
1322 	off_end = (offset + len) & (PAGE_SIZE - 1);
1323 
1324 	if (pg_start == pg_end) {
1325 		ret = fill_zero(inode, pg_start, off_start,
1326 						off_end - off_start);
1327 		if (ret)
1328 			return ret;
1329 
1330 		new_size = max_t(loff_t, new_size, offset + len);
1331 	} else {
1332 		if (off_start) {
1333 			ret = fill_zero(inode, pg_start++, off_start,
1334 						PAGE_SIZE - off_start);
1335 			if (ret)
1336 				return ret;
1337 
1338 			new_size = max_t(loff_t, new_size,
1339 					(loff_t)pg_start << PAGE_SHIFT);
1340 		}
1341 
1342 		for (index = pg_start; index < pg_end;) {
1343 			struct dnode_of_data dn;
1344 			unsigned int end_offset;
1345 			pgoff_t end;
1346 
1347 			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1348 			down_write(&F2FS_I(inode)->i_mmap_sem);
1349 
1350 			truncate_pagecache_range(inode,
1351 				(loff_t)index << PAGE_SHIFT,
1352 				((loff_t)pg_end << PAGE_SHIFT) - 1);
1353 
1354 			f2fs_lock_op(sbi);
1355 
1356 			set_new_dnode(&dn, inode, NULL, NULL, 0);
1357 			ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
1358 			if (ret) {
1359 				f2fs_unlock_op(sbi);
1360 				up_write(&F2FS_I(inode)->i_mmap_sem);
1361 				up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1362 				goto out;
1363 			}
1364 
1365 			end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1366 			end = min(pg_end, end_offset - dn.ofs_in_node + index);
1367 
1368 			ret = f2fs_do_zero_range(&dn, index, end);
1369 			f2fs_put_dnode(&dn);
1370 
1371 			f2fs_unlock_op(sbi);
1372 			up_write(&F2FS_I(inode)->i_mmap_sem);
1373 			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1374 
1375 			f2fs_balance_fs(sbi, dn.node_changed);
1376 
1377 			if (ret)
1378 				goto out;
1379 
1380 			index = end;
1381 			new_size = max_t(loff_t, new_size,
1382 					(loff_t)index << PAGE_SHIFT);
1383 		}
1384 
1385 		if (off_end) {
1386 			ret = fill_zero(inode, pg_end, 0, off_end);
1387 			if (ret)
1388 				goto out;
1389 
1390 			new_size = max_t(loff_t, new_size, offset + len);
1391 		}
1392 	}
1393 
1394 out:
1395 	if (new_size > i_size_read(inode)) {
1396 		if (mode & FALLOC_FL_KEEP_SIZE)
1397 			file_set_keep_isize(inode);
1398 		else
1399 			f2fs_i_size_write(inode, new_size);
1400 	}
1401 	return ret;
1402 }
1403 
1404 static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
1405 {
1406 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1407 	pgoff_t nr, pg_start, pg_end, delta, idx;
1408 	loff_t new_size;
1409 	int ret = 0;
1410 
1411 	new_size = i_size_read(inode) + len;
1412 	ret = inode_newsize_ok(inode, new_size);
1413 	if (ret)
1414 		return ret;
1415 
1416 	if (offset >= i_size_read(inode))
1417 		return -EINVAL;
1418 
1419 	/* insert range should be aligned to block size of f2fs. */
1420 	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
1421 		return -EINVAL;
1422 
1423 	ret = f2fs_convert_inline_inode(inode);
1424 	if (ret)
1425 		return ret;
1426 
1427 	f2fs_balance_fs(sbi, true);
1428 
1429 	down_write(&F2FS_I(inode)->i_mmap_sem);
1430 	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
1431 	up_write(&F2FS_I(inode)->i_mmap_sem);
1432 	if (ret)
1433 		return ret;
1434 
1435 	/* write out all dirty pages from offset */
1436 	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1437 	if (ret)
1438 		return ret;
1439 
1440 	pg_start = offset >> PAGE_SHIFT;
1441 	pg_end = (offset + len) >> PAGE_SHIFT;
1442 	delta = pg_end - pg_start;
1443 	idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
1444 
1445 	/* avoid gc operation during block exchange */
1446 	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1447 	down_write(&F2FS_I(inode)->i_mmap_sem);
1448 	truncate_pagecache(inode, offset);
1449 
1450 	while (!ret && idx > pg_start) {
1451 		nr = idx - pg_start;
1452 		if (nr > delta)
1453 			nr = delta;
1454 		idx -= nr;
1455 
1456 		f2fs_lock_op(sbi);
1457 		f2fs_drop_extent_tree(inode);
1458 
1459 		ret = __exchange_data_block(inode, inode, idx,
1460 					idx + delta, nr, false);
1461 		f2fs_unlock_op(sbi);
1462 	}
1463 	up_write(&F2FS_I(inode)->i_mmap_sem);
1464 	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1465 
1466 	/* write out all moved pages, if possible */
1467 	down_write(&F2FS_I(inode)->i_mmap_sem);
1468 	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
1469 	truncate_pagecache(inode, offset);
1470 	up_write(&F2FS_I(inode)->i_mmap_sem);
1471 
1472 	if (!ret)
1473 		f2fs_i_size_write(inode, new_size);
1474 	return ret;
1475 }
1476 
1477 static int expand_inode_data(struct inode *inode, loff_t offset,
1478 					loff_t len, int mode)
1479 {
1480 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1481 	struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
1482 			.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE };
1483 	pgoff_t pg_end;
1484 	loff_t new_size = i_size_read(inode);
1485 	loff_t off_end;
1486 	int err;
1487 
1488 	err = inode_newsize_ok(inode, (len + offset));
1489 	if (err)
1490 		return err;
1491 
1492 	err = f2fs_convert_inline_inode(inode);
1493 	if (err)
1494 		return err;
1495 
1496 	f2fs_balance_fs(sbi, true);
1497 
1498 	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
1499 	off_end = (offset + len) & (PAGE_SIZE - 1);
1500 
1501 	map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT;
1502 	map.m_len = pg_end - map.m_lblk;
1503 	if (off_end)
1504 		map.m_len++;
1505 
1506 	err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
1507 	if (err) {
1508 		pgoff_t last_off;
1509 
1510 		if (!map.m_len)
1511 			return err;
1512 
1513 		last_off = map.m_lblk + map.m_len - 1;
1514 
1515 		/* update new size to the failed position */
1516 		new_size = (last_off == pg_end) ? offset + len :
1517 					(loff_t)(last_off + 1) << PAGE_SHIFT;
1518 	} else {
1519 		new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
1520 	}
1521 
1522 	if (new_size > i_size_read(inode)) {
1523 		if (mode & FALLOC_FL_KEEP_SIZE)
1524 			file_set_keep_isize(inode);
1525 		else
1526 			f2fs_i_size_write(inode, new_size);
1527 	}
1528 
1529 	return err;
1530 }
1531 
1532 static long f2fs_fallocate(struct file *file, int mode,
1533 				loff_t offset, loff_t len)
1534 {
1535 	struct inode *inode = file_inode(file);
1536 	long ret = 0;
1537 
1538 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
1539 		return -EIO;
1540 
1541 	/* f2fs only support ->fallocate for regular file */
1542 	if (!S_ISREG(inode->i_mode))
1543 		return -EINVAL;
1544 
1545 	if (f2fs_encrypted_inode(inode) &&
1546 		(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
1547 		return -EOPNOTSUPP;
1548 
1549 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
1550 			FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
1551 			FALLOC_FL_INSERT_RANGE))
1552 		return -EOPNOTSUPP;
1553 
1554 	inode_lock(inode);
1555 
1556 	if (mode & FALLOC_FL_PUNCH_HOLE) {
1557 		if (offset >= inode->i_size)
1558 			goto out;
1559 
1560 		ret = punch_hole(inode, offset, len);
1561 	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
1562 		ret = f2fs_collapse_range(inode, offset, len);
1563 	} else if (mode & FALLOC_FL_ZERO_RANGE) {
1564 		ret = f2fs_zero_range(inode, offset, len, mode);
1565 	} else if (mode & FALLOC_FL_INSERT_RANGE) {
1566 		ret = f2fs_insert_range(inode, offset, len);
1567 	} else {
1568 		ret = expand_inode_data(inode, offset, len, mode);
1569 	}
1570 
1571 	if (!ret) {
1572 		inode->i_mtime = inode->i_ctime = current_time(inode);
1573 		f2fs_mark_inode_dirty_sync(inode, false);
1574 		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1575 	}
1576 
1577 out:
1578 	inode_unlock(inode);
1579 
1580 	trace_f2fs_fallocate(inode, mode, offset, len, ret);
1581 	return ret;
1582 }
1583 
1584 static int f2fs_release_file(struct inode *inode, struct file *filp)
1585 {
1586 	/*
1587 	 * f2fs_relase_file is called at every close calls. So we should
1588 	 * not drop any inmemory pages by close called by other process.
1589 	 */
1590 	if (!(filp->f_mode & FMODE_WRITE) ||
1591 			atomic_read(&inode->i_writecount) != 1)
1592 		return 0;
1593 
1594 	/* some remained atomic pages should discarded */
1595 	if (f2fs_is_atomic_file(inode))
1596 		f2fs_drop_inmem_pages(inode);
1597 	if (f2fs_is_volatile_file(inode)) {
1598 		set_inode_flag(inode, FI_DROP_CACHE);
1599 		filemap_fdatawrite(inode->i_mapping);
1600 		clear_inode_flag(inode, FI_DROP_CACHE);
1601 		clear_inode_flag(inode, FI_VOLATILE_FILE);
1602 		stat_dec_volatile_write(inode);
1603 	}
1604 	return 0;
1605 }
1606 
1607 static int f2fs_file_flush(struct file *file, fl_owner_t id)
1608 {
1609 	struct inode *inode = file_inode(file);
1610 
1611 	/*
1612 	 * If the process doing a transaction is crashed, we should do
1613 	 * roll-back. Otherwise, other reader/write can see corrupted database
1614 	 * until all the writers close its file. Since this should be done
1615 	 * before dropping file lock, it needs to do in ->flush.
1616 	 */
1617 	if (f2fs_is_atomic_file(inode) &&
1618 			F2FS_I(inode)->inmem_task == current)
1619 		f2fs_drop_inmem_pages(inode);
1620 	return 0;
1621 }
1622 
1623 static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
1624 {
1625 	struct inode *inode = file_inode(filp);
1626 	struct f2fs_inode_info *fi = F2FS_I(inode);
1627 	unsigned int flags = fi->i_flags;
1628 
1629 	if (f2fs_encrypted_inode(inode))
1630 		flags |= F2FS_ENCRYPT_FL;
1631 	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
1632 		flags |= F2FS_INLINE_DATA_FL;
1633 
1634 	flags &= F2FS_FL_USER_VISIBLE;
1635 
1636 	return put_user(flags, (int __user *)arg);
1637 }
1638 
1639 static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
1640 {
1641 	struct f2fs_inode_info *fi = F2FS_I(inode);
1642 	unsigned int oldflags;
1643 
1644 	/* Is it quota file? Do not allow user to mess with it */
1645 	if (IS_NOQUOTA(inode))
1646 		return -EPERM;
1647 
1648 	flags = f2fs_mask_flags(inode->i_mode, flags);
1649 
1650 	oldflags = fi->i_flags;
1651 
1652 	if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
1653 		if (!capable(CAP_LINUX_IMMUTABLE))
1654 			return -EPERM;
1655 
1656 	flags = flags & F2FS_FL_USER_MODIFIABLE;
1657 	flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE;
1658 	fi->i_flags = flags;
1659 
1660 	if (fi->i_flags & F2FS_PROJINHERIT_FL)
1661 		set_inode_flag(inode, FI_PROJ_INHERIT);
1662 	else
1663 		clear_inode_flag(inode, FI_PROJ_INHERIT);
1664 
1665 	inode->i_ctime = current_time(inode);
1666 	f2fs_set_inode_flags(inode);
1667 	f2fs_mark_inode_dirty_sync(inode, false);
1668 	return 0;
1669 }
1670 
1671 static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
1672 {
1673 	struct inode *inode = file_inode(filp);
1674 	unsigned int flags;
1675 	int ret;
1676 
1677 	if (!inode_owner_or_capable(inode))
1678 		return -EACCES;
1679 
1680 	if (get_user(flags, (int __user *)arg))
1681 		return -EFAULT;
1682 
1683 	ret = mnt_want_write_file(filp);
1684 	if (ret)
1685 		return ret;
1686 
1687 	inode_lock(inode);
1688 
1689 	ret = __f2fs_ioc_setflags(inode, flags);
1690 
1691 	inode_unlock(inode);
1692 	mnt_drop_write_file(filp);
1693 	return ret;
1694 }
1695 
1696 static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
1697 {
1698 	struct inode *inode = file_inode(filp);
1699 
1700 	return put_user(inode->i_generation, (int __user *)arg);
1701 }
1702 
1703 static int f2fs_ioc_start_atomic_write(struct file *filp)
1704 {
1705 	struct inode *inode = file_inode(filp);
1706 	int ret;
1707 
1708 	if (!inode_owner_or_capable(inode))
1709 		return -EACCES;
1710 
1711 	if (!S_ISREG(inode->i_mode))
1712 		return -EINVAL;
1713 
1714 	ret = mnt_want_write_file(filp);
1715 	if (ret)
1716 		return ret;
1717 
1718 	inode_lock(inode);
1719 
1720 	if (f2fs_is_atomic_file(inode)) {
1721 		if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
1722 			ret = -EINVAL;
1723 		goto out;
1724 	}
1725 
1726 	ret = f2fs_convert_inline_inode(inode);
1727 	if (ret)
1728 		goto out;
1729 
1730 	down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1731 
1732 	if (!get_dirty_pages(inode))
1733 		goto skip_flush;
1734 
1735 	f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
1736 		"Unexpected flush for atomic writes: ino=%lu, npages=%u",
1737 					inode->i_ino, get_dirty_pages(inode));
1738 	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
1739 	if (ret) {
1740 		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1741 		goto out;
1742 	}
1743 skip_flush:
1744 	set_inode_flag(inode, FI_ATOMIC_FILE);
1745 	clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
1746 	up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
1747 
1748 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1749 	F2FS_I(inode)->inmem_task = current;
1750 	stat_inc_atomic_write(inode);
1751 	stat_update_max_atomic_write(inode);
1752 out:
1753 	inode_unlock(inode);
1754 	mnt_drop_write_file(filp);
1755 	return ret;
1756 }
1757 
1758 static int f2fs_ioc_commit_atomic_write(struct file *filp)
1759 {
1760 	struct inode *inode = file_inode(filp);
1761 	int ret;
1762 
1763 	if (!inode_owner_or_capable(inode))
1764 		return -EACCES;
1765 
1766 	ret = mnt_want_write_file(filp);
1767 	if (ret)
1768 		return ret;
1769 
1770 	f2fs_balance_fs(F2FS_I_SB(inode), true);
1771 
1772 	inode_lock(inode);
1773 
1774 	if (f2fs_is_volatile_file(inode)) {
1775 		ret = -EINVAL;
1776 		goto err_out;
1777 	}
1778 
1779 	if (f2fs_is_atomic_file(inode)) {
1780 		ret = f2fs_commit_inmem_pages(inode);
1781 		if (ret)
1782 			goto err_out;
1783 
1784 		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1785 		if (!ret) {
1786 			clear_inode_flag(inode, FI_ATOMIC_FILE);
1787 			F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
1788 			stat_dec_atomic_write(inode);
1789 		}
1790 	} else {
1791 		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
1792 	}
1793 err_out:
1794 	if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
1795 		clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
1796 		ret = -EINVAL;
1797 	}
1798 	inode_unlock(inode);
1799 	mnt_drop_write_file(filp);
1800 	return ret;
1801 }
1802 
1803 static int f2fs_ioc_start_volatile_write(struct file *filp)
1804 {
1805 	struct inode *inode = file_inode(filp);
1806 	int ret;
1807 
1808 	if (!inode_owner_or_capable(inode))
1809 		return -EACCES;
1810 
1811 	if (!S_ISREG(inode->i_mode))
1812 		return -EINVAL;
1813 
1814 	ret = mnt_want_write_file(filp);
1815 	if (ret)
1816 		return ret;
1817 
1818 	inode_lock(inode);
1819 
1820 	if (f2fs_is_volatile_file(inode))
1821 		goto out;
1822 
1823 	ret = f2fs_convert_inline_inode(inode);
1824 	if (ret)
1825 		goto out;
1826 
1827 	stat_inc_volatile_write(inode);
1828 	stat_update_max_volatile_write(inode);
1829 
1830 	set_inode_flag(inode, FI_VOLATILE_FILE);
1831 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1832 out:
1833 	inode_unlock(inode);
1834 	mnt_drop_write_file(filp);
1835 	return ret;
1836 }
1837 
1838 static int f2fs_ioc_release_volatile_write(struct file *filp)
1839 {
1840 	struct inode *inode = file_inode(filp);
1841 	int ret;
1842 
1843 	if (!inode_owner_or_capable(inode))
1844 		return -EACCES;
1845 
1846 	ret = mnt_want_write_file(filp);
1847 	if (ret)
1848 		return ret;
1849 
1850 	inode_lock(inode);
1851 
1852 	if (!f2fs_is_volatile_file(inode))
1853 		goto out;
1854 
1855 	if (!f2fs_is_first_block_written(inode)) {
1856 		ret = truncate_partial_data_page(inode, 0, true);
1857 		goto out;
1858 	}
1859 
1860 	ret = punch_hole(inode, 0, F2FS_BLKSIZE);
1861 out:
1862 	inode_unlock(inode);
1863 	mnt_drop_write_file(filp);
1864 	return ret;
1865 }
1866 
1867 static int f2fs_ioc_abort_volatile_write(struct file *filp)
1868 {
1869 	struct inode *inode = file_inode(filp);
1870 	int ret;
1871 
1872 	if (!inode_owner_or_capable(inode))
1873 		return -EACCES;
1874 
1875 	ret = mnt_want_write_file(filp);
1876 	if (ret)
1877 		return ret;
1878 
1879 	inode_lock(inode);
1880 
1881 	if (f2fs_is_atomic_file(inode))
1882 		f2fs_drop_inmem_pages(inode);
1883 	if (f2fs_is_volatile_file(inode)) {
1884 		clear_inode_flag(inode, FI_VOLATILE_FILE);
1885 		stat_dec_volatile_write(inode);
1886 		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
1887 	}
1888 
1889 	clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
1890 
1891 	inode_unlock(inode);
1892 
1893 	mnt_drop_write_file(filp);
1894 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1895 	return ret;
1896 }
1897 
1898 static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
1899 {
1900 	struct inode *inode = file_inode(filp);
1901 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1902 	struct super_block *sb = sbi->sb;
1903 	__u32 in;
1904 	int ret = 0;
1905 
1906 	if (!capable(CAP_SYS_ADMIN))
1907 		return -EPERM;
1908 
1909 	if (get_user(in, (__u32 __user *)arg))
1910 		return -EFAULT;
1911 
1912 	if (in != F2FS_GOING_DOWN_FULLSYNC) {
1913 		ret = mnt_want_write_file(filp);
1914 		if (ret)
1915 			return ret;
1916 	}
1917 
1918 	switch (in) {
1919 	case F2FS_GOING_DOWN_FULLSYNC:
1920 		sb = freeze_bdev(sb->s_bdev);
1921 		if (IS_ERR(sb)) {
1922 			ret = PTR_ERR(sb);
1923 			goto out;
1924 		}
1925 		if (sb) {
1926 			f2fs_stop_checkpoint(sbi, false);
1927 			set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
1928 			thaw_bdev(sb->s_bdev, sb);
1929 		}
1930 		break;
1931 	case F2FS_GOING_DOWN_METASYNC:
1932 		/* do checkpoint only */
1933 		ret = f2fs_sync_fs(sb, 1);
1934 		if (ret)
1935 			goto out;
1936 		f2fs_stop_checkpoint(sbi, false);
1937 		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
1938 		break;
1939 	case F2FS_GOING_DOWN_NOSYNC:
1940 		f2fs_stop_checkpoint(sbi, false);
1941 		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
1942 		break;
1943 	case F2FS_GOING_DOWN_METAFLUSH:
1944 		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
1945 		f2fs_stop_checkpoint(sbi, false);
1946 		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
1947 		break;
1948 	default:
1949 		ret = -EINVAL;
1950 		goto out;
1951 	}
1952 
1953 	f2fs_stop_gc_thread(sbi);
1954 	f2fs_stop_discard_thread(sbi);
1955 
1956 	f2fs_drop_discard_cmd(sbi);
1957 	clear_opt(sbi, DISCARD);
1958 
1959 	f2fs_update_time(sbi, REQ_TIME);
1960 out:
1961 	if (in != F2FS_GOING_DOWN_FULLSYNC)
1962 		mnt_drop_write_file(filp);
1963 	return ret;
1964 }
1965 
1966 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
1967 {
1968 	struct inode *inode = file_inode(filp);
1969 	struct super_block *sb = inode->i_sb;
1970 	struct request_queue *q = bdev_get_queue(sb->s_bdev);
1971 	struct fstrim_range range;
1972 	int ret;
1973 
1974 	if (!capable(CAP_SYS_ADMIN))
1975 		return -EPERM;
1976 
1977 	if (!f2fs_hw_support_discard(F2FS_SB(sb)))
1978 		return -EOPNOTSUPP;
1979 
1980 	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
1981 				sizeof(range)))
1982 		return -EFAULT;
1983 
1984 	ret = mnt_want_write_file(filp);
1985 	if (ret)
1986 		return ret;
1987 
1988 	range.minlen = max((unsigned int)range.minlen,
1989 				q->limits.discard_granularity);
1990 	ret = f2fs_trim_fs(F2FS_SB(sb), &range);
1991 	mnt_drop_write_file(filp);
1992 	if (ret < 0)
1993 		return ret;
1994 
1995 	if (copy_to_user((struct fstrim_range __user *)arg, &range,
1996 				sizeof(range)))
1997 		return -EFAULT;
1998 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
1999 	return 0;
2000 }
2001 
/* Return true if at least one of the 16 bytes in @u is non-zero. */
static bool uuid_is_nonzero(__u8 u[16])
{
	__u8 acc = 0;
	int idx = 0;

	/* OR all bytes together; any set bit survives in the accumulator */
	while (idx < 16)
		acc |= u[idx++];

	return acc != 0;
}
2011 
2012 static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
2013 {
2014 	struct inode *inode = file_inode(filp);
2015 
2016 	if (!f2fs_sb_has_encrypt(inode->i_sb))
2017 		return -EOPNOTSUPP;
2018 
2019 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2020 
2021 	return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
2022 }
2023 
2024 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
2025 {
2026 	if (!f2fs_sb_has_encrypt(file_inode(filp)->i_sb))
2027 		return -EOPNOTSUPP;
2028 	return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
2029 }
2030 
2031 static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
2032 {
2033 	struct inode *inode = file_inode(filp);
2034 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2035 	int err;
2036 
2037 	if (!f2fs_sb_has_encrypt(inode->i_sb))
2038 		return -EOPNOTSUPP;
2039 
2040 	err = mnt_want_write_file(filp);
2041 	if (err)
2042 		return err;
2043 
2044 	down_write(&sbi->sb_lock);
2045 
2046 	if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
2047 		goto got_it;
2048 
2049 	/* update superblock with uuid */
2050 	generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
2051 
2052 	err = f2fs_commit_super(sbi, false);
2053 	if (err) {
2054 		/* undo new data */
2055 		memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
2056 		goto out_err;
2057 	}
2058 got_it:
2059 	if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
2060 									16))
2061 		err = -EFAULT;
2062 out_err:
2063 	up_write(&sbi->sb_lock);
2064 	mnt_drop_write_file(filp);
2065 	return err;
2066 }
2067 
2068 static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
2069 {
2070 	struct inode *inode = file_inode(filp);
2071 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2072 	__u32 sync;
2073 	int ret;
2074 
2075 	if (!capable(CAP_SYS_ADMIN))
2076 		return -EPERM;
2077 
2078 	if (get_user(sync, (__u32 __user *)arg))
2079 		return -EFAULT;
2080 
2081 	if (f2fs_readonly(sbi->sb))
2082 		return -EROFS;
2083 
2084 	ret = mnt_want_write_file(filp);
2085 	if (ret)
2086 		return ret;
2087 
2088 	if (!sync) {
2089 		if (!mutex_trylock(&sbi->gc_mutex)) {
2090 			ret = -EBUSY;
2091 			goto out;
2092 		}
2093 	} else {
2094 		mutex_lock(&sbi->gc_mutex);
2095 	}
2096 
2097 	ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
2098 out:
2099 	mnt_drop_write_file(filp);
2100 	return ret;
2101 }
2102 
2103 static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
2104 {
2105 	struct inode *inode = file_inode(filp);
2106 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2107 	struct f2fs_gc_range range;
2108 	u64 end;
2109 	int ret;
2110 
2111 	if (!capable(CAP_SYS_ADMIN))
2112 		return -EPERM;
2113 
2114 	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
2115 							sizeof(range)))
2116 		return -EFAULT;
2117 
2118 	if (f2fs_readonly(sbi->sb))
2119 		return -EROFS;
2120 
2121 	end = range.start + range.len;
2122 	if (range.start < MAIN_BLKADDR(sbi) || end >= MAX_BLKADDR(sbi)) {
2123 		return -EINVAL;
2124 	}
2125 
2126 	ret = mnt_want_write_file(filp);
2127 	if (ret)
2128 		return ret;
2129 
2130 do_more:
2131 	if (!range.sync) {
2132 		if (!mutex_trylock(&sbi->gc_mutex)) {
2133 			ret = -EBUSY;
2134 			goto out;
2135 		}
2136 	} else {
2137 		mutex_lock(&sbi->gc_mutex);
2138 	}
2139 
2140 	ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
2141 	range.start += sbi->blocks_per_seg;
2142 	if (range.start <= end)
2143 		goto do_more;
2144 out:
2145 	mnt_drop_write_file(filp);
2146 	return ret;
2147 }
2148 
2149 static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
2150 {
2151 	struct inode *inode = file_inode(filp);
2152 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2153 	int ret;
2154 
2155 	if (!capable(CAP_SYS_ADMIN))
2156 		return -EPERM;
2157 
2158 	if (f2fs_readonly(sbi->sb))
2159 		return -EROFS;
2160 
2161 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2162 		f2fs_msg(sbi->sb, KERN_INFO,
2163 			"Skipping Checkpoint. Checkpoints currently disabled.");
2164 		return -EINVAL;
2165 	}
2166 
2167 	ret = mnt_want_write_file(filp);
2168 	if (ret)
2169 		return ret;
2170 
2171 	ret = f2fs_sync_fs(sbi->sb, 1);
2172 
2173 	mnt_drop_write_file(filp);
2174 	return ret;
2175 }
2176 
2177 static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
2178 					struct file *filp,
2179 					struct f2fs_defragment *range)
2180 {
2181 	struct inode *inode = file_inode(filp);
2182 	struct f2fs_map_blocks map = { .m_next_extent = NULL,
2183 					.m_seg_type = NO_CHECK_TYPE };
2184 	struct extent_info ei = {0, 0, 0};
2185 	pgoff_t pg_start, pg_end, next_pgofs;
2186 	unsigned int blk_per_seg = sbi->blocks_per_seg;
2187 	unsigned int total = 0, sec_num;
2188 	block_t blk_end = 0;
2189 	bool fragmented = false;
2190 	int err;
2191 
2192 	/* if in-place-update policy is enabled, don't waste time here */
2193 	if (f2fs_should_update_inplace(inode, NULL))
2194 		return -EINVAL;
2195 
2196 	pg_start = range->start >> PAGE_SHIFT;
2197 	pg_end = (range->start + range->len) >> PAGE_SHIFT;
2198 
2199 	f2fs_balance_fs(sbi, true);
2200 
2201 	inode_lock(inode);
2202 
2203 	/* writeback all dirty pages in the range */
2204 	err = filemap_write_and_wait_range(inode->i_mapping, range->start,
2205 						range->start + range->len - 1);
2206 	if (err)
2207 		goto out;
2208 
2209 	/*
2210 	 * lookup mapping info in extent cache, skip defragmenting if physical
2211 	 * block addresses are continuous.
2212 	 */
2213 	if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
2214 		if (ei.fofs + ei.len >= pg_end)
2215 			goto out;
2216 	}
2217 
2218 	map.m_lblk = pg_start;
2219 	map.m_next_pgofs = &next_pgofs;
2220 
2221 	/*
2222 	 * lookup mapping info in dnode page cache, skip defragmenting if all
2223 	 * physical block addresses are continuous even if there are hole(s)
2224 	 * in logical blocks.
2225 	 */
2226 	while (map.m_lblk < pg_end) {
2227 		map.m_len = pg_end - map.m_lblk;
2228 		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
2229 		if (err)
2230 			goto out;
2231 
2232 		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2233 			map.m_lblk = next_pgofs;
2234 			continue;
2235 		}
2236 
2237 		if (blk_end && blk_end != map.m_pblk)
2238 			fragmented = true;
2239 
2240 		/* record total count of block that we're going to move */
2241 		total += map.m_len;
2242 
2243 		blk_end = map.m_pblk + map.m_len;
2244 
2245 		map.m_lblk += map.m_len;
2246 	}
2247 
2248 	if (!fragmented)
2249 		goto out;
2250 
2251 	sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
2252 
2253 	/*
2254 	 * make sure there are enough free section for LFS allocation, this can
2255 	 * avoid defragment running in SSR mode when free section are allocated
2256 	 * intensively
2257 	 */
2258 	if (has_not_enough_free_secs(sbi, 0, sec_num)) {
2259 		err = -EAGAIN;
2260 		goto out;
2261 	}
2262 
2263 	map.m_lblk = pg_start;
2264 	map.m_len = pg_end - pg_start;
2265 	total = 0;
2266 
2267 	while (map.m_lblk < pg_end) {
2268 		pgoff_t idx;
2269 		int cnt = 0;
2270 
2271 do_map:
2272 		map.m_len = pg_end - map.m_lblk;
2273 		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
2274 		if (err)
2275 			goto clear_out;
2276 
2277 		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
2278 			map.m_lblk = next_pgofs;
2279 			continue;
2280 		}
2281 
2282 		set_inode_flag(inode, FI_DO_DEFRAG);
2283 
2284 		idx = map.m_lblk;
2285 		while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
2286 			struct page *page;
2287 
2288 			page = f2fs_get_lock_data_page(inode, idx, true);
2289 			if (IS_ERR(page)) {
2290 				err = PTR_ERR(page);
2291 				goto clear_out;
2292 			}
2293 
2294 			set_page_dirty(page);
2295 			f2fs_put_page(page, 1);
2296 
2297 			idx++;
2298 			cnt++;
2299 			total++;
2300 		}
2301 
2302 		map.m_lblk = idx;
2303 
2304 		if (idx < pg_end && cnt < blk_per_seg)
2305 			goto do_map;
2306 
2307 		clear_inode_flag(inode, FI_DO_DEFRAG);
2308 
2309 		err = filemap_fdatawrite(inode->i_mapping);
2310 		if (err)
2311 			goto out;
2312 	}
2313 clear_out:
2314 	clear_inode_flag(inode, FI_DO_DEFRAG);
2315 out:
2316 	inode_unlock(inode);
2317 	if (!err)
2318 		range->len = (u64)total << PAGE_SHIFT;
2319 	return err;
2320 }
2321 
2322 static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
2323 {
2324 	struct inode *inode = file_inode(filp);
2325 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2326 	struct f2fs_defragment range;
2327 	int err;
2328 
2329 	if (!capable(CAP_SYS_ADMIN))
2330 		return -EPERM;
2331 
2332 	if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
2333 		return -EINVAL;
2334 
2335 	if (f2fs_readonly(sbi->sb))
2336 		return -EROFS;
2337 
2338 	if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
2339 							sizeof(range)))
2340 		return -EFAULT;
2341 
2342 	/* verify alignment of offset & size */
2343 	if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
2344 		return -EINVAL;
2345 
2346 	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
2347 					sbi->max_file_blocks))
2348 		return -EINVAL;
2349 
2350 	err = mnt_want_write_file(filp);
2351 	if (err)
2352 		return err;
2353 
2354 	err = f2fs_defragment_range(sbi, filp, &range);
2355 	mnt_drop_write_file(filp);
2356 
2357 	f2fs_update_time(sbi, REQ_TIME);
2358 	if (err < 0)
2359 		return err;
2360 
2361 	if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
2362 							sizeof(range)))
2363 		return -EFAULT;
2364 
2365 	return 0;
2366 }
2367 
2368 static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
2369 			struct file *file_out, loff_t pos_out, size_t len)
2370 {
2371 	struct inode *src = file_inode(file_in);
2372 	struct inode *dst = file_inode(file_out);
2373 	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
2374 	size_t olen = len, dst_max_i_size = 0;
2375 	size_t dst_osize;
2376 	int ret;
2377 
2378 	if (file_in->f_path.mnt != file_out->f_path.mnt ||
2379 				src->i_sb != dst->i_sb)
2380 		return -EXDEV;
2381 
2382 	if (unlikely(f2fs_readonly(src->i_sb)))
2383 		return -EROFS;
2384 
2385 	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
2386 		return -EINVAL;
2387 
2388 	if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
2389 		return -EOPNOTSUPP;
2390 
2391 	if (src == dst) {
2392 		if (pos_in == pos_out)
2393 			return 0;
2394 		if (pos_out > pos_in && pos_out < pos_in + len)
2395 			return -EINVAL;
2396 	}
2397 
2398 	inode_lock(src);
2399 	if (src != dst) {
2400 		ret = -EBUSY;
2401 		if (!inode_trylock(dst))
2402 			goto out;
2403 	}
2404 
2405 	ret = -EINVAL;
2406 	if (pos_in + len > src->i_size || pos_in + len < pos_in)
2407 		goto out_unlock;
2408 	if (len == 0)
2409 		olen = len = src->i_size - pos_in;
2410 	if (pos_in + len == src->i_size)
2411 		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
2412 	if (len == 0) {
2413 		ret = 0;
2414 		goto out_unlock;
2415 	}
2416 
2417 	dst_osize = dst->i_size;
2418 	if (pos_out + olen > dst->i_size)
2419 		dst_max_i_size = pos_out + olen;
2420 
2421 	/* verify the end result is block aligned */
2422 	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
2423 			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
2424 			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
2425 		goto out_unlock;
2426 
2427 	ret = f2fs_convert_inline_inode(src);
2428 	if (ret)
2429 		goto out_unlock;
2430 
2431 	ret = f2fs_convert_inline_inode(dst);
2432 	if (ret)
2433 		goto out_unlock;
2434 
2435 	/* write out all dirty pages from offset */
2436 	ret = filemap_write_and_wait_range(src->i_mapping,
2437 					pos_in, pos_in + len);
2438 	if (ret)
2439 		goto out_unlock;
2440 
2441 	ret = filemap_write_and_wait_range(dst->i_mapping,
2442 					pos_out, pos_out + len);
2443 	if (ret)
2444 		goto out_unlock;
2445 
2446 	f2fs_balance_fs(sbi, true);
2447 
2448 	down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
2449 	if (src != dst) {
2450 		ret = -EBUSY;
2451 		if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
2452 			goto out_src;
2453 	}
2454 
2455 	f2fs_lock_op(sbi);
2456 	ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
2457 				pos_out >> F2FS_BLKSIZE_BITS,
2458 				len >> F2FS_BLKSIZE_BITS, false);
2459 
2460 	if (!ret) {
2461 		if (dst_max_i_size)
2462 			f2fs_i_size_write(dst, dst_max_i_size);
2463 		else if (dst_osize != dst->i_size)
2464 			f2fs_i_size_write(dst, dst_osize);
2465 	}
2466 	f2fs_unlock_op(sbi);
2467 
2468 	if (src != dst)
2469 		up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
2470 out_src:
2471 	up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
2472 out_unlock:
2473 	if (src != dst)
2474 		inode_unlock(dst);
2475 out:
2476 	inode_unlock(src);
2477 	return ret;
2478 }
2479 
2480 static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
2481 {
2482 	struct f2fs_move_range range;
2483 	struct fd dst;
2484 	int err;
2485 
2486 	if (!(filp->f_mode & FMODE_READ) ||
2487 			!(filp->f_mode & FMODE_WRITE))
2488 		return -EBADF;
2489 
2490 	if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
2491 							sizeof(range)))
2492 		return -EFAULT;
2493 
2494 	dst = fdget(range.dst_fd);
2495 	if (!dst.file)
2496 		return -EBADF;
2497 
2498 	if (!(dst.file->f_mode & FMODE_WRITE)) {
2499 		err = -EBADF;
2500 		goto err_out;
2501 	}
2502 
2503 	err = mnt_want_write_file(filp);
2504 	if (err)
2505 		goto err_out;
2506 
2507 	err = f2fs_move_file_range(filp, range.pos_in, dst.file,
2508 					range.pos_out, range.len);
2509 
2510 	mnt_drop_write_file(filp);
2511 	if (err)
2512 		goto err_out;
2513 
2514 	if (copy_to_user((struct f2fs_move_range __user *)arg,
2515 						&range, sizeof(range)))
2516 		err = -EFAULT;
2517 err_out:
2518 	fdput(dst);
2519 	return err;
2520 }
2521 
2522 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
2523 {
2524 	struct inode *inode = file_inode(filp);
2525 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2526 	struct sit_info *sm = SIT_I(sbi);
2527 	unsigned int start_segno = 0, end_segno = 0;
2528 	unsigned int dev_start_segno = 0, dev_end_segno = 0;
2529 	struct f2fs_flush_device range;
2530 	int ret;
2531 
2532 	if (!capable(CAP_SYS_ADMIN))
2533 		return -EPERM;
2534 
2535 	if (f2fs_readonly(sbi->sb))
2536 		return -EROFS;
2537 
2538 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2539 		return -EINVAL;
2540 
2541 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
2542 							sizeof(range)))
2543 		return -EFAULT;
2544 
2545 	if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
2546 			sbi->segs_per_sec != 1) {
2547 		f2fs_msg(sbi->sb, KERN_WARNING,
2548 			"Can't flush %u in %d for segs_per_sec %u != 1\n",
2549 				range.dev_num, sbi->s_ndevs,
2550 				sbi->segs_per_sec);
2551 		return -EINVAL;
2552 	}
2553 
2554 	ret = mnt_want_write_file(filp);
2555 	if (ret)
2556 		return ret;
2557 
2558 	if (range.dev_num != 0)
2559 		dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
2560 	dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
2561 
2562 	start_segno = sm->last_victim[FLUSH_DEVICE];
2563 	if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
2564 		start_segno = dev_start_segno;
2565 	end_segno = min(start_segno + range.segments, dev_end_segno);
2566 
2567 	while (start_segno < end_segno) {
2568 		if (!mutex_trylock(&sbi->gc_mutex)) {
2569 			ret = -EBUSY;
2570 			goto out;
2571 		}
2572 		sm->last_victim[GC_CB] = end_segno + 1;
2573 		sm->last_victim[GC_GREEDY] = end_segno + 1;
2574 		sm->last_victim[ALLOC_NEXT] = end_segno + 1;
2575 		ret = f2fs_gc(sbi, true, true, start_segno);
2576 		if (ret == -EAGAIN)
2577 			ret = 0;
2578 		else if (ret < 0)
2579 			break;
2580 		start_segno++;
2581 	}
2582 out:
2583 	mnt_drop_write_file(filp);
2584 	return ret;
2585 }
2586 
2587 static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
2588 {
2589 	struct inode *inode = file_inode(filp);
2590 	u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
2591 
2592 	/* Must validate to set it with SQLite behavior in Android. */
2593 	sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
2594 
2595 	return put_user(sb_feature, (u32 __user *)arg);
2596 }
2597 
2598 #ifdef CONFIG_QUOTA
2599 static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
2600 {
2601 	struct inode *inode = file_inode(filp);
2602 	struct f2fs_inode_info *fi = F2FS_I(inode);
2603 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2604 	struct super_block *sb = sbi->sb;
2605 	struct dquot *transfer_to[MAXQUOTAS] = {};
2606 	struct page *ipage;
2607 	kprojid_t kprojid;
2608 	int err;
2609 
2610 	if (!f2fs_sb_has_project_quota(sb)) {
2611 		if (projid != F2FS_DEF_PROJID)
2612 			return -EOPNOTSUPP;
2613 		else
2614 			return 0;
2615 	}
2616 
2617 	if (!f2fs_has_extra_attr(inode))
2618 		return -EOPNOTSUPP;
2619 
2620 	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
2621 
2622 	if (projid_eq(kprojid, F2FS_I(inode)->i_projid))
2623 		return 0;
2624 
2625 	err = -EPERM;
2626 	/* Is it quota file? Do not allow user to mess with it */
2627 	if (IS_NOQUOTA(inode))
2628 		return err;
2629 
2630 	ipage = f2fs_get_node_page(sbi, inode->i_ino);
2631 	if (IS_ERR(ipage))
2632 		return PTR_ERR(ipage);
2633 
2634 	if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize,
2635 								i_projid)) {
2636 		err = -EOVERFLOW;
2637 		f2fs_put_page(ipage, 1);
2638 		return err;
2639 	}
2640 	f2fs_put_page(ipage, 1);
2641 
2642 	err = dquot_initialize(inode);
2643 	if (err)
2644 		return err;
2645 
2646 	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
2647 	if (!IS_ERR(transfer_to[PRJQUOTA])) {
2648 		err = __dquot_transfer(inode, transfer_to);
2649 		dqput(transfer_to[PRJQUOTA]);
2650 		if (err)
2651 			goto out_dirty;
2652 	}
2653 
2654 	F2FS_I(inode)->i_projid = kprojid;
2655 	inode->i_ctime = current_time(inode);
2656 out_dirty:
2657 	f2fs_mark_inode_dirty_sync(inode, true);
2658 	return err;
2659 }
2660 #else
2661 static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
2662 {
2663 	if (projid != F2FS_DEF_PROJID)
2664 		return -EOPNOTSUPP;
2665 	return 0;
2666 }
2667 #endif
2668 
2669 /* Transfer internal flags to xflags */
2670 static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags)
2671 {
2672 	__u32 xflags = 0;
2673 
2674 	if (iflags & F2FS_SYNC_FL)
2675 		xflags |= FS_XFLAG_SYNC;
2676 	if (iflags & F2FS_IMMUTABLE_FL)
2677 		xflags |= FS_XFLAG_IMMUTABLE;
2678 	if (iflags & F2FS_APPEND_FL)
2679 		xflags |= FS_XFLAG_APPEND;
2680 	if (iflags & F2FS_NODUMP_FL)
2681 		xflags |= FS_XFLAG_NODUMP;
2682 	if (iflags & F2FS_NOATIME_FL)
2683 		xflags |= FS_XFLAG_NOATIME;
2684 	if (iflags & F2FS_PROJINHERIT_FL)
2685 		xflags |= FS_XFLAG_PROJINHERIT;
2686 	return xflags;
2687 }
2688 
/* FS_XFLAG_* bits that the flag translation helpers in this file handle */
#define F2FS_SUPPORTED_FS_XFLAGS	\
	(FS_XFLAG_SYNC |		\
	 FS_XFLAG_IMMUTABLE |		\
	 FS_XFLAG_APPEND |		\
	 FS_XFLAG_NODUMP |		\
	 FS_XFLAG_NOATIME |		\
	 FS_XFLAG_PROJINHERIT)
2692 
2693 /* Transfer xflags flags to internal */
2694 static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags)
2695 {
2696 	unsigned long iflags = 0;
2697 
2698 	if (xflags & FS_XFLAG_SYNC)
2699 		iflags |= F2FS_SYNC_FL;
2700 	if (xflags & FS_XFLAG_IMMUTABLE)
2701 		iflags |= F2FS_IMMUTABLE_FL;
2702 	if (xflags & FS_XFLAG_APPEND)
2703 		iflags |= F2FS_APPEND_FL;
2704 	if (xflags & FS_XFLAG_NODUMP)
2705 		iflags |= F2FS_NODUMP_FL;
2706 	if (xflags & FS_XFLAG_NOATIME)
2707 		iflags |= F2FS_NOATIME_FL;
2708 	if (xflags & FS_XFLAG_PROJINHERIT)
2709 		iflags |= F2FS_PROJINHERIT_FL;
2710 
2711 	return iflags;
2712 }
2713 
2714 static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
2715 {
2716 	struct inode *inode = file_inode(filp);
2717 	struct f2fs_inode_info *fi = F2FS_I(inode);
2718 	struct fsxattr fa;
2719 
2720 	memset(&fa, 0, sizeof(struct fsxattr));
2721 	fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags &
2722 				F2FS_FL_USER_VISIBLE);
2723 
2724 	if (f2fs_sb_has_project_quota(inode->i_sb))
2725 		fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
2726 							fi->i_projid);
2727 
2728 	if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa)))
2729 		return -EFAULT;
2730 	return 0;
2731 }
2732 
2733 static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
2734 {
2735 	/*
2736 	 * Project Quota ID state is only allowed to change from within the init
2737 	 * namespace. Enforce that restriction only if we are trying to change
2738 	 * the quota ID state. Everything else is allowed in user namespaces.
2739 	 */
2740 	if (current_user_ns() == &init_user_ns)
2741 		return 0;
2742 
2743 	if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid)
2744 		return -EINVAL;
2745 
2746 	if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) {
2747 		if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
2748 			return -EINVAL;
2749 	} else {
2750 		if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
2751 			return -EINVAL;
2752 	}
2753 
2754 	return 0;
2755 }
2756 
2757 static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
2758 {
2759 	struct inode *inode = file_inode(filp);
2760 	struct f2fs_inode_info *fi = F2FS_I(inode);
2761 	struct fsxattr fa;
2762 	unsigned int flags;
2763 	int err;
2764 
2765 	if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa)))
2766 		return -EFAULT;
2767 
2768 	/* Make sure caller has proper permission */
2769 	if (!inode_owner_or_capable(inode))
2770 		return -EACCES;
2771 
2772 	if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS)
2773 		return -EOPNOTSUPP;
2774 
2775 	flags = f2fs_xflags_to_iflags(fa.fsx_xflags);
2776 	if (f2fs_mask_flags(inode->i_mode, flags) != flags)
2777 		return -EOPNOTSUPP;
2778 
2779 	err = mnt_want_write_file(filp);
2780 	if (err)
2781 		return err;
2782 
2783 	inode_lock(inode);
2784 	err = f2fs_ioctl_check_project(inode, &fa);
2785 	if (err)
2786 		goto out;
2787 	flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
2788 				(flags & F2FS_FL_XFLAG_VISIBLE);
2789 	err = __f2fs_ioc_setflags(inode, flags);
2790 	if (err)
2791 		goto out;
2792 
2793 	err = f2fs_ioc_setproject(filp, fa.fsx_projid);
2794 out:
2795 	inode_unlock(inode);
2796 	mnt_drop_write_file(filp);
2797 	return err;
2798 }
2799 
2800 int f2fs_pin_file_control(struct inode *inode, bool inc)
2801 {
2802 	struct f2fs_inode_info *fi = F2FS_I(inode);
2803 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2804 
2805 	/* Use i_gc_failures for normal file as a risk signal. */
2806 	if (inc)
2807 		f2fs_i_gc_failures_write(inode,
2808 				fi->i_gc_failures[GC_FAILURE_PIN] + 1);
2809 
2810 	if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
2811 		f2fs_msg(sbi->sb, KERN_WARNING,
2812 			"%s: Enable GC = ino %lx after %x GC trials\n",
2813 			__func__, inode->i_ino,
2814 			fi->i_gc_failures[GC_FAILURE_PIN]);
2815 		clear_inode_flag(inode, FI_PIN_FILE);
2816 		return -EAGAIN;
2817 	}
2818 	return 0;
2819 }
2820 
2821 static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
2822 {
2823 	struct inode *inode = file_inode(filp);
2824 	__u32 pin;
2825 	int ret = 0;
2826 
2827 	if (!inode_owner_or_capable(inode))
2828 		return -EACCES;
2829 
2830 	if (get_user(pin, (__u32 __user *)arg))
2831 		return -EFAULT;
2832 
2833 	if (!S_ISREG(inode->i_mode))
2834 		return -EINVAL;
2835 
2836 	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
2837 		return -EROFS;
2838 
2839 	ret = mnt_want_write_file(filp);
2840 	if (ret)
2841 		return ret;
2842 
2843 	inode_lock(inode);
2844 
2845 	if (f2fs_should_update_outplace(inode, NULL)) {
2846 		ret = -EINVAL;
2847 		goto out;
2848 	}
2849 
2850 	if (!pin) {
2851 		clear_inode_flag(inode, FI_PIN_FILE);
2852 		f2fs_i_gc_failures_write(inode, 0);
2853 		goto done;
2854 	}
2855 
2856 	if (f2fs_pin_file_control(inode, false)) {
2857 		ret = -EAGAIN;
2858 		goto out;
2859 	}
2860 	ret = f2fs_convert_inline_inode(inode);
2861 	if (ret)
2862 		goto out;
2863 
2864 	set_inode_flag(inode, FI_PIN_FILE);
2865 	ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
2866 done:
2867 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2868 out:
2869 	inode_unlock(inode);
2870 	mnt_drop_write_file(filp);
2871 	return ret;
2872 }
2873 
2874 static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
2875 {
2876 	struct inode *inode = file_inode(filp);
2877 	__u32 pin = 0;
2878 
2879 	if (is_inode_flag_set(inode, FI_PIN_FILE))
2880 		pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
2881 	return put_user(pin, (u32 __user *)arg);
2882 }
2883 
2884 int f2fs_precache_extents(struct inode *inode)
2885 {
2886 	struct f2fs_inode_info *fi = F2FS_I(inode);
2887 	struct f2fs_map_blocks map;
2888 	pgoff_t m_next_extent;
2889 	loff_t end;
2890 	int err;
2891 
2892 	if (is_inode_flag_set(inode, FI_NO_EXTENT))
2893 		return -EOPNOTSUPP;
2894 
2895 	map.m_lblk = 0;
2896 	map.m_next_pgofs = NULL;
2897 	map.m_next_extent = &m_next_extent;
2898 	map.m_seg_type = NO_CHECK_TYPE;
2899 	end = F2FS_I_SB(inode)->max_file_blocks;
2900 
2901 	while (map.m_lblk < end) {
2902 		map.m_len = end - map.m_lblk;
2903 
2904 		down_write(&fi->i_gc_rwsem[WRITE]);
2905 		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
2906 		up_write(&fi->i_gc_rwsem[WRITE]);
2907 		if (err)
2908 			return err;
2909 
2910 		map.m_lblk = m_next_extent;
2911 	}
2912 
2913 	return err;
2914 }
2915 
/*
 * F2FS_IOC_PRECACHE_EXTENTS handler: run f2fs_precache_extents() on the
 * inode behind @filp and return its result. @arg is unused.
 */
static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);

	return f2fs_precache_extents(inode);
}
2920 
2921 long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
2922 {
2923 	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
2924 		return -EIO;
2925 
2926 	switch (cmd) {
2927 	case F2FS_IOC_GETFLAGS:
2928 		return f2fs_ioc_getflags(filp, arg);
2929 	case F2FS_IOC_SETFLAGS:
2930 		return f2fs_ioc_setflags(filp, arg);
2931 	case F2FS_IOC_GETVERSION:
2932 		return f2fs_ioc_getversion(filp, arg);
2933 	case F2FS_IOC_START_ATOMIC_WRITE:
2934 		return f2fs_ioc_start_atomic_write(filp);
2935 	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
2936 		return f2fs_ioc_commit_atomic_write(filp);
2937 	case F2FS_IOC_START_VOLATILE_WRITE:
2938 		return f2fs_ioc_start_volatile_write(filp);
2939 	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
2940 		return f2fs_ioc_release_volatile_write(filp);
2941 	case F2FS_IOC_ABORT_VOLATILE_WRITE:
2942 		return f2fs_ioc_abort_volatile_write(filp);
2943 	case F2FS_IOC_SHUTDOWN:
2944 		return f2fs_ioc_shutdown(filp, arg);
2945 	case FITRIM:
2946 		return f2fs_ioc_fitrim(filp, arg);
2947 	case F2FS_IOC_SET_ENCRYPTION_POLICY:
2948 		return f2fs_ioc_set_encryption_policy(filp, arg);
2949 	case F2FS_IOC_GET_ENCRYPTION_POLICY:
2950 		return f2fs_ioc_get_encryption_policy(filp, arg);
2951 	case F2FS_IOC_GET_ENCRYPTION_PWSALT:
2952 		return f2fs_ioc_get_encryption_pwsalt(filp, arg);
2953 	case F2FS_IOC_GARBAGE_COLLECT:
2954 		return f2fs_ioc_gc(filp, arg);
2955 	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
2956 		return f2fs_ioc_gc_range(filp, arg);
2957 	case F2FS_IOC_WRITE_CHECKPOINT:
2958 		return f2fs_ioc_write_checkpoint(filp, arg);
2959 	case F2FS_IOC_DEFRAGMENT:
2960 		return f2fs_ioc_defragment(filp, arg);
2961 	case F2FS_IOC_MOVE_RANGE:
2962 		return f2fs_ioc_move_range(filp, arg);
2963 	case F2FS_IOC_FLUSH_DEVICE:
2964 		return f2fs_ioc_flush_device(filp, arg);
2965 	case F2FS_IOC_GET_FEATURES:
2966 		return f2fs_ioc_get_features(filp, arg);
2967 	case F2FS_IOC_FSGETXATTR:
2968 		return f2fs_ioc_fsgetxattr(filp, arg);
2969 	case F2FS_IOC_FSSETXATTR:
2970 		return f2fs_ioc_fssetxattr(filp, arg);
2971 	case F2FS_IOC_GET_PIN_FILE:
2972 		return f2fs_ioc_get_pin_file(filp, arg);
2973 	case F2FS_IOC_SET_PIN_FILE:
2974 		return f2fs_ioc_set_pin_file(filp, arg);
2975 	case F2FS_IOC_PRECACHE_EXTENTS:
2976 		return f2fs_ioc_precache_extents(filp, arg);
2977 	default:
2978 		return -ENOTTY;
2979 	}
2980 }
2981 
2982 static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
2983 {
2984 	struct file *file = iocb->ki_filp;
2985 	struct inode *inode = file_inode(file);
2986 	ssize_t ret;
2987 
2988 	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2989 		return -EIO;
2990 
2991 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
2992 		return -EINVAL;
2993 
2994 	if (!inode_trylock(inode)) {
2995 		if (iocb->ki_flags & IOCB_NOWAIT)
2996 			return -EAGAIN;
2997 		inode_lock(inode);
2998 	}
2999 
3000 	ret = generic_write_checks(iocb, from);
3001 	if (ret > 0) {
3002 		bool preallocated = false;
3003 		size_t target_size = 0;
3004 		int err;
3005 
3006 		if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
3007 			set_inode_flag(inode, FI_NO_PREALLOC);
3008 
3009 		if ((iocb->ki_flags & IOCB_NOWAIT) &&
3010 			(iocb->ki_flags & IOCB_DIRECT)) {
3011 				if (!f2fs_overwrite_io(inode, iocb->ki_pos,
3012 						iov_iter_count(from)) ||
3013 					f2fs_has_inline_data(inode) ||
3014 					f2fs_force_buffered_io(inode,
3015 							iocb, from)) {
3016 						clear_inode_flag(inode,
3017 								FI_NO_PREALLOC);
3018 						inode_unlock(inode);
3019 						return -EAGAIN;
3020 				}
3021 
3022 		} else {
3023 			preallocated = true;
3024 			target_size = iocb->ki_pos + iov_iter_count(from);
3025 
3026 			err = f2fs_preallocate_blocks(iocb, from);
3027 			if (err) {
3028 				clear_inode_flag(inode, FI_NO_PREALLOC);
3029 				inode_unlock(inode);
3030 				return err;
3031 			}
3032 		}
3033 		ret = __generic_file_write_iter(iocb, from);
3034 		clear_inode_flag(inode, FI_NO_PREALLOC);
3035 
3036 		/* if we couldn't write data, we should deallocate blocks. */
3037 		if (preallocated && i_size_read(inode) < target_size)
3038 			f2fs_truncate(inode);
3039 
3040 		if (ret > 0)
3041 			f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
3042 	}
3043 	inode_unlock(inode);
3044 
3045 	if (ret > 0)
3046 		ret = generic_write_sync(iocb, ret);
3047 	return ret;
3048 }
3049 
#ifdef CONFIG_COMPAT
/*
 * compat ioctl entry point: rewrite the F2FS_IOC32_* commands to their
 * F2FS_IOC_* counterparts, pass the listed commands through unchanged, and
 * forward to f2fs_ioctl() with @arg converted by compat_ptr().
 *
 * Returns -ENOIOCTLCMD for any command not listed here, otherwise the
 * result of f2fs_ioctl().
 */
long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	/* commands that are rewritten before dispatch */
	case F2FS_IOC32_GETFLAGS:
		cmd = F2FS_IOC_GETFLAGS;
		break;
	case F2FS_IOC32_SETFLAGS:
		cmd = F2FS_IOC_SETFLAGS;
		break;
	case F2FS_IOC32_GETVERSION:
		cmd = F2FS_IOC_GETVERSION;
		break;

	/* commands that are forwarded as they are */
	case F2FS_IOC_START_ATOMIC_WRITE:
	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
	case F2FS_IOC_START_VOLATILE_WRITE:
	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
	case F2FS_IOC_ABORT_VOLATILE_WRITE:
	case F2FS_IOC_SHUTDOWN:
	case F2FS_IOC_SET_ENCRYPTION_POLICY:
	case F2FS_IOC_GET_ENCRYPTION_POLICY:
	case F2FS_IOC_GET_ENCRYPTION_PWSALT:
	case F2FS_IOC_GARBAGE_COLLECT:
	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
	case F2FS_IOC_WRITE_CHECKPOINT:
	case F2FS_IOC_DEFRAGMENT:
	case F2FS_IOC_MOVE_RANGE:
	case F2FS_IOC_FLUSH_DEVICE:
	case F2FS_IOC_GET_FEATURES:
	case F2FS_IOC_FSGETXATTR:
	case F2FS_IOC_FSSETXATTR:
	case F2FS_IOC_GET_PIN_FILE:
	case F2FS_IOC_SET_PIN_FILE:
	case F2FS_IOC_PRECACHE_EXTENTS:
		break;

	default:
		return -ENOIOCTLCMD;
	}

	return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
3091 
3092 const struct file_operations f2fs_file_operations = {
3093 	.llseek		= f2fs_llseek,
3094 	.read_iter	= generic_file_read_iter,
3095 	.write_iter	= f2fs_file_write_iter,
3096 	.open		= f2fs_file_open,
3097 	.release	= f2fs_release_file,
3098 	.mmap		= f2fs_file_mmap,
3099 	.flush		= f2fs_file_flush,
3100 	.fsync		= f2fs_sync_file,
3101 	.fallocate	= f2fs_fallocate,
3102 	.unlocked_ioctl	= f2fs_ioctl,
3103 #ifdef CONFIG_COMPAT
3104 	.compat_ioctl	= f2fs_compat_ioctl,
3105 #endif
3106 	.splice_read	= generic_file_splice_read,
3107 	.splice_write	= iter_file_splice_write,
3108 };
3109