xref: /openbmc/linux/fs/xfs/xfs_file.c (revision 55e43d6abd078ed6d219902ce8cb4d68e3c993ba)
10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0
2c59d87c4SChristoph Hellwig /*
3c59d87c4SChristoph Hellwig  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4c59d87c4SChristoph Hellwig  * All Rights Reserved.
5c59d87c4SChristoph Hellwig  */
6c59d87c4SChristoph Hellwig #include "xfs.h"
7c59d87c4SChristoph Hellwig #include "xfs_fs.h"
870a9883cSDave Chinner #include "xfs_shared.h"
9a4fbe6abSDave Chinner #include "xfs_format.h"
10239880efSDave Chinner #include "xfs_log_format.h"
11239880efSDave Chinner #include "xfs_trans_resv.h"
12c59d87c4SChristoph Hellwig #include "xfs_mount.h"
13c59d87c4SChristoph Hellwig #include "xfs_inode.h"
14239880efSDave Chinner #include "xfs_trans.h"
15c59d87c4SChristoph Hellwig #include "xfs_inode_item.h"
16c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
17c24b5dfaSDave Chinner #include "xfs_bmap_util.h"
182b9ab5abSDave Chinner #include "xfs_dir2.h"
19c24b5dfaSDave Chinner #include "xfs_dir2_priv.h"
20c59d87c4SChristoph Hellwig #include "xfs_ioctl.h"
21c59d87c4SChristoph Hellwig #include "xfs_trace.h"
22239880efSDave Chinner #include "xfs_log.h"
23dc06f398SBrian Foster #include "xfs_icache.h"
24781355c6SChristoph Hellwig #include "xfs_pnfs.h"
2568a9f5e7SChristoph Hellwig #include "xfs_iomap.h"
260613f16cSDarrick J. Wong #include "xfs_reflink.h"
277531c9abSDarrick J. Wong #include "xfs_file.h"
28c59d87c4SChristoph Hellwig 
29ea6c49b7SShiyang Ruan #include <linux/dax.h>
30c59d87c4SChristoph Hellwig #include <linux/falloc.h>
3166114cadSTejun Heo #include <linux/backing-dev.h>
32a39e596bSChristoph Hellwig #include <linux/mman.h>
3340144e49SJan Kara #include <linux/fadvise.h>
34f736d93dSChristoph Hellwig #include <linux/mount.h>
35c59d87c4SChristoph Hellwig 
36c59d87c4SChristoph Hellwig static const struct vm_operations_struct xfs_file_vm_ops;
37c59d87c4SChristoph Hellwig 
3825219dbfSDarrick J. Wong /*
3925219dbfSDarrick J. Wong  * Decide if the given file range is aligned to the size of the fundamental
4025219dbfSDarrick J. Wong  * allocation unit for the file.
4125219dbfSDarrick J. Wong  */
42*c070b880SDarrick J. Wong bool
4325219dbfSDarrick J. Wong xfs_is_falloc_aligned(
4425219dbfSDarrick J. Wong 	struct xfs_inode	*ip,
4525219dbfSDarrick J. Wong 	loff_t			pos,
4625219dbfSDarrick J. Wong 	long long int		len)
4725219dbfSDarrick J. Wong {
48*c070b880SDarrick J. Wong 	unsigned int		alloc_unit = xfs_inode_alloc_unitsize(ip);
4925219dbfSDarrick J. Wong 
50*c070b880SDarrick J. Wong 	if (!is_power_of_2(alloc_unit)) {
5125219dbfSDarrick J. Wong 		u32	mod;
5225219dbfSDarrick J. Wong 
53*c070b880SDarrick J. Wong 		div_u64_rem(pos, alloc_unit, &mod);
5425219dbfSDarrick J. Wong 		if (mod)
5525219dbfSDarrick J. Wong 			return false;
56*c070b880SDarrick J. Wong 		div_u64_rem(len, alloc_unit, &mod);
5725219dbfSDarrick J. Wong 		return mod == 0;
5825219dbfSDarrick J. Wong 	}
5925219dbfSDarrick J. Wong 
60*c070b880SDarrick J. Wong 	return !((pos | len) & (alloc_unit - 1));
6125219dbfSDarrick J. Wong }
6225219dbfSDarrick J. Wong 
631da2f2dbSChristoph Hellwig /*
641da2f2dbSChristoph Hellwig  * Fsync operations on directories are much simpler than on regular files,
651da2f2dbSChristoph Hellwig  * as there is no file data to flush, and thus also no need for explicit
651da2f2dbSChristoph Hellwig  * as there is no file data to flush and thus no need for explicit cache
661da2f2dbSChristoph Hellwig  * flush operations; nor are there any non-transaction metadata updates
671da2f2dbSChristoph Hellwig  * on directories.
691da2f2dbSChristoph Hellwig STATIC int
701da2f2dbSChristoph Hellwig xfs_dir_fsync(
711da2f2dbSChristoph Hellwig 	struct file		*file,
721da2f2dbSChristoph Hellwig 	loff_t			start,
731da2f2dbSChristoph Hellwig 	loff_t			end,
741da2f2dbSChristoph Hellwig 	int			datasync)
751da2f2dbSChristoph Hellwig {
761da2f2dbSChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
771da2f2dbSChristoph Hellwig 
781da2f2dbSChristoph Hellwig 	trace_xfs_dir_fsync(ip);
7954fbdd10SChristoph Hellwig 	return xfs_log_force_inode(ip);
801da2f2dbSChristoph Hellwig }
811da2f2dbSChristoph Hellwig 
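/*
 * Return the CIL commit sequence that an fsync of this inode must force the
 * log to, or zero if no log force is needed: either the inode is not pinned,
 * or this is an fdatasync and only the timestamps are dirty.
 */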
825f9b4b0dSDave Chinner static xfs_csn_t
835f9b4b0dSDave Chinner xfs_fsync_seq(
84f22c7f87SChristoph Hellwig 	struct xfs_inode	*ip,
85f22c7f87SChristoph Hellwig 	bool			datasync)
86f22c7f87SChristoph Hellwig {
87f22c7f87SChristoph Hellwig 	if (!xfs_ipincount(ip))
88f22c7f87SChristoph Hellwig 		return 0;
89f22c7f87SChristoph Hellwig 	if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
90f22c7f87SChristoph Hellwig 		return 0;
915f9b4b0dSDave Chinner 	return ip->i_itemp->ili_commit_seq;
92f22c7f87SChristoph Hellwig }
93f22c7f87SChristoph Hellwig 
94f22c7f87SChristoph Hellwig /*
95f22c7f87SChristoph Hellwig  * All metadata updates are logged, which means that we just have to flush the
96f22c7f87SChristoph Hellwig  * log up to the latest LSN that touched the inode.
97f22c7f87SChristoph Hellwig  *
98f22c7f87SChristoph Hellwig  * If we have concurrent fsync/fdatasync() calls, we need them to all block on
99f22c7f87SChristoph Hellwig  * the log force before we clear the ili_fsync_fields field. This ensures that
100f22c7f87SChristoph Hellwig  * we don't get a racing sync operation that does not wait for the metadata to
101f22c7f87SChristoph Hellwig  * hit the journal before returning.  If we race with clearing ili_fsync_fields,
102f22c7f87SChristoph Hellwig  * then all that will happen is the log force will do nothing as the lsn will
103f22c7f87SChristoph Hellwig  * already be on disk.  We can't race with setting ili_fsync_fields because that
104f22c7f87SChristoph Hellwig  * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
105f22c7f87SChristoph Hellwig  * shared until after the ili_fsync_fields is cleared.
106f22c7f87SChristoph Hellwig  */
107f22c7f87SChristoph Hellwig static int
108f22c7f87SChristoph Hellwig xfs_fsync_flush_log(
109f22c7f87SChristoph Hellwig 	struct xfs_inode	*ip,
110f22c7f87SChristoph Hellwig 	bool			datasync,
111f22c7f87SChristoph Hellwig 	int			*log_flushed)
112f22c7f87SChristoph Hellwig {
113f22c7f87SChristoph Hellwig 	int			error = 0;
1145f9b4b0dSDave Chinner 	xfs_csn_t		seq;
115f22c7f87SChristoph Hellwig 
116f22c7f87SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
1175f9b4b0dSDave Chinner 	seq = xfs_fsync_seq(ip, datasync);
1185f9b4b0dSDave Chinner 	if (seq) {
1195f9b4b0dSDave Chinner 		error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
120f22c7f87SChristoph Hellwig 					  log_flushed);
121f22c7f87SChristoph Hellwig 
122f22c7f87SChristoph Hellwig 		spin_lock(&ip->i_itemp->ili_lock);
123f22c7f87SChristoph Hellwig 		ip->i_itemp->ili_fsync_fields = 0;
124f22c7f87SChristoph Hellwig 		spin_unlock(&ip->i_itemp->ili_lock);
125f22c7f87SChristoph Hellwig 	}
126f22c7f87SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
127f22c7f87SChristoph Hellwig 	return error;
128f22c7f87SChristoph Hellwig }
129f22c7f87SChristoph Hellwig 
130c59d87c4SChristoph Hellwig STATIC int
131c59d87c4SChristoph Hellwig xfs_file_fsync(
132c59d87c4SChristoph Hellwig 	struct file		*file,
133c59d87c4SChristoph Hellwig 	loff_t			start,
134c59d87c4SChristoph Hellwig 	loff_t			end,
135c59d87c4SChristoph Hellwig 	int			datasync)
136c59d87c4SChristoph Hellwig {
137f22c7f87SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
138c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1397d839e32SDarrick J. Wong 	int			error, err2;
140c59d87c4SChristoph Hellwig 	int			log_flushed = 0;
141c59d87c4SChristoph Hellwig 
142c59d87c4SChristoph Hellwig 	trace_xfs_file_fsync(ip);
143c59d87c4SChristoph Hellwig 
1441b180274SJeff Layton 	error = file_write_and_wait_range(file, start, end);
145c59d87c4SChristoph Hellwig 	if (error)
146c59d87c4SChristoph Hellwig 		return error;
147c59d87c4SChristoph Hellwig 
14875c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
149b474c7aeSEric Sandeen 		return -EIO;
150c59d87c4SChristoph Hellwig 
151c59d87c4SChristoph Hellwig 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
152c59d87c4SChristoph Hellwig 
153c59d87c4SChristoph Hellwig 	/*
1542291dab2SDave Chinner 	 * If we have an RT and/or log subvolume we need to make sure to flush
1552291dab2SDave Chinner 	 * the write cache of the device used for file data first.  This is to
1562291dab2SDave Chinner 	 * ensure newly written file data makes it to disk before logging the new
1572291dab2SDave Chinner 	 * inode size in case of an extending write.
158c59d87c4SChristoph Hellwig 	 */
159c59d87c4SChristoph Hellwig 	if (XFS_IS_REALTIME_INODE(ip))
1607d839e32SDarrick J. Wong 		error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
161c59d87c4SChristoph Hellwig 	else if (mp->m_logdev_targp != mp->m_ddev_targp)
1627d839e32SDarrick J. Wong 		error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
163c59d87c4SChristoph Hellwig 
164c59d87c4SChristoph Hellwig 	/*
165ae29e422SChristoph Hellwig 	 * Any inode that has dirty modifications in the log is pinned.  The
1667d839e32SDarrick J. Wong 	 * racy check here for a pinned inode will not catch modifications
167ae29e422SChristoph Hellwig 	 * that happen concurrently to the fsync call, but fsync semantics
168ae29e422SChristoph Hellwig 	 * only require to sync previously completed I/O.
169c59d87c4SChristoph Hellwig 	 */
1707d839e32SDarrick J. Wong 	if (xfs_ipincount(ip)) {
1717d839e32SDarrick J. Wong 		err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
1727d839e32SDarrick J. Wong 		if (err2 && !error)
1737d839e32SDarrick J. Wong 			error = err2;
1747d839e32SDarrick J. Wong 	}
175b1037058SChristoph Hellwig 
176c59d87c4SChristoph Hellwig 	/*
177c59d87c4SChristoph Hellwig 	 * If we only have a single device, and the log force above was
178c59d87c4SChristoph Hellwig 	 * a no-op, we might have to flush the data device cache here.
179c59d87c4SChristoph Hellwig 	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
180c59d87c4SChristoph Hellwig 	 * an already allocated file and thus do not have any metadata to
181c59d87c4SChristoph Hellwig 	 * commit.
182c59d87c4SChristoph Hellwig 	 */
1832291dab2SDave Chinner 	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
1847d839e32SDarrick J. Wong 	    mp->m_logdev_targp == mp->m_ddev_targp) {
1857d839e32SDarrick J. Wong 		err2 = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
1867d839e32SDarrick J. Wong 		if (err2 && !error)
1877d839e32SDarrick J. Wong 			error = err2;
1887d839e32SDarrick J. Wong 	}
189c59d87c4SChristoph Hellwig 
1902451337dSDave Chinner 	return error;
191c59d87c4SChristoph Hellwig }
192c59d87c4SChristoph Hellwig 
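/*
 * Take the inode lock in the requested mode for an iocb, honouring
 * IOCB_NOWAIT by trylocking and returning -EAGAIN instead of sleeping.
 */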
193f50b8f47SChristoph Hellwig static int
194f50b8f47SChristoph Hellwig xfs_ilock_iocb(
195f50b8f47SChristoph Hellwig 	struct kiocb		*iocb,
196f50b8f47SChristoph Hellwig 	unsigned int		lock_mode)
197f50b8f47SChristoph Hellwig {
198f50b8f47SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
199f50b8f47SChristoph Hellwig 
200f50b8f47SChristoph Hellwig 	if (iocb->ki_flags & IOCB_NOWAIT) {
201f50b8f47SChristoph Hellwig 		if (!xfs_ilock_nowait(ip, lock_mode))
202f50b8f47SChristoph Hellwig 			return -EAGAIN;
203f50b8f47SChristoph Hellwig 	} else {
204f50b8f47SChristoph Hellwig 		xfs_ilock(ip, lock_mode);
205f50b8f47SChristoph Hellwig 	}
206f50b8f47SChristoph Hellwig 
207f50b8f47SChristoph Hellwig 	return 0;
208f50b8f47SChristoph Hellwig }
209f50b8f47SChristoph Hellwig 
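/*
 * Take the iolock for a write.  If we got the shared lock but a remap
 * operation is in progress (XFS_IREMAPPING), retry with the exclusive lock
 * so the write serialises against the remap.
 */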
210d7d84772SCatherine Hoang static int
211d7d84772SCatherine Hoang xfs_ilock_iocb_for_write(
212d7d84772SCatherine Hoang 	struct kiocb		*iocb,
213d7d84772SCatherine Hoang 	unsigned int		*lock_mode)
214d7d84772SCatherine Hoang {
215d7d84772SCatherine Hoang 	ssize_t			ret;
216d7d84772SCatherine Hoang 	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
217d7d84772SCatherine Hoang 
218d7d84772SCatherine Hoang 	ret = xfs_ilock_iocb(iocb, *lock_mode);
219d7d84772SCatherine Hoang 	if (ret)
220d7d84772SCatherine Hoang 		return ret;
221d7d84772SCatherine Hoang 
222d7d84772SCatherine Hoang 	if (*lock_mode == XFS_IOLOCK_EXCL)
223d7d84772SCatherine Hoang 		return 0;
224d7d84772SCatherine Hoang 	if (!xfs_iflags_test(ip, XFS_IREMAPPING))
225d7d84772SCatherine Hoang 		return 0;
226d7d84772SCatherine Hoang 
227d7d84772SCatherine Hoang 	xfs_iunlock(ip, *lock_mode);
228d7d84772SCatherine Hoang 	*lock_mode = XFS_IOLOCK_EXCL;
229d7d84772SCatherine Hoang 	return xfs_ilock_iocb(iocb, *lock_mode);
230d7d84772SCatherine Hoang }
231d7d84772SCatherine Hoang 
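/*
 * Lock the inode for a page fault that can write to the file: take the
 * MMAPLOCK shared normally, or exclusive while a remap is in progress.
 */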
232d7d84772SCatherine Hoang static unsigned int
233d7d84772SCatherine Hoang xfs_ilock_for_write_fault(
234d7d84772SCatherine Hoang 	struct xfs_inode	*ip)
235d7d84772SCatherine Hoang {
236d7d84772SCatherine Hoang 	/* get a shared lock if no remapping in progress */
237d7d84772SCatherine Hoang 	xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
238d7d84772SCatherine Hoang 	if (!xfs_iflags_test(ip, XFS_IREMAPPING))
239d7d84772SCatherine Hoang 		return XFS_MMAPLOCK_SHARED;
240d7d84772SCatherine Hoang 
241d7d84772SCatherine Hoang 	/* wait for remapping to complete */
242d7d84772SCatherine Hoang 	xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
243d7d84772SCatherine Hoang 	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
244d7d84772SCatherine Hoang 	return XFS_MMAPLOCK_EXCL;
245d7d84772SCatherine Hoang }
246d7d84772SCatherine Hoang 
247c59d87c4SChristoph Hellwig STATIC ssize_t
248ee1b218bSChristoph Hellwig xfs_file_dio_read(
249c59d87c4SChristoph Hellwig 	struct kiocb		*iocb,
250b4f5d2c6SAl Viro 	struct iov_iter		*to)
251c59d87c4SChristoph Hellwig {
252acdda3aaSChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
253acdda3aaSChristoph Hellwig 	ssize_t			ret;
254c59d87c4SChristoph Hellwig 
2553e40b13cSChristoph Hellwig 	trace_xfs_file_direct_read(iocb, to);
256c59d87c4SChristoph Hellwig 
2573e40b13cSChristoph Hellwig 	if (!iov_iter_count(to))
258f1285ff0SChristoph Hellwig 		return 0; /* skip atime */
259c59d87c4SChristoph Hellwig 
260a447d7cdSChristoph Hellwig 	file_accessed(iocb->ki_filp);
261a447d7cdSChristoph Hellwig 
262f50b8f47SChristoph Hellwig 	ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
263f50b8f47SChristoph Hellwig 	if (ret)
264f50b8f47SChristoph Hellwig 		return ret;
265786f847fSChristoph Hellwig 	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0);
26665523218SChristoph Hellwig 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
267acdda3aaSChristoph Hellwig 
26816d4d435SChristoph Hellwig 	return ret;
26916d4d435SChristoph Hellwig }
27016d4d435SChristoph Hellwig 
271f021bd07SArnd Bergmann static noinline ssize_t
27216d4d435SChristoph Hellwig xfs_file_dax_read(
27316d4d435SChristoph Hellwig 	struct kiocb		*iocb,
27416d4d435SChristoph Hellwig 	struct iov_iter		*to)
27516d4d435SChristoph Hellwig {
2766c31f495SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
27716d4d435SChristoph Hellwig 	ssize_t			ret = 0;
27816d4d435SChristoph Hellwig 
2793e40b13cSChristoph Hellwig 	trace_xfs_file_dax_read(iocb, to);
28016d4d435SChristoph Hellwig 
2813e40b13cSChristoph Hellwig 	if (!iov_iter_count(to))
28216d4d435SChristoph Hellwig 		return 0; /* skip atime */
28316d4d435SChristoph Hellwig 
284f50b8f47SChristoph Hellwig 	ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
285f50b8f47SChristoph Hellwig 	if (ret)
286f50b8f47SChristoph Hellwig 		return ret;
287690c2a38SChristoph Hellwig 	ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
28865523218SChristoph Hellwig 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
289bbc5a740SChristoph Hellwig 
290f1285ff0SChristoph Hellwig 	file_accessed(iocb->ki_filp);
291bbc5a740SChristoph Hellwig 	return ret;
292bbc5a740SChristoph Hellwig }
293bbc5a740SChristoph Hellwig 
294bbc5a740SChristoph Hellwig STATIC ssize_t
295ee1b218bSChristoph Hellwig xfs_file_buffered_read(
296bbc5a740SChristoph Hellwig 	struct kiocb		*iocb,
297bbc5a740SChristoph Hellwig 	struct iov_iter		*to)
298bbc5a740SChristoph Hellwig {
299bbc5a740SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
300bbc5a740SChristoph Hellwig 	ssize_t			ret;
301bbc5a740SChristoph Hellwig 
3023e40b13cSChristoph Hellwig 	trace_xfs_file_buffered_read(iocb, to);
303bbc5a740SChristoph Hellwig 
304f50b8f47SChristoph Hellwig 	ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
305f50b8f47SChristoph Hellwig 	if (ret)
306f50b8f47SChristoph Hellwig 		return ret;
307b4f5d2c6SAl Viro 	ret = generic_file_read_iter(iocb, to);
30865523218SChristoph Hellwig 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
309bbc5a740SChristoph Hellwig 
310bbc5a740SChristoph Hellwig 	return ret;
311bbc5a740SChristoph Hellwig }
312bbc5a740SChristoph Hellwig 
313bbc5a740SChristoph Hellwig STATIC ssize_t
314bbc5a740SChristoph Hellwig xfs_file_read_iter(
315bbc5a740SChristoph Hellwig 	struct kiocb		*iocb,
316bbc5a740SChristoph Hellwig 	struct iov_iter		*to)
317bbc5a740SChristoph Hellwig {
31816d4d435SChristoph Hellwig 	struct inode		*inode = file_inode(iocb->ki_filp);
31916d4d435SChristoph Hellwig 	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
320bbc5a740SChristoph Hellwig 	ssize_t			ret = 0;
321bbc5a740SChristoph Hellwig 
322bbc5a740SChristoph Hellwig 	XFS_STATS_INC(mp, xs_read_calls);
323bbc5a740SChristoph Hellwig 
32475c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
325bbc5a740SChristoph Hellwig 		return -EIO;
326bbc5a740SChristoph Hellwig 
32716d4d435SChristoph Hellwig 	if (IS_DAX(inode))
32816d4d435SChristoph Hellwig 		ret = xfs_file_dax_read(iocb, to);
32916d4d435SChristoph Hellwig 	else if (iocb->ki_flags & IOCB_DIRECT)
330ee1b218bSChristoph Hellwig 		ret = xfs_file_dio_read(iocb, to);
331bbc5a740SChristoph Hellwig 	else
332ee1b218bSChristoph Hellwig 		ret = xfs_file_buffered_read(iocb, to);
333bbc5a740SChristoph Hellwig 
334c59d87c4SChristoph Hellwig 	if (ret > 0)
335ff6d6af2SBill O'Donnell 		XFS_STATS_ADD(mp, xs_read_bytes, ret);
336c59d87c4SChristoph Hellwig 	return ret;
337c59d87c4SChristoph Hellwig }
338c59d87c4SChristoph Hellwig 
33954919f94SDavid Howells STATIC ssize_t
34054919f94SDavid Howells xfs_file_splice_read(
34154919f94SDavid Howells 	struct file		*in,
34254919f94SDavid Howells 	loff_t			*ppos,
34354919f94SDavid Howells 	struct pipe_inode_info	*pipe,
34454919f94SDavid Howells 	size_t			len,
34554919f94SDavid Howells 	unsigned int		flags)
34654919f94SDavid Howells {
34754919f94SDavid Howells 	struct inode		*inode = file_inode(in);
34854919f94SDavid Howells 	struct xfs_inode	*ip = XFS_I(inode);
34954919f94SDavid Howells 	struct xfs_mount	*mp = ip->i_mount;
35054919f94SDavid Howells 	ssize_t			ret = 0;
35154919f94SDavid Howells 
35254919f94SDavid Howells 	XFS_STATS_INC(mp, xs_read_calls);
35354919f94SDavid Howells 
35454919f94SDavid Howells 	if (xfs_is_shutdown(mp))
35554919f94SDavid Howells 		return -EIO;
35654919f94SDavid Howells 
35754919f94SDavid Howells 	trace_xfs_file_splice_read(ip, *ppos, len);
35854919f94SDavid Howells 
35954919f94SDavid Howells 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
36054919f94SDavid Howells 	ret = filemap_splice_read(in, ppos, pipe, len, flags);
36154919f94SDavid Howells 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
36254919f94SDavid Howells 	if (ret > 0)
36354919f94SDavid Howells 		XFS_STATS_ADD(mp, xs_read_bytes, ret);
36454919f94SDavid Howells 	return ret;
36554919f94SDavid Howells }
36654919f94SDavid Howells 
367c59d87c4SChristoph Hellwig /*
368c59d87c4SChristoph Hellwig  * Common pre-write limit and setup checks.
369c59d87c4SChristoph Hellwig  *
3705bf1f262SChristoph Hellwig  * Called with the iolock held either shared or exclusive according to
3715bf1f262SChristoph Hellwig  * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
3725bf1f262SChristoph Hellwig  * if called for a direct write beyond i_size.
373c59d87c4SChristoph Hellwig  */
374c59d87c4SChristoph Hellwig STATIC ssize_t
375ee1b218bSChristoph Hellwig xfs_file_write_checks(
37699733fa3SAl Viro 	struct kiocb		*iocb,
37799733fa3SAl Viro 	struct iov_iter		*from,
378a1033753SDave Chinner 	unsigned int		*iolock)
379c59d87c4SChristoph Hellwig {
38099733fa3SAl Viro 	struct file		*file = iocb->ki_filp;
381c59d87c4SChristoph Hellwig 	struct inode		*inode = file->f_mapping->host;
382c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
3833309dd04SAl Viro 	ssize_t			error = 0;
38499733fa3SAl Viro 	size_t			count = iov_iter_count(from);
3853136e8bbSBrian Foster 	bool			drained_dio = false;
386f5c54717SChristoph Hellwig 	loff_t			isize;
387c59d87c4SChristoph Hellwig 
3887271d243SDave Chinner restart:
3893309dd04SAl Viro 	error = generic_write_checks(iocb, from);
3903309dd04SAl Viro 	if (error <= 0)
391c59d87c4SChristoph Hellwig 		return error;
392c59d87c4SChristoph Hellwig 
393354be7e3SChristoph Hellwig 	if (iocb->ki_flags & IOCB_NOWAIT) {
394354be7e3SChristoph Hellwig 		error = break_layout(inode, false);
395354be7e3SChristoph Hellwig 		if (error == -EWOULDBLOCK)
396354be7e3SChristoph Hellwig 			error = -EAGAIN;
397354be7e3SChristoph Hellwig 	} else {
39869eb5fa1SDan Williams 		error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
399354be7e3SChristoph Hellwig 	}
400354be7e3SChristoph Hellwig 
401781355c6SChristoph Hellwig 	if (error)
402781355c6SChristoph Hellwig 		return error;
403781355c6SChristoph Hellwig 
40465523218SChristoph Hellwig 	/*
40565523218SChristoph Hellwig 	 * For changing security info in file_remove_privs() we need i_rwsem
40665523218SChristoph Hellwig 	 * exclusively.
40765523218SChristoph Hellwig 	 */
408a6de82caSJan Kara 	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
40965523218SChristoph Hellwig 		xfs_iunlock(ip, *iolock);
410a6de82caSJan Kara 		*iolock = XFS_IOLOCK_EXCL;
411354be7e3SChristoph Hellwig 		error = xfs_ilock_iocb(iocb, *iolock);
412354be7e3SChristoph Hellwig 		if (error) {
413354be7e3SChristoph Hellwig 			*iolock = 0;
414354be7e3SChristoph Hellwig 			return error;
415354be7e3SChristoph Hellwig 		}
416a6de82caSJan Kara 		goto restart;
417a6de82caSJan Kara 	}
418977ec4ddSDave Chinner 
419c59d87c4SChristoph Hellwig 	/*
420c59d87c4SChristoph Hellwig 	 * If the offset is beyond the size of the file, we need to zero any
421c59d87c4SChristoph Hellwig 	 * blocks that fall between the existing EOF and the start of this
422977ec4ddSDave Chinner 	 * write.  If zeroing is needed and we are currently holding the iolock
423977ec4ddSDave Chinner 	 * shared, we need to update it to exclusive which implies having to
424977ec4ddSDave Chinner 	 * redo all checks before.
425b9d59846SDave Chinner 	 *
426977ec4ddSDave Chinner 	 * We need to serialise against EOF updates that occur in IO completions
427977ec4ddSDave Chinner 	 * here. We want to make sure that nobody is changing the size while we
428977ec4ddSDave Chinner 	 * do this check until we have placed an IO barrier (i.e.  hold the
429977ec4ddSDave Chinner 	 * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.  The
430977ec4ddSDave Chinner 	 * spinlock effectively forms a memory barrier once we have the
431977ec4ddSDave Chinner 	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
432977ec4ddSDave Chinner 	 * hence be able to correctly determine if we need to run zeroing.
433977ec4ddSDave Chinner 	 *
434977ec4ddSDave Chinner 	 * We can do an unlocked check here safely as IO completion can only
435977ec4ddSDave Chinner 	 * extend EOF. Truncate is locked out at this point, so the EOF can
436977ec4ddSDave Chinner 	 * not move backwards, only forwards. Hence we only need to take the
437977ec4ddSDave Chinner 	 * slow path and spin locks when we are at or beyond the current EOF.
438c59d87c4SChristoph Hellwig 	 */
439977ec4ddSDave Chinner 	if (iocb->ki_pos <= i_size_read(inode))
440977ec4ddSDave Chinner 		goto out;
441977ec4ddSDave Chinner 
442b9d59846SDave Chinner 	spin_lock(&ip->i_flags_lock);
443f5c54717SChristoph Hellwig 	isize = i_size_read(inode);
444f5c54717SChristoph Hellwig 	if (iocb->ki_pos > isize) {
445b9d59846SDave Chinner 		spin_unlock(&ip->i_flags_lock);
446354be7e3SChristoph Hellwig 
447354be7e3SChristoph Hellwig 		if (iocb->ki_flags & IOCB_NOWAIT)
448354be7e3SChristoph Hellwig 			return -EAGAIN;
449354be7e3SChristoph Hellwig 
4503136e8bbSBrian Foster 		if (!drained_dio) {
4517271d243SDave Chinner 			if (*iolock == XFS_IOLOCK_SHARED) {
45265523218SChristoph Hellwig 				xfs_iunlock(ip, *iolock);
4537271d243SDave Chinner 				*iolock = XFS_IOLOCK_EXCL;
45465523218SChristoph Hellwig 				xfs_ilock(ip, *iolock);
4553309dd04SAl Viro 				iov_iter_reexpand(from, count);
4563136e8bbSBrian Foster 			}
45740c63fbcSDave Chinner 			/*
45840c63fbcSDave Chinner 			 * We now have an IO submission barrier in place, but
45940c63fbcSDave Chinner 			 * AIO can do EOF updates during IO completion and hence
46040c63fbcSDave Chinner 			 * we now need to wait for all of them to drain. Non-AIO
46140c63fbcSDave Chinner 			 * DIO will have drained before we are given the
46240c63fbcSDave Chinner 			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
46340c63fbcSDave Chinner 			 * no-op.
46440c63fbcSDave Chinner 			 */
46540c63fbcSDave Chinner 			inode_dio_wait(inode);
4663136e8bbSBrian Foster 			drained_dio = true;
4677271d243SDave Chinner 			goto restart;
4687271d243SDave Chinner 		}
469f5c54717SChristoph Hellwig 
470f5c54717SChristoph Hellwig 		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
471f1ba5fafSShiyang Ruan 		error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
472c59d87c4SChristoph Hellwig 		if (error)
473c59d87c4SChristoph Hellwig 			return error;
474b9d59846SDave Chinner 	} else
475b9d59846SDave Chinner 		spin_unlock(&ip->i_flags_lock);
476c59d87c4SChristoph Hellwig 
477977ec4ddSDave Chinner out:
4781aa91d9cSStefan Roesch 	return kiocb_modified(iocb);
479c59d87c4SChristoph Hellwig }
480c59d87c4SChristoph Hellwig 
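/*
 * Completion handler for direct writes: finish COW remapping, convert
 * unwritten extents, and extend the in-core and on-disk inode size if the
 * write went beyond the old EOF.
 */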
481acdda3aaSChristoph Hellwig static int
482acdda3aaSChristoph Hellwig xfs_dio_write_end_io(
483acdda3aaSChristoph Hellwig 	struct kiocb		*iocb,
484acdda3aaSChristoph Hellwig 	ssize_t			size,
4856fe7b990SMatthew Bobrowski 	int			error,
486acdda3aaSChristoph Hellwig 	unsigned		flags)
487acdda3aaSChristoph Hellwig {
488acdda3aaSChristoph Hellwig 	struct inode		*inode = file_inode(iocb->ki_filp);
489acdda3aaSChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
490acdda3aaSChristoph Hellwig 	loff_t			offset = iocb->ki_pos;
49173d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
492acdda3aaSChristoph Hellwig 
493acdda3aaSChristoph Hellwig 	trace_xfs_end_io_direct_write(ip, offset, size);
494acdda3aaSChristoph Hellwig 
49575c8c50fSDave Chinner 	if (xfs_is_shutdown(ip->i_mount))
496acdda3aaSChristoph Hellwig 		return -EIO;
497acdda3aaSChristoph Hellwig 
4986fe7b990SMatthew Bobrowski 	if (error)
4996fe7b990SMatthew Bobrowski 		return error;
5006fe7b990SMatthew Bobrowski 	if (!size)
5016fe7b990SMatthew Bobrowski 		return 0;
502acdda3aaSChristoph Hellwig 
503ed5c3e66SDave Chinner 	/*
504ed5c3e66SDave Chinner 	 * Capture amount written on completion as we can't reliably account
505ed5c3e66SDave Chinner 	 * for it on submission.
506ed5c3e66SDave Chinner 	 */
507ed5c3e66SDave Chinner 	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
508ed5c3e66SDave Chinner 
50973d30d48SChristoph Hellwig 	/*
51073d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
51173d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
51273d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
51373d30d48SChristoph Hellwig 	 */
51473d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
51573d30d48SChristoph Hellwig 
516ee70daabSEryu Guan 	if (flags & IOMAP_DIO_COW) {
517ee70daabSEryu Guan 		error = xfs_reflink_end_cow(ip, offset, size);
518ee70daabSEryu Guan 		if (error)
51973d30d48SChristoph Hellwig 			goto out;
520ee70daabSEryu Guan 	}
521ee70daabSEryu Guan 
522ee70daabSEryu Guan 	/*
523ee70daabSEryu Guan 	 * Unwritten conversion updates the in-core isize after extent
524ee70daabSEryu Guan 	 * conversion but before updating the on-disk size. Updating isize any
525ee70daabSEryu Guan 	 * earlier allows a racing dio read to find unwritten extents before
526ee70daabSEryu Guan 	 * they are converted.
527ee70daabSEryu Guan 	 */
52873d30d48SChristoph Hellwig 	if (flags & IOMAP_DIO_UNWRITTEN) {
52973d30d48SChristoph Hellwig 		error = xfs_iomap_write_unwritten(ip, offset, size, true);
53073d30d48SChristoph Hellwig 		goto out;
53173d30d48SChristoph Hellwig 	}
532ee70daabSEryu Guan 
533acdda3aaSChristoph Hellwig 	/*
534acdda3aaSChristoph Hellwig 	 * We need to update the in-core inode size here so that we don't end up
535acdda3aaSChristoph Hellwig 	 * with the on-disk inode size being outside the in-core inode size. We
536acdda3aaSChristoph Hellwig 	 * have no other method of updating EOF for AIO, so always do it here
537acdda3aaSChristoph Hellwig 	 * if necessary.
538acdda3aaSChristoph Hellwig 	 *
539acdda3aaSChristoph Hellwig 	 * We need to lock the test/set EOF update as we can be racing with
540acdda3aaSChristoph Hellwig 	 * other IO completions here to update the EOF. Failing to serialise
541acdda3aaSChristoph Hellwig 	 * here can result in EOF moving backwards and Bad Things Happen when
542acdda3aaSChristoph Hellwig 	 * that occurs.
543977ec4ddSDave Chinner 	 *
544977ec4ddSDave Chinner 	 * As IO completion only ever extends EOF, we can do an unlocked check
545977ec4ddSDave Chinner 	 * here to avoid taking the spinlock. If we land within the current EOF,
546977ec4ddSDave Chinner 	 * then we do not need to do an extending update at all, and we don't
547977ec4ddSDave Chinner 	 * need to take the lock to check this. If we race with an update moving
548977ec4ddSDave Chinner 	 * EOF, then we'll either still be beyond EOF and need to take the lock,
549977ec4ddSDave Chinner 	 * or we'll be within EOF and we don't need to take it at all.
550acdda3aaSChristoph Hellwig 	 */
551977ec4ddSDave Chinner 	if (offset + size <= i_size_read(inode))
552977ec4ddSDave Chinner 		goto out;
553977ec4ddSDave Chinner 
554acdda3aaSChristoph Hellwig 	spin_lock(&ip->i_flags_lock);
555acdda3aaSChristoph Hellwig 	if (offset + size > i_size_read(inode)) {
556acdda3aaSChristoph Hellwig 		i_size_write(inode, offset + size);
557acdda3aaSChristoph Hellwig 		spin_unlock(&ip->i_flags_lock);
558acdda3aaSChristoph Hellwig 		error = xfs_setfilesize(ip, offset, size);
559ee70daabSEryu Guan 	} else {
560ee70daabSEryu Guan 		spin_unlock(&ip->i_flags_lock);
561ee70daabSEryu Guan 	}
562acdda3aaSChristoph Hellwig 
56373d30d48SChristoph Hellwig out:
56473d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
565acdda3aaSChristoph Hellwig 	return error;
566acdda3aaSChristoph Hellwig }
567acdda3aaSChristoph Hellwig 
568838c4f3dSChristoph Hellwig static const struct iomap_dio_ops xfs_dio_write_ops = {
569838c4f3dSChristoph Hellwig 	.end_io		= xfs_dio_write_end_io,
570838c4f3dSChristoph Hellwig };
571838c4f3dSChristoph Hellwig 
572c59d87c4SChristoph Hellwig /*
573caa89dbcSDave Chinner  * Handle block aligned direct I/O writes
574c59d87c4SChristoph Hellwig  */
575caa89dbcSDave Chinner static noinline ssize_t
576caa89dbcSDave Chinner xfs_file_dio_write_aligned(
577caa89dbcSDave Chinner 	struct xfs_inode	*ip,
578c59d87c4SChristoph Hellwig 	struct kiocb		*iocb,
579b3188919SAl Viro 	struct iov_iter		*from)
580c59d87c4SChristoph Hellwig {
581a1033753SDave Chinner 	unsigned int		iolock = XFS_IOLOCK_SHARED;
582caa89dbcSDave Chinner 	ssize_t			ret;
583c59d87c4SChristoph Hellwig 
584d7d84772SCatherine Hoang 	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
585caa89dbcSDave Chinner 	if (ret)
586caa89dbcSDave Chinner 		return ret;
587caa89dbcSDave Chinner 	ret = xfs_file_write_checks(iocb, from, &iolock);
588caa89dbcSDave Chinner 	if (ret)
589caa89dbcSDave Chinner 		goto out_unlock;
590c59d87c4SChristoph Hellwig 
5910ee7a3f6SChristoph Hellwig 	/*
592caa89dbcSDave Chinner 	 * We don't need to hold the IOLOCK exclusively across the IO, so demote
593caa89dbcSDave Chinner 	 * the iolock back to shared if we had to take the exclusive lock in
594caa89dbcSDave Chinner 	 * xfs_file_write_checks() for other reasons.
5950ee7a3f6SChristoph Hellwig 	 */
596caa89dbcSDave Chinner 	if (iolock == XFS_IOLOCK_EXCL) {
597caa89dbcSDave Chinner 		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
598caa89dbcSDave Chinner 		iolock = XFS_IOLOCK_SHARED;
599caa89dbcSDave Chinner 	}
600caa89dbcSDave Chinner 	trace_xfs_file_direct_write(iocb, from);
601caa89dbcSDave Chinner 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
602786f847fSChristoph Hellwig 			   &xfs_dio_write_ops, 0, NULL, 0);
603caa89dbcSDave Chinner out_unlock:
604caa89dbcSDave Chinner 	if (iolock)
605caa89dbcSDave Chinner 		xfs_iunlock(ip, iolock);
606caa89dbcSDave Chinner 	return ret;
607caa89dbcSDave Chinner }
60854a4ef8aSChristoph Hellwig 
60954a4ef8aSChristoph Hellwig /*
610caa89dbcSDave Chinner  * Handle block unaligned direct I/O writes
611caa89dbcSDave Chinner  *
612caa89dbcSDave Chinner  * In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
613caa89dbcSDave Chinner  * them to be done in parallel with reads and other direct I/O writes.  However,
614caa89dbcSDave Chinner  * if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
615caa89dbcSDave Chinner  * to do sub-block zeroing and that requires serialisation against other direct
616caa89dbcSDave Chinner  * I/O to the same block.  In this case we need to serialise the submission of
617caa89dbcSDave Chinner  * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
618ed1128c2SDave Chinner  * In the case where sub-block zeroing is not required, we can do concurrent
619ed1128c2SDave Chinner  * sub-block dios to the same block successfully.
620caa89dbcSDave Chinner  *
621ed1128c2SDave Chinner  * Optimistically submit the I/O using the shared lock first, but use the
622ed1128c2SDave Chinner  * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
623ed1128c2SDave Chinner  * if block allocation or partial block zeroing would be required.  In that case
624ed1128c2SDave Chinner  * we try again with the exclusive lock.
625caa89dbcSDave Chinner  */
626caa89dbcSDave Chinner static noinline ssize_t
627caa89dbcSDave Chinner xfs_file_dio_write_unaligned(
628caa89dbcSDave Chinner 	struct xfs_inode	*ip,
629caa89dbcSDave Chinner 	struct kiocb		*iocb,
630caa89dbcSDave Chinner 	struct iov_iter		*from)
631caa89dbcSDave Chinner {
632ed1128c2SDave Chinner 	size_t			isize = i_size_read(VFS_I(ip));
633ed1128c2SDave Chinner 	size_t			count = iov_iter_count(from);
634a1033753SDave Chinner 	unsigned int		iolock = XFS_IOLOCK_SHARED;
635ed1128c2SDave Chinner 	unsigned int		flags = IOMAP_DIO_OVERWRITE_ONLY;
636caa89dbcSDave Chinner 	ssize_t			ret;
637caa89dbcSDave Chinner 
638ed1128c2SDave Chinner 	/*
639ed1128c2SDave Chinner 	 * Extending writes need exclusivity because of the sub-block zeroing
640ed1128c2SDave Chinner 	 * that the DIO code always does for partial tail blocks beyond EOF, so
641ed1128c2SDave Chinner 	 * don't even bother trying the fast path in this case.
642ed1128c2SDave Chinner 	 */
643ed1128c2SDave Chinner 	if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
644caa89dbcSDave Chinner 		if (iocb->ki_flags & IOCB_NOWAIT)
645caa89dbcSDave Chinner 			return -EAGAIN;
64693e6aa43SKaixu Xia retry_exclusive:
647ed1128c2SDave Chinner 		iolock = XFS_IOLOCK_EXCL;
648ed1128c2SDave Chinner 		flags = IOMAP_DIO_FORCE_WAIT;
649ed1128c2SDave Chinner 	}
650ed1128c2SDave Chinner 
651d7d84772SCatherine Hoang 	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
652ed1128c2SDave Chinner 	if (ret)
653ed1128c2SDave Chinner 		return ret;
654caa89dbcSDave Chinner 
655caa89dbcSDave Chinner 	/*
656caa89dbcSDave Chinner 	 * We can't properly handle unaligned direct I/O to reflink files yet,
657caa89dbcSDave Chinner 	 * as we can't unshare a partial block.
65854a4ef8aSChristoph Hellwig 	 */
65966ae56a5SChristoph Hellwig 	if (xfs_is_cow_inode(ip)) {
660896f72d0SChristoph Hellwig 		trace_xfs_reflink_bounce_dio_write(iocb, from);
661caa89dbcSDave Chinner 		ret = -ENOTBLK;
662caa89dbcSDave Chinner 		goto out_unlock;
66329a5d29eSGoldwyn Rodrigues 	}
6640ee7a3f6SChristoph Hellwig 
665ee1b218bSChristoph Hellwig 	ret = xfs_file_write_checks(iocb, from, &iolock);
666c59d87c4SChristoph Hellwig 	if (ret)
667caa89dbcSDave Chinner 		goto out_unlock;
668c59d87c4SChristoph Hellwig 
669c59d87c4SChristoph Hellwig 	/*
670ed1128c2SDave Chinner 	 * If we are doing exclusive unaligned I/O, this must be the only I/O
671ed1128c2SDave Chinner 	 * in-flight.  Otherwise we risk data corruption due to unwritten extent
672ed1128c2SDave Chinner 	 * conversions from the AIO end_io handler.  Wait for all other I/O to
673ed1128c2SDave Chinner 	 * drain first.
674c59d87c4SChristoph Hellwig 	 */
675ed1128c2SDave Chinner 	if (flags & IOMAP_DIO_FORCE_WAIT)
676caa89dbcSDave Chinner 		inode_dio_wait(VFS_I(ip));
677c59d87c4SChristoph Hellwig 
6783e40b13cSChristoph Hellwig 	trace_xfs_file_direct_write(iocb, from);
679f150b423SChristoph Hellwig 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
680786f847fSChristoph Hellwig 			   &xfs_dio_write_ops, flags, NULL, 0);
681ed1128c2SDave Chinner 
682ed1128c2SDave Chinner 	/*
683ed1128c2SDave Chinner 	 * Retry unaligned I/O with exclusive blocking semantics if the DIO
684ed1128c2SDave Chinner 	 * layer rejected it for mapping or locking reasons. If we are doing
685ed1128c2SDave Chinner 	 * nonblocking user I/O, propagate the error.
686ed1128c2SDave Chinner 	 */
687ed1128c2SDave Chinner 	if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
688ed1128c2SDave Chinner 		ASSERT(flags & IOMAP_DIO_OVERWRITE_ONLY);
689ed1128c2SDave Chinner 		xfs_iunlock(ip, iolock);
690ed1128c2SDave Chinner 		goto retry_exclusive;
691ed1128c2SDave Chinner 	}
692ed1128c2SDave Chinner 
693caa89dbcSDave Chinner out_unlock:
694354be7e3SChristoph Hellwig 	if (iolock)
69565523218SChristoph Hellwig 		xfs_iunlock(ip, iolock);
69616d4d435SChristoph Hellwig 	return ret;
69716d4d435SChristoph Hellwig }
69816d4d435SChristoph Hellwig 
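/*
 * Direct I/O writes: reject anything not aligned to the device logical
 * sector size, then dispatch to the block aligned or block unaligned path.
 */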
699caa89dbcSDave Chinner static ssize_t
700caa89dbcSDave Chinner xfs_file_dio_write(
701caa89dbcSDave Chinner 	struct kiocb		*iocb,
702caa89dbcSDave Chinner 	struct iov_iter		*from)
703caa89dbcSDave Chinner {
704caa89dbcSDave Chinner 	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
705caa89dbcSDave Chinner 	struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
706caa89dbcSDave Chinner 	size_t			count = iov_iter_count(from);
707caa89dbcSDave Chinner 
708caa89dbcSDave Chinner 	/* direct I/O must be aligned to device logical sector size */
709caa89dbcSDave Chinner 	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
710caa89dbcSDave Chinner 		return -EINVAL;
711caa89dbcSDave Chinner 	if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
712caa89dbcSDave Chinner 		return xfs_file_dio_write_unaligned(ip, iocb, from);
713caa89dbcSDave Chinner 	return xfs_file_dio_write_aligned(ip, iocb, from);
714caa89dbcSDave Chinner }
715caa89dbcSDave Chinner 
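/*
 * DAX writes bypass the page cache via dax_iomap_rw() under IOLOCK_EXCL;
 * the in-core and on-disk sizes are updated here if the write extended the
 * file, followed by the usual O_(D)SYNC handling.
 */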
716f021bd07SArnd Bergmann static noinline ssize_t
71716d4d435SChristoph Hellwig xfs_file_dax_write(
71816d4d435SChristoph Hellwig 	struct kiocb		*iocb,
71916d4d435SChristoph Hellwig 	struct iov_iter		*from)
72016d4d435SChristoph Hellwig {
7216c31f495SChristoph Hellwig 	struct inode		*inode = iocb->ki_filp->f_mapping->host;
72216d4d435SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
723a1033753SDave Chinner 	unsigned int		iolock = XFS_IOLOCK_EXCL;
7246c31f495SChristoph Hellwig 	ssize_t			ret, error = 0;
7256c31f495SChristoph Hellwig 	loff_t			pos;
72616d4d435SChristoph Hellwig 
727f50b8f47SChristoph Hellwig 	ret = xfs_ilock_iocb(iocb, iolock);
728f50b8f47SChristoph Hellwig 	if (ret)
729f50b8f47SChristoph Hellwig 		return ret;
730ee1b218bSChristoph Hellwig 	ret = xfs_file_write_checks(iocb, from, &iolock);
73116d4d435SChristoph Hellwig 	if (ret)
73216d4d435SChristoph Hellwig 		goto out;
73316d4d435SChristoph Hellwig 
7346c31f495SChristoph Hellwig 	pos = iocb->ki_pos;
7358b2180b3SDave Chinner 
7363e40b13cSChristoph Hellwig 	trace_xfs_file_dax_write(iocb, from);
737ea6c49b7SShiyang Ruan 	ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops);
7386c31f495SChristoph Hellwig 	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
7396c31f495SChristoph Hellwig 		i_size_write(inode, iocb->ki_pos);
7406c31f495SChristoph Hellwig 		error = xfs_setfilesize(ip, pos, ret);
74116d4d435SChristoph Hellwig 	}
74216d4d435SChristoph Hellwig out:
743354be7e3SChristoph Hellwig 	if (iolock)
74465523218SChristoph Hellwig 		xfs_iunlock(ip, iolock);
745ed5c3e66SDave Chinner 	if (error)
746ed5c3e66SDave Chinner 		return error;
747ed5c3e66SDave Chinner 
748ed5c3e66SDave Chinner 	if (ret > 0) {
749ed5c3e66SDave Chinner 		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
750ed5c3e66SDave Chinner 
751ed5c3e66SDave Chinner 		/* Handle various SYNC-type writes */
752ed5c3e66SDave Chinner 		ret = generic_write_sync(iocb, ret);
753ed5c3e66SDave Chinner 	}
754ed5c3e66SDave Chinner 	return ret;
755c59d87c4SChristoph Hellwig }
756c59d87c4SChristoph Hellwig 
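/*
 * Buffered writes go through the page cache under IOLOCK_EXCL.  On EDQUOT
 * or ENOSPC we try once to free lingering preallocated and speculative
 * space and then retry the write.
 */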
757c59d87c4SChristoph Hellwig STATIC ssize_t
758ee1b218bSChristoph Hellwig xfs_file_buffered_write(
759c59d87c4SChristoph Hellwig 	struct kiocb		*iocb,
760b3188919SAl Viro 	struct iov_iter		*from)
761c59d87c4SChristoph Hellwig {
7622d9ac431SKaixu Xia 	struct inode		*inode = iocb->ki_filp->f_mapping->host;
763c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
764c59d87c4SChristoph Hellwig 	ssize_t			ret;
765a636b1d1SDarrick J. Wong 	bool			cleared_space = false;
766a1033753SDave Chinner 	unsigned int		iolock;
767c59d87c4SChristoph Hellwig 
768c3155097SBrian Foster write_retry:
769c3155097SBrian Foster 	iolock = XFS_IOLOCK_EXCL;
7701aa91d9cSStefan Roesch 	ret = xfs_ilock_iocb(iocb, iolock);
7711aa91d9cSStefan Roesch 	if (ret)
7721aa91d9cSStefan Roesch 		return ret;
773c59d87c4SChristoph Hellwig 
774ee1b218bSChristoph Hellwig 	ret = xfs_file_write_checks(iocb, from, &iolock);
775c59d87c4SChristoph Hellwig 	if (ret)
776d0606464SChristoph Hellwig 		goto out;
777c59d87c4SChristoph Hellwig 
7783e40b13cSChristoph Hellwig 	trace_xfs_file_buffered_write(iocb, from);
779f150b423SChristoph Hellwig 	ret = iomap_file_buffered_write(iocb, from,
780f150b423SChristoph Hellwig 			&xfs_buffered_write_iomap_ops);
781dc06f398SBrian Foster 
782c59d87c4SChristoph Hellwig 	/*
783dc06f398SBrian Foster 	 * If we hit a space limit, try to free up some lingering preallocated
784dc06f398SBrian Foster 	 * space before returning an error. In the case of ENOSPC, first try to
785dc06f398SBrian Foster 	 * write back all dirty inodes to free up some of the excess reserved
786dc06f398SBrian Foster 	 * metadata space. This reduces the chances that the eofblocks scan
787dc06f398SBrian Foster 	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
788dc06f398SBrian Foster 	 * also behaves as a filter to prevent too many eofblocks scans from
789111068f8SDarrick J. Wong 	 * running at the same time.  Use a synchronous scan to increase the
790111068f8SDarrick J. Wong 	 * effectiveness of the scan.
791c59d87c4SChristoph Hellwig 	 */
792a636b1d1SDarrick J. Wong 	if (ret == -EDQUOT && !cleared_space) {
793c3155097SBrian Foster 		xfs_iunlock(ip, iolock);
7942d53f66bSDarrick J. Wong 		xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
795111068f8SDarrick J. Wong 		cleared_space = true;
796dc06f398SBrian Foster 		goto write_retry;
797a636b1d1SDarrick J. Wong 	} else if (ret == -ENOSPC && !cleared_space) {
798b26b2bf1SDarrick J. Wong 		struct xfs_icwalk	icw = {0};
799dc06f398SBrian Foster 
800a636b1d1SDarrick J. Wong 		cleared_space = true;
8019aa05000SDave Chinner 		xfs_flush_inodes(ip->i_mount);
802c3155097SBrian Foster 
803c3155097SBrian Foster 		xfs_iunlock(ip, iolock);
804b26b2bf1SDarrick J. Wong 		icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
805b26b2bf1SDarrick J. Wong 		xfs_blockgc_free_space(ip->i_mount, &icw);
806c59d87c4SChristoph Hellwig 		goto write_retry;
807c59d87c4SChristoph Hellwig 	}
808d0606464SChristoph Hellwig 
809d0606464SChristoph Hellwig out:
810c3155097SBrian Foster 	if (iolock)
81165523218SChristoph Hellwig 		xfs_iunlock(ip, iolock);
812ed5c3e66SDave Chinner 
813ed5c3e66SDave Chinner 	if (ret > 0) {
814ed5c3e66SDave Chinner 		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
815ed5c3e66SDave Chinner 		/* Handle various SYNC-type writes */
816ed5c3e66SDave Chinner 		ret = generic_write_sync(iocb, ret);
817ed5c3e66SDave Chinner 	}
818c59d87c4SChristoph Hellwig 	return ret;
819c59d87c4SChristoph Hellwig }
820c59d87c4SChristoph Hellwig 
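/*
 * Top level write dispatcher: hand the iocb to the DAX, direct or buffered
 * write path.  A direct write only falls back to buffered I/O for a reflink
 * COW that the DIO path cannot handle (-ENOTBLK).
 */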
821c59d87c4SChristoph Hellwig STATIC ssize_t
822bf97f3bcSAl Viro xfs_file_write_iter(
823c59d87c4SChristoph Hellwig 	struct kiocb		*iocb,
824bf97f3bcSAl Viro 	struct iov_iter		*from)
825c59d87c4SChristoph Hellwig {
8262d9ac431SKaixu Xia 	struct inode		*inode = iocb->ki_filp->f_mapping->host;
827c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
828c59d87c4SChristoph Hellwig 	ssize_t			ret;
829bf97f3bcSAl Viro 	size_t			ocount = iov_iter_count(from);
830c59d87c4SChristoph Hellwig 
831ff6d6af2SBill O'Donnell 	XFS_STATS_INC(ip->i_mount, xs_write_calls);
832c59d87c4SChristoph Hellwig 
833c59d87c4SChristoph Hellwig 	if (ocount == 0)
834c59d87c4SChristoph Hellwig 		return 0;
835c59d87c4SChristoph Hellwig 
83675c8c50fSDave Chinner 	if (xfs_is_shutdown(ip->i_mount))
837bf97f3bcSAl Viro 		return -EIO;
838c59d87c4SChristoph Hellwig 
83916d4d435SChristoph Hellwig 	if (IS_DAX(inode))
840ed5c3e66SDave Chinner 		return xfs_file_dax_write(iocb, from);
841ed5c3e66SDave Chinner 
842ed5c3e66SDave Chinner 	if (iocb->ki_flags & IOCB_DIRECT) {
8430613f16cSDarrick J. Wong 		/*
8440613f16cSDarrick J. Wong 		 * Allow a directio write to fall back to a buffered
8450613f16cSDarrick J. Wong 		 * write *only* in the case that we're doing a reflink
8460613f16cSDarrick J. Wong 		 * CoW.  In all other directio scenarios we do not
8470613f16cSDarrick J. Wong 		 * allow an operation to fall back to buffered mode.
8480613f16cSDarrick J. Wong 		 */
849ee1b218bSChristoph Hellwig 		ret = xfs_file_dio_write(iocb, from);
85080e543aeSChristoph Hellwig 		if (ret != -ENOTBLK)
851c59d87c4SChristoph Hellwig 			return ret;
852c59d87c4SChristoph Hellwig 	}
853c59d87c4SChristoph Hellwig 
854ee1b218bSChristoph Hellwig 	return xfs_file_buffered_write(iocb, from);
855ed5c3e66SDave Chinner }
856ed5c3e66SDave Chinner 
857d6dc57e2SDan Williams static void
858d6dc57e2SDan Williams xfs_wait_dax_page(
859e25ff835SDave Jiang 	struct inode		*inode)
860d6dc57e2SDan Williams {
861d6dc57e2SDan Williams 	struct xfs_inode        *ip = XFS_I(inode);
862d6dc57e2SDan Williams 
863d6dc57e2SDan Williams 	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
864d6dc57e2SDan Williams 	schedule();
865d6dc57e2SDan Williams 	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
866d6dc57e2SDan Williams }
867d6dc57e2SDan Williams 
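/*
 * Wait for any page still pinned for DAX DMA to drop to a single reference,
 * releasing and retaking XFS_MMAPLOCK_EXCL while sleeping.  Sets *retry so
 * the caller re-checks for busy pages afterwards.
 */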
86813f9e267SShiyang Ruan int
869d6dc57e2SDan Williams xfs_break_dax_layouts(
870d6dc57e2SDan Williams 	struct inode		*inode,
871e25ff835SDave Jiang 	bool			*retry)
872d6dc57e2SDan Williams {
873d6dc57e2SDan Williams 	struct page		*page;
874d6dc57e2SDan Williams 
875d6dc57e2SDan Williams 	ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
876d6dc57e2SDan Williams 
877d6dc57e2SDan Williams 	page = dax_layout_busy_page(inode->i_mapping);
878d6dc57e2SDan Williams 	if (!page)
879d6dc57e2SDan Williams 		return 0;
880d6dc57e2SDan Williams 
881e25ff835SDave Jiang 	*retry = true;
882d6dc57e2SDan Williams 	return ___wait_var_event(&page->_refcount,
883d6dc57e2SDan Williams 			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
884e25ff835SDave Jiang 			0, 0, xfs_wait_dax_page(inode));
885d6dc57e2SDan Williams }
886d6dc57e2SDan Williams 
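/*
 * Break leases and/or DAX layouts before an operation that changes the file
 * layout: BREAK_UNMAP breaks DAX layouts and then leases, BREAK_WRITE only
 * leases.  Loops until nothing is left to retry.
 */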
88769eb5fa1SDan Williams int
88869eb5fa1SDan Williams xfs_break_layouts(
88969eb5fa1SDan Williams 	struct inode		*inode,
89069eb5fa1SDan Williams 	uint			*iolock,
89169eb5fa1SDan Williams 	enum layout_break_reason reason)
89269eb5fa1SDan Williams {
89369eb5fa1SDan Williams 	bool			retry;
894d6dc57e2SDan Williams 	int			error;
89569eb5fa1SDan Williams 
89669eb5fa1SDan Williams 	ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
89769eb5fa1SDan Williams 
898d6dc57e2SDan Williams 	do {
899d6dc57e2SDan Williams 		retry = false;
90069eb5fa1SDan Williams 		switch (reason) {
90169eb5fa1SDan Williams 		case BREAK_UNMAP:
902a4722a64SEric Sandeen 			error = xfs_break_dax_layouts(inode, &retry);
903d6dc57e2SDan Williams 			if (error || retry)
904d6dc57e2SDan Williams 				break;
90553004ee7SGustavo A. R. Silva 			fallthrough;
90669eb5fa1SDan Williams 		case BREAK_WRITE:
907d6dc57e2SDan Williams 			error = xfs_break_leased_layouts(inode, iolock, &retry);
908d6dc57e2SDan Williams 			break;
90969eb5fa1SDan Williams 		default:
91069eb5fa1SDan Williams 			WARN_ON_ONCE(1);
911d6dc57e2SDan Williams 			error = -EINVAL;
91269eb5fa1SDan Williams 		}
913d6dc57e2SDan Williams 	} while (error == 0 && retry);
914d6dc57e2SDan Williams 
915d6dc57e2SDan Williams 	return error;
91669eb5fa1SDan Williams }
91769eb5fa1SDan Williams 
918cea267c2SDave Chinner /* Does this file, inode, or mount want synchronous writes? */
919cea267c2SDave Chinner static inline bool xfs_file_sync_writes(struct file *filp)
920cea267c2SDave Chinner {
921cea267c2SDave Chinner 	struct xfs_inode	*ip = XFS_I(file_inode(filp));
922cea267c2SDave Chinner 
923cea267c2SDave Chinner 	if (xfs_has_wsync(ip->i_mount))
924cea267c2SDave Chinner 		return true;
925cea267c2SDave Chinner 	if (filp->f_flags & (__O_SYNC | O_DSYNC))
926cea267c2SDave Chinner 		return true;
927cea267c2SDave Chinner 	if (IS_SYNC(file_inode(filp)))
928cea267c2SDave Chinner 		return true;
929cea267c2SDave Chinner 
930cea267c2SDave Chinner 	return false;
931cea267c2SDave Chinner }
932cea267c2SDave Chinner 
933a904b1caSNamjae Jeon #define	XFS_FALLOC_FL_SUPPORTED						\
934a904b1caSNamjae Jeon 		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
935a904b1caSNamjae Jeon 		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
93698cc2db5SDarrick J. Wong 		 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
937a904b1caSNamjae Jeon 
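/*
 * fallocate() entry point: punch, zero, collapse, insert, unshare or
 * preallocate space, with layouts broken and AIO/DIO drained first.
 */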
938c59d87c4SChristoph Hellwig STATIC long
939c59d87c4SChristoph Hellwig xfs_file_fallocate(
940c59d87c4SChristoph Hellwig 	struct file		*file,
941c59d87c4SChristoph Hellwig 	int			mode,
942c59d87c4SChristoph Hellwig 	loff_t			offset,
943c59d87c4SChristoph Hellwig 	loff_t			len)
944c59d87c4SChristoph Hellwig {
945496ad9aaSAl Viro 	struct inode		*inode = file_inode(file);
94683aee9e4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
947c59d87c4SChristoph Hellwig 	long			error;
948c63a8eaeSDan Williams 	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
949c59d87c4SChristoph Hellwig 	loff_t			new_size = 0;
950749f24f3SThomas Meyer 	bool			do_file_insert = false;
951c59d87c4SChristoph Hellwig 
95283aee9e4SChristoph Hellwig 	if (!S_ISREG(inode->i_mode))
95383aee9e4SChristoph Hellwig 		return -EINVAL;
954a904b1caSNamjae Jeon 	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
955c59d87c4SChristoph Hellwig 		return -EOPNOTSUPP;
956c59d87c4SChristoph Hellwig 
957781355c6SChristoph Hellwig 	xfs_ilock(ip, iolock);
95869eb5fa1SDan Williams 	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
959781355c6SChristoph Hellwig 	if (error)
960781355c6SChristoph Hellwig 		goto out_unlock;
961781355c6SChristoph Hellwig 
962249bd908SDave Chinner 	/*
963249bd908SDave Chinner 	 * Must wait for all AIO to complete before we continue as AIO can
964249bd908SDave Chinner 	 * change the file size on completion without holding any locks we
965249bd908SDave Chinner 	 * currently hold. We must do this first because AIO can update both
966249bd908SDave Chinner 	 * the on disk and in memory inode sizes, and the operations that follow
967249bd908SDave Chinner 	 * require the in-memory size to be fully up-to-date.
968249bd908SDave Chinner 	 */
969249bd908SDave Chinner 	inode_dio_wait(inode);
970249bd908SDave Chinner 
971249bd908SDave Chinner 	/*
972249bd908SDave Chinner 	 * Now AIO and DIO has drained we flush and (if necessary) invalidate
973249bd908SDave Chinner 	 * the cached range over the first operation we are about to run.
974249bd908SDave Chinner 	 *
975249bd908SDave Chinner 	 * We care about zero and collapse here because they both run a hole
976249bd908SDave Chinner 	 * punch over the range first. Because that can zero data, and the range
977249bd908SDave Chinner 	 * of invalidation for the shift operations is much larger, we still do
978249bd908SDave Chinner 	 * the required flush for collapse in xfs_prepare_shift().
979249bd908SDave Chinner 	 *
980249bd908SDave Chinner 	 * Insert has the same range requirements as collapse, and we extend the
981249bd908SDave Chinner 	 * file first which can zero data. Hence insert has the same
982249bd908SDave Chinner 	 * flush/invalidate requirements as collapse and so they are both
983249bd908SDave Chinner 	 * handled at the right time by xfs_prepare_shift().
984249bd908SDave Chinner 	 */
985249bd908SDave Chinner 	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
986249bd908SDave Chinner 		    FALLOC_FL_COLLAPSE_RANGE)) {
987249bd908SDave Chinner 		error = xfs_flush_unmap_range(ip, offset, len);
988249bd908SDave Chinner 		if (error)
989249bd908SDave Chinner 			goto out_unlock;
990249bd908SDave Chinner 	}
991249bd908SDave Chinner 
992fbe7e520SDave Chinner 	error = file_modified(file);
993fbe7e520SDave Chinner 	if (error)
994fbe7e520SDave Chinner 		goto out_unlock;
995fbe7e520SDave Chinner 
99683aee9e4SChristoph Hellwig 	if (mode & FALLOC_FL_PUNCH_HOLE) {
99783aee9e4SChristoph Hellwig 		error = xfs_free_file_space(ip, offset, len);
99883aee9e4SChristoph Hellwig 		if (error)
99983aee9e4SChristoph Hellwig 			goto out_unlock;
1000e1d8fb88SNamjae Jeon 	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
100125219dbfSDarrick J. Wong 		if (!xfs_is_falloc_aligned(ip, offset, len)) {
10022451337dSDave Chinner 			error = -EINVAL;
1003e1d8fb88SNamjae Jeon 			goto out_unlock;
1004e1d8fb88SNamjae Jeon 		}
1005e1d8fb88SNamjae Jeon 
100623fffa92SLukas Czerner 		/*
100723fffa92SLukas Czerner 		 * The collapse range must not overlap EOF; a request that
100823fffa92SLukas Czerner 		 * does would effectively be a truncate operation
100923fffa92SLukas Czerner 		 */
101023fffa92SLukas Czerner 		if (offset + len >= i_size_read(inode)) {
10112451337dSDave Chinner 			error = -EINVAL;
101223fffa92SLukas Czerner 			goto out_unlock;
101323fffa92SLukas Czerner 		}
101423fffa92SLukas Czerner 
1015e1d8fb88SNamjae Jeon 		new_size = i_size_read(inode) - len;
1016e1d8fb88SNamjae Jeon 
1017e1d8fb88SNamjae Jeon 		error = xfs_collapse_file_space(ip, offset, len);
1018e1d8fb88SNamjae Jeon 		if (error)
1019e1d8fb88SNamjae Jeon 			goto out_unlock;
1020a904b1caSNamjae Jeon 	} else if (mode & FALLOC_FL_INSERT_RANGE) {
10217d83fb14SDarrick J. Wong 		loff_t		isize = i_size_read(inode);
1022a904b1caSNamjae Jeon 
102325219dbfSDarrick J. Wong 		if (!xfs_is_falloc_aligned(ip, offset, len)) {
1024a904b1caSNamjae Jeon 			error = -EINVAL;
1025a904b1caSNamjae Jeon 			goto out_unlock;
1026a904b1caSNamjae Jeon 		}
1027a904b1caSNamjae Jeon 
10287d83fb14SDarrick J. Wong 		/*
10297d83fb14SDarrick J. Wong 		 * New inode size must not exceed ->s_maxbytes, accounting for
10307d83fb14SDarrick J. Wong 		 * possible signed overflow.
10317d83fb14SDarrick J. Wong 		 */
10327d83fb14SDarrick J. Wong 		if (inode->i_sb->s_maxbytes - isize < len) {
1033a904b1caSNamjae Jeon 			error = -EFBIG;
1034a904b1caSNamjae Jeon 			goto out_unlock;
1035a904b1caSNamjae Jeon 		}
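		/*
		 * Worked example of the overflow-safe check above with
		 * illustrative values: the naive test "isize + len >
		 * s_maxbytes" could wrap to a negative value when isize and
		 * len are both large and wrongly pass.  Written as
		 * "s_maxbytes - isize < len" it cannot overflow, because
		 * isize is already known to be <= s_maxbytes; e.g. with
		 * s_maxbytes = 100, isize = 90 and len = 20 it computes
		 * 10 < 20 and correctly returns -EFBIG.
		 */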
10367d83fb14SDarrick J. Wong 		new_size = isize + len;
1037a904b1caSNamjae Jeon 
1038a904b1caSNamjae Jeon 		/* Offset should be less than i_size */
10397d83fb14SDarrick J. Wong 		if (offset >= isize) {
1040a904b1caSNamjae Jeon 			error = -EINVAL;
1041a904b1caSNamjae Jeon 			goto out_unlock;
1042a904b1caSNamjae Jeon 		}
1043749f24f3SThomas Meyer 		do_file_insert = true;
104483aee9e4SChristoph Hellwig 	} else {
1045c59d87c4SChristoph Hellwig 		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
1046c59d87c4SChristoph Hellwig 		    offset + len > i_size_read(inode)) {
1047c59d87c4SChristoph Hellwig 			new_size = offset + len;
10482451337dSDave Chinner 			error = inode_newsize_ok(inode, new_size);
1049c59d87c4SChristoph Hellwig 			if (error)
1050c59d87c4SChristoph Hellwig 				goto out_unlock;
1051c59d87c4SChristoph Hellwig 		}
1052c59d87c4SChristoph Hellwig 
105366ae56a5SChristoph Hellwig 		if (mode & FALLOC_FL_ZERO_RANGE) {
1054360c09c0SChristoph Hellwig 			/*
1055360c09c0SChristoph Hellwig 			 * Punch a hole and prealloc the range.  We use a hole
1056360c09c0SChristoph Hellwig 			 * punch rather than unwritten extent conversion for two
1057360c09c0SChristoph Hellwig 			 * reasons:
1058360c09c0SChristoph Hellwig 			 *
1059360c09c0SChristoph Hellwig 			 *   1.) Hole punch handles partial block zeroing for us.
1060360c09c0SChristoph Hellwig 			 *   2.) If prealloc returns ENOSPC, the file range is
1061360c09c0SChristoph Hellwig 			 *       still zero-valued by virtue of the hole punch.
1062360c09c0SChristoph Hellwig 			 */
1063360c09c0SChristoph Hellwig 			unsigned int blksize = i_blocksize(inode);
1064360c09c0SChristoph Hellwig 
1065360c09c0SChristoph Hellwig 			trace_xfs_zero_file_space(ip);
1066360c09c0SChristoph Hellwig 
1067360c09c0SChristoph Hellwig 			error = xfs_free_file_space(ip, offset, len);
1068360c09c0SChristoph Hellwig 			if (error)
1069360c09c0SChristoph Hellwig 				goto out_unlock;
1070360c09c0SChristoph Hellwig 
1071360c09c0SChristoph Hellwig 			len = round_up(offset + len, blksize) -
1072360c09c0SChristoph Hellwig 			      round_down(offset, blksize);
1073360c09c0SChristoph Hellwig 			offset = round_down(offset, blksize);
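			/*
			 * Worked example of the rounding above with the
			 * illustrative values blksize = 4096, offset = 1000
			 * and len = 3000: the hole punch covers bytes
			 * [1000, 4000), then
			 *
			 *	len    = round_up(4000, 4096) -
			 *		 round_down(1000, 4096) = 4096
			 *	offset = round_down(1000, 4096) = 0
			 *
			 * so the preallocation below covers the whole block
			 * range [0, 4096).
			 */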
107466ae56a5SChristoph Hellwig 		} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
107598cc2db5SDarrick J. Wong 			error = xfs_reflink_unshare(ip, offset, len);
107698cc2db5SDarrick J. Wong 			if (error)
107798cc2db5SDarrick J. Wong 				goto out_unlock;
107866ae56a5SChristoph Hellwig 		} else {
107966ae56a5SChristoph Hellwig 			/*
108066ae56a5SChristoph Hellwig 			 * In always_cow mode we can't use preallocations and
108166ae56a5SChristoph Hellwig 			 * thus should not create them.
108266ae56a5SChristoph Hellwig 			 */
108366ae56a5SChristoph Hellwig 			if (xfs_is_always_cow_inode(ip)) {
108466ae56a5SChristoph Hellwig 				error = -EOPNOTSUPP;
108566ae56a5SChristoph Hellwig 				goto out_unlock;
108666ae56a5SChristoph Hellwig 			}
1087360c09c0SChristoph Hellwig 		}
108866ae56a5SChristoph Hellwig 
1089360c09c0SChristoph Hellwig 		if (!xfs_is_always_cow_inode(ip)) {
10904d1b97f9SDarrick J. Wong 			error = xfs_alloc_file_space(ip, offset, len);
109183aee9e4SChristoph Hellwig 			if (error)
109283aee9e4SChristoph Hellwig 				goto out_unlock;
109383aee9e4SChristoph Hellwig 		}
1094fbe7e520SDave Chinner 	}
1095fbe7e520SDave Chinner 
1096c59d87c4SChristoph Hellwig 	/* Change file size if needed */
1097c59d87c4SChristoph Hellwig 	if (new_size) {
1098c59d87c4SChristoph Hellwig 		struct iattr iattr;
1099c59d87c4SChristoph Hellwig 
1100c59d87c4SChristoph Hellwig 		iattr.ia_valid = ATTR_SIZE;
1101c59d87c4SChristoph Hellwig 		iattr.ia_size = new_size;
1102c1632a0fSChristian Brauner 		error = xfs_vn_setattr_size(file_mnt_idmap(file),
1103f736d93dSChristoph Hellwig 					    file_dentry(file), &iattr);
1104a904b1caSNamjae Jeon 		if (error)
1105a904b1caSNamjae Jeon 			goto out_unlock;
1106c59d87c4SChristoph Hellwig 	}
1107c59d87c4SChristoph Hellwig 
1108a904b1caSNamjae Jeon 	/*
1109a904b1caSNamjae Jeon 	 * Perform hole insertion now that the file size has been
1110a904b1caSNamjae Jeon 	 * updated so that if we crash during the operation we don't
1111a904b1caSNamjae Jeon 	 * leave shifted extents past EOF and hence lose access to
1112a904b1caSNamjae Jeon 	 * the data that is contained within them.
1113a904b1caSNamjae Jeon 	 */
1114472c6e46SDave Chinner 	if (do_file_insert) {
1115a904b1caSNamjae Jeon 		error = xfs_insert_file_space(ip, offset, len);
1116472c6e46SDave Chinner 		if (error)
1117472c6e46SDave Chinner 			goto out_unlock;
1118472c6e46SDave Chinner 	}
1119472c6e46SDave Chinner 
1120cea267c2SDave Chinner 	if (xfs_file_sync_writes(file))
1121472c6e46SDave Chinner 		error = xfs_log_force_inode(ip);
1122a904b1caSNamjae Jeon 
1123c59d87c4SChristoph Hellwig out_unlock:
1124781355c6SChristoph Hellwig 	xfs_iunlock(ip, iolock);
11252451337dSDave Chinner 	return error;
1126c59d87c4SChristoph Hellwig }
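For reference, the modes dispatched above are driven from userspace through the fallocate(2) system call. A minimal sketch, assuming an open, writable descriptor on an XFS file; the helper names are illustrative and not part of xfs_file.c:

#define _GNU_SOURCE
#include <fcntl.h>	/* fallocate() and the FALLOC_FL_* flags with _GNU_SOURCE */

/* Punch a hole without changing the file size. */
static int punch_hole(int fd, off_t offset, off_t len)
{
	return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			 offset, len);
}

/* Preallocate space; mode 0 extends i_size when the range ends past EOF. */
static int prealloc(int fd, off_t offset, off_t len)
{
	return fallocate(fd, 0, offset, len);
}

FALLOC_FL_PUNCH_HOLE must always be paired with FALLOC_FL_KEEP_SIZE, which is consistent with the hole-punch branch above never setting new_size.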
1127c59d87c4SChristoph Hellwig 
112840144e49SJan Kara STATIC int
112940144e49SJan Kara xfs_file_fadvise(
113040144e49SJan Kara 	struct file	*file,
113140144e49SJan Kara 	loff_t		start,
113240144e49SJan Kara 	loff_t		end,
113340144e49SJan Kara 	int		advice)
113440144e49SJan Kara {
113540144e49SJan Kara 	struct xfs_inode *ip = XFS_I(file_inode(file));
113640144e49SJan Kara 	int ret;
113740144e49SJan Kara 	int lockflags = 0;
113840144e49SJan Kara 
113940144e49SJan Kara 	/*
114040144e49SJan Kara 	 * Operations creating pages in page cache need protection from hole
114140144e49SJan Kara 	 * punching and similar ops
114240144e49SJan Kara 	 */
114340144e49SJan Kara 	if (advice == POSIX_FADV_WILLNEED) {
114440144e49SJan Kara 		lockflags = XFS_IOLOCK_SHARED;
114540144e49SJan Kara 		xfs_ilock(ip, lockflags);
114640144e49SJan Kara 	}
114740144e49SJan Kara 	ret = generic_fadvise(file, start, end, advice);
114840144e49SJan Kara 	if (lockflags)
114940144e49SJan Kara 		xfs_iunlock(ip, lockflags);
115040144e49SJan Kara 	return ret;
115140144e49SJan Kara }
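The POSIX_FADV_WILLNEED case above is the path taken by userspace readahead hints. A minimal sketch, assuming an open descriptor; the helper name is illustrative:

#include <fcntl.h>

/* Ask the kernel to read the given range into the page cache. */
static int prefetch_range(int fd, off_t offset, off_t len)
{
	/* posix_fadvise() returns 0 on success or a positive errno value. */
	return posix_fadvise(fd, offset, len, POSIX_FADV_WILLNEED);
}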
11523fc9f5e4SDarrick J. Wong 
1153da034bccSEric Biggers STATIC loff_t
11542e5dfc99SDarrick J. Wong xfs_file_remap_range(
11559fe26045SDarrick J. Wong 	struct file		*file_in,
11569fe26045SDarrick J. Wong 	loff_t			pos_in,
11579fe26045SDarrick J. Wong 	struct file		*file_out,
11589fe26045SDarrick J. Wong 	loff_t			pos_out,
115942ec3d4cSDarrick J. Wong 	loff_t			len,
11602e5dfc99SDarrick J. Wong 	unsigned int		remap_flags)
11619fe26045SDarrick J. Wong {
11623fc9f5e4SDarrick J. Wong 	struct inode		*inode_in = file_inode(file_in);
11633fc9f5e4SDarrick J. Wong 	struct xfs_inode	*src = XFS_I(inode_in);
11643fc9f5e4SDarrick J. Wong 	struct inode		*inode_out = file_inode(file_out);
11653fc9f5e4SDarrick J. Wong 	struct xfs_inode	*dest = XFS_I(inode_out);
11663fc9f5e4SDarrick J. Wong 	struct xfs_mount	*mp = src->i_mount;
11673fc9f5e4SDarrick J. Wong 	loff_t			remapped = 0;
11683fc9f5e4SDarrick J. Wong 	xfs_extlen_t		cowextsize;
11693fc9f5e4SDarrick J. Wong 	int			ret;
11703fc9f5e4SDarrick J. Wong 
11712e5dfc99SDarrick J. Wong 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
11722e5dfc99SDarrick J. Wong 		return -EINVAL;
1173cc714660SDarrick J. Wong 
117438c26bfdSDave Chinner 	if (!xfs_has_reflink(mp))
11753fc9f5e4SDarrick J. Wong 		return -EOPNOTSUPP;
11763fc9f5e4SDarrick J. Wong 
117775c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
11783fc9f5e4SDarrick J. Wong 		return -EIO;
11793fc9f5e4SDarrick J. Wong 
11803fc9f5e4SDarrick J. Wong 	/* Prepare and then clone file data. */
11813fc9f5e4SDarrick J. Wong 	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
11823fc9f5e4SDarrick J. Wong 			&len, remap_flags);
1183451d34eeSDarrick J. Wong 	if (ret || len == 0)
11843fc9f5e4SDarrick J. Wong 		return ret;
11853fc9f5e4SDarrick J. Wong 
11863fc9f5e4SDarrick J. Wong 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
11873fc9f5e4SDarrick J. Wong 
11883fc9f5e4SDarrick J. Wong 	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
11893fc9f5e4SDarrick J. Wong 			&remapped);
11903fc9f5e4SDarrick J. Wong 	if (ret)
11913fc9f5e4SDarrick J. Wong 		goto out_unlock;
11923fc9f5e4SDarrick J. Wong 
11933fc9f5e4SDarrick J. Wong 	/*
11943fc9f5e4SDarrick J. Wong 	 * Carry the cowextsize hint from src to dest if we're sharing the
11953fc9f5e4SDarrick J. Wong 	 * entire source file to the entire destination file, the source file
11963fc9f5e4SDarrick J. Wong 	 * has a cowextsize hint, and the destination file does not.
11973fc9f5e4SDarrick J. Wong 	 */
11983fc9f5e4SDarrick J. Wong 	cowextsize = 0;
11993fc9f5e4SDarrick J. Wong 	if (pos_in == 0 && len == i_size_read(inode_in) &&
12003e09ab8fSChristoph Hellwig 	    (src->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
12013fc9f5e4SDarrick J. Wong 	    pos_out == 0 && len >= i_size_read(inode_out) &&
12023e09ab8fSChristoph Hellwig 	    !(dest->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE))
1203b33ce57dSChristoph Hellwig 		cowextsize = src->i_cowextsize;
12043fc9f5e4SDarrick J. Wong 
12053fc9f5e4SDarrick J. Wong 	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
12063fc9f5e4SDarrick J. Wong 			remap_flags);
12075833112dSChristoph Hellwig 	if (ret)
12085833112dSChristoph Hellwig 		goto out_unlock;
12093fc9f5e4SDarrick J. Wong 
12105ffce3ccSDarrick J. Wong 	if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
12115833112dSChristoph Hellwig 		xfs_log_force_inode(dest);
12123fc9f5e4SDarrick J. Wong out_unlock:
1213d7d84772SCatherine Hoang 	xfs_iunlock2_remapping(src, dest);
12143fc9f5e4SDarrick J. Wong 	if (ret)
12153fc9f5e4SDarrick J. Wong 		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1216549f2fc3SDarrick J. Wong 	/*
1217549f2fc3SDarrick J. Wong 	 * If the caller did not set CAN_SHORTEN, then it is not prepared to
1218549f2fc3SDarrick J. Wong 	 * handle partial results -- either the whole remap succeeds, or we
1219549f2fc3SDarrick J. Wong 	 * must say why it did not.  In this case, any error should be returned
1220549f2fc3SDarrick J. Wong 	 * to the caller.
1221549f2fc3SDarrick J. Wong 	 */
1222549f2fc3SDarrick J. Wong 	if (ret && remapped < len && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
1223549f2fc3SDarrick J. Wong 		return ret;
12243fc9f5e4SDarrick J. Wong 	return remapped > 0 ? remapped : ret;
12259fe26045SDarrick J. Wong }
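One common way userspace reaches ->remap_file_range is the FICLONERANGE ioctl (the FIDEDUPERANGE ioctl takes the same path with REMAP_FILE_DEDUP set). A minimal sketch, assuming src_fd and dest_fd refer to files on the same reflink-enabled XFS filesystem; the helper name is illustrative:

#include <sys/ioctl.h>
#include <linux/fs.h>		/* FICLONERANGE, struct file_clone_range */

/* Share (reflink) len bytes from src_fd into dest_fd without copying data. */
static int clone_range(int src_fd, int dest_fd, __u64 src_off, __u64 len,
		       __u64 dest_off)
{
	struct file_clone_range fcr = {
		.src_fd		= src_fd,
		.src_offset	= src_off,
		.src_length	= len,
		.dest_offset	= dest_off,
	};

	return ioctl(dest_fd, FICLONERANGE, &fcr);
}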
1226c59d87c4SChristoph Hellwig 
1227c59d87c4SChristoph Hellwig STATIC int
1228c59d87c4SChristoph Hellwig xfs_file_open(
1229c59d87c4SChristoph Hellwig 	struct inode	*inode,
1230c59d87c4SChristoph Hellwig 	struct file	*file)
1231c59d87c4SChristoph Hellwig {
123275c8c50fSDave Chinner 	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
1233c59d87c4SChristoph Hellwig 		return -EIO;
1234d8aeb44aSJens Axboe 	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
1235b2943499SChristoph Hellwig 			FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
1236f3bf67c6SMatthew Wilcox (Oracle) 	return generic_file_open(inode, file);
1237c59d87c4SChristoph Hellwig }
1238c59d87c4SChristoph Hellwig 
1239c59d87c4SChristoph Hellwig STATIC int
1240c59d87c4SChristoph Hellwig xfs_dir_open(
1241c59d87c4SChristoph Hellwig 	struct inode	*inode,
1242c59d87c4SChristoph Hellwig 	struct file	*file)
1243c59d87c4SChristoph Hellwig {
1244c59d87c4SChristoph Hellwig 	struct xfs_inode *ip = XFS_I(inode);
1245a1033753SDave Chinner 	unsigned int	mode;
1246c59d87c4SChristoph Hellwig 	int		error;
1247c59d87c4SChristoph Hellwig 
1248c59d87c4SChristoph Hellwig 	error = xfs_file_open(inode, file);
1249c59d87c4SChristoph Hellwig 	if (error)
1250c59d87c4SChristoph Hellwig 		return error;
1251c59d87c4SChristoph Hellwig 
1252c59d87c4SChristoph Hellwig 	/*
1253c59d87c4SChristoph Hellwig 	 * If there are any blocks, read-ahead block 0 as we're almost
1254c59d87c4SChristoph Hellwig 	 * certain to have the next operation be a read there.
1255c59d87c4SChristoph Hellwig 	 */
1256309ecac8SChristoph Hellwig 	mode = xfs_ilock_data_map_shared(ip);
1257daf83964SChristoph Hellwig 	if (ip->i_df.if_nextents > 0)
125806566fdaSChristoph Hellwig 		error = xfs_dir3_data_readahead(ip, 0, 0);
1259c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, mode);
12607a652bbeSDarrick J. Wong 	return error;
1261c59d87c4SChristoph Hellwig }
1262c59d87c4SChristoph Hellwig 
1263c59d87c4SChristoph Hellwig STATIC int
1264c59d87c4SChristoph Hellwig xfs_file_release(
1265c59d87c4SChristoph Hellwig 	struct inode	*inode,
1266c59d87c4SChristoph Hellwig 	struct file	*filp)
1267c59d87c4SChristoph Hellwig {
12682451337dSDave Chinner 	return xfs_release(XFS_I(inode));
1269c59d87c4SChristoph Hellwig }
1270c59d87c4SChristoph Hellwig 
1271c59d87c4SChristoph Hellwig STATIC int
1272c59d87c4SChristoph Hellwig xfs_file_readdir(
1273b8227554SAl Viro 	struct file	*file,
1274b8227554SAl Viro 	struct dir_context *ctx)
1275c59d87c4SChristoph Hellwig {
1276b8227554SAl Viro 	struct inode	*inode = file_inode(file);
1277c59d87c4SChristoph Hellwig 	xfs_inode_t	*ip = XFS_I(inode);
1278c59d87c4SChristoph Hellwig 	size_t		bufsize;
1279c59d87c4SChristoph Hellwig 
1280c59d87c4SChristoph Hellwig 	/*
1281c59d87c4SChristoph Hellwig 	 * The Linux API doesn't pass the total size of the buffer
1282c59d87c4SChristoph Hellwig 	 * we read into down to the filesystem.  With the filldir concept
1283c59d87c4SChristoph Hellwig 	 * it's not needed for correct information, but the XFS dir2 leaf
1284c59d87c4SChristoph Hellwig 	 * code wants an estimate of the buffer size to calculate its
1285c59d87c4SChristoph Hellwig 	 * readahead window and size the buffers used for mapping to
1286c59d87c4SChristoph Hellwig 	 * physical blocks.
1287c59d87c4SChristoph Hellwig 	 *
1288c59d87c4SChristoph Hellwig 	 * Try to give it an estimate that's good enough; maybe at some
1289c59d87c4SChristoph Hellwig 	 * point we can change the ->readdir prototype to include the
1290c59d87c4SChristoph Hellwig 	 * buffer size.  For now we use the current glibc buffer size.
1291c59d87c4SChristoph Hellwig 	 */
129213d2c10bSChristoph Hellwig 	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_disk_size);
1293c59d87c4SChristoph Hellwig 
1294acb9553cSDarrick J. Wong 	return xfs_readdir(NULL, ip, ctx, bufsize);
12953fe3e6b1SJeff Liu }
12963fe3e6b1SJeff Liu 
12973fe3e6b1SJeff Liu STATIC loff_t
12983fe3e6b1SJeff Liu xfs_file_llseek(
12993fe3e6b1SJeff Liu 	struct file	*file,
13003fe3e6b1SJeff Liu 	loff_t		offset,
130159f9c004SEric Sandeen 	int		whence)
13023fe3e6b1SJeff Liu {
13039b2970aaSChristoph Hellwig 	struct inode		*inode = file->f_mapping->host;
13049b2970aaSChristoph Hellwig 
130575c8c50fSDave Chinner 	if (xfs_is_shutdown(XFS_I(inode)->i_mount))
13069b2970aaSChristoph Hellwig 		return -EIO;
13079b2970aaSChristoph Hellwig 
130859f9c004SEric Sandeen 	switch (whence) {
13099b2970aaSChristoph Hellwig 	default:
131059f9c004SEric Sandeen 		return generic_file_llseek(file, offset, whence);
13113fe3e6b1SJeff Liu 	case SEEK_HOLE:
131260271ab7SChristoph Hellwig 		offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
13139b2970aaSChristoph Hellwig 		break;
131449c69591SEric Sandeen 	case SEEK_DATA:
131560271ab7SChristoph Hellwig 		offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
13169b2970aaSChristoph Hellwig 		break;
13173fe3e6b1SJeff Liu 	}
13189b2970aaSChristoph Hellwig 
13199b2970aaSChristoph Hellwig 	if (offset < 0)
13209b2970aaSChristoph Hellwig 		return offset;
13219b2970aaSChristoph Hellwig 	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
13223fe3e6b1SJeff Liu }
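The SEEK_DATA/SEEK_HOLE support wired up above enables sparse-file walks from userspace. A minimal sketch, assuming an open descriptor; the helper name is illustrative:

#define _GNU_SOURCE		/* SEEK_DATA, SEEK_HOLE */
#include <stdio.h>
#include <unistd.h>

/* Print every data region of a (possibly sparse) file. */
static void dump_data_regions(int fd)
{
	off_t data = 0, hole;

	/* lseek() fails with ENXIO once no data remains before EOF. */
	while ((data = lseek(fd, data, SEEK_DATA)) >= 0) {
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			break;
		printf("data: [%lld, %lld)\n", (long long)data,
		       (long long)hole);
		data = hole;
	}
}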
13233fe3e6b1SJeff Liu 
1324ea6c49b7SShiyang Ruan #ifdef CONFIG_FS_DAX
132547ba8cc7SDarrick J. Wong static inline vm_fault_t
1326ea6c49b7SShiyang Ruan xfs_dax_fault(
1327ea6c49b7SShiyang Ruan 	struct vm_fault		*vmf,
13281d024e7aSMatthew Wilcox (Oracle) 	unsigned int		order,
1329ea6c49b7SShiyang Ruan 	bool			write_fault,
1330ea6c49b7SShiyang Ruan 	pfn_t			*pfn)
1331ea6c49b7SShiyang Ruan {
13321d024e7aSMatthew Wilcox (Oracle) 	return dax_iomap_fault(vmf, order, pfn, NULL,
1333ea6c49b7SShiyang Ruan 			(write_fault && !vmf->cow_page) ?
1334ea6c49b7SShiyang Ruan 				&xfs_dax_write_iomap_ops :
1335ea6c49b7SShiyang Ruan 				&xfs_read_iomap_ops);
1336ea6c49b7SShiyang Ruan }
1337ea6c49b7SShiyang Ruan #else
133847ba8cc7SDarrick J. Wong static inline vm_fault_t
1339ea6c49b7SShiyang Ruan xfs_dax_fault(
1340ea6c49b7SShiyang Ruan 	struct vm_fault		*vmf,
13411d024e7aSMatthew Wilcox (Oracle) 	unsigned int		order,
1342ea6c49b7SShiyang Ruan 	bool			write_fault,
1343ea6c49b7SShiyang Ruan 	pfn_t			*pfn)
1344ea6c49b7SShiyang Ruan {
134547ba8cc7SDarrick J. Wong 	ASSERT(0);
134647ba8cc7SDarrick J. Wong 	return VM_FAULT_SIGBUS;
1347ea6c49b7SShiyang Ruan }
1348ea6c49b7SShiyang Ruan #endif
1349ea6c49b7SShiyang Ruan 
1350de0e8c20SDave Chinner /*
1351de0e8c20SDave Chinner  * Locking for serialisation of IO during page faults. This results in a lock
1352de0e8c20SDave Chinner  * ordering of:
1353de0e8c20SDave Chinner  *
1354c1e8d7c6SMichel Lespinasse  * mmap_lock (MM)
13556b698edeSDave Chinner  *   sb_start_pagefault(vfs, freeze)
13562433480aSJan Kara  *     invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
1357de0e8c20SDave Chinner  *       page_lock (MM)
1358de0e8c20SDave Chinner  *         i_lock (XFS - extent map serialisation)
1359de0e8c20SDave Chinner  */
136005edd888SSouptick Joarder static vm_fault_t
1361d522d569SChristoph Hellwig __xfs_filemap_fault(
1362c791ace1SDave Jiang 	struct vm_fault		*vmf,
13631d024e7aSMatthew Wilcox (Oracle) 	unsigned int		order,
1364d522d569SChristoph Hellwig 	bool			write_fault)
1365acd76e74SMatthew Wilcox {
1366f4200391SDave Jiang 	struct inode		*inode = file_inode(vmf->vma->vm_file);
1367acd76e74SMatthew Wilcox 	struct xfs_inode	*ip = XFS_I(inode);
136805edd888SSouptick Joarder 	vm_fault_t		ret;
1369d7d84772SCatherine Hoang 	unsigned int		lock_mode = 0;
1370acd76e74SMatthew Wilcox 
13711d024e7aSMatthew Wilcox (Oracle) 	trace_xfs_filemap_fault(ip, order, write_fault);
1372acd76e74SMatthew Wilcox 
1373d522d569SChristoph Hellwig 	if (write_fault) {
1374acd76e74SMatthew Wilcox 		sb_start_pagefault(inode->i_sb);
1375f4200391SDave Jiang 		file_update_time(vmf->vma->vm_file);
137613ad4fe3SDave Chinner 	}
137713ad4fe3SDave Chinner 
1378d7d84772SCatherine Hoang 	if (IS_DAX(inode) || write_fault)
1379d7d84772SCatherine Hoang 		lock_mode = xfs_ilock_for_write_fault(XFS_I(inode));
1380d7d84772SCatherine Hoang 
1381d522d569SChristoph Hellwig 	if (IS_DAX(inode)) {
1382a39e596bSChristoph Hellwig 		pfn_t pfn;
1383a39e596bSChristoph Hellwig 
13841d024e7aSMatthew Wilcox (Oracle) 		ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
1385a39e596bSChristoph Hellwig 		if (ret & VM_FAULT_NEEDDSYNC)
13861d024e7aSMatthew Wilcox (Oracle) 			ret = dax_finish_sync_fault(vmf, order, pfn);
1387d7d84772SCatherine Hoang 	} else if (write_fault) {
1388d7d84772SCatherine Hoang 		ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
13892433480aSJan Kara 	} else {
1390d522d569SChristoph Hellwig 		ret = filemap_fault(vmf);
1391d522d569SChristoph Hellwig 	}
1392d7d84772SCatherine Hoang 
1393d7d84772SCatherine Hoang 	if (lock_mode)
1394d7d84772SCatherine Hoang 		xfs_iunlock(XFS_I(inode), lock_mode);
139513ad4fe3SDave Chinner 
1396d522d569SChristoph Hellwig 	if (write_fault)
1397acd76e74SMatthew Wilcox 		sb_end_pagefault(inode->i_sb);
1398acd76e74SMatthew Wilcox 	return ret;
1399acd76e74SMatthew Wilcox }
1400acd76e74SMatthew Wilcox 
1401b17164e2SMikulas Patocka static inline bool
1402b17164e2SMikulas Patocka xfs_is_write_fault(
1403b17164e2SMikulas Patocka 	struct vm_fault		*vmf)
1404b17164e2SMikulas Patocka {
1405b17164e2SMikulas Patocka 	return (vmf->flags & FAULT_FLAG_WRITE) &&
1406b17164e2SMikulas Patocka 	       (vmf->vma->vm_flags & VM_SHARED);
1407b17164e2SMikulas Patocka }
1408b17164e2SMikulas Patocka 
140905edd888SSouptick Joarder static vm_fault_t
1410d522d569SChristoph Hellwig xfs_filemap_fault(
1411d522d569SChristoph Hellwig 	struct vm_fault		*vmf)
1412d522d569SChristoph Hellwig {
1413d522d569SChristoph Hellwig 	/* DAX can shortcut the normal fault path on write faults! */
14141d024e7aSMatthew Wilcox (Oracle) 	return __xfs_filemap_fault(vmf, 0,
1415d522d569SChristoph Hellwig 			IS_DAX(file_inode(vmf->vma->vm_file)) &&
1416b17164e2SMikulas Patocka 			xfs_is_write_fault(vmf));
1417d522d569SChristoph Hellwig }
1418d522d569SChristoph Hellwig 
141905edd888SSouptick Joarder static vm_fault_t
1420d522d569SChristoph Hellwig xfs_filemap_huge_fault(
1421d522d569SChristoph Hellwig 	struct vm_fault		*vmf,
14221d024e7aSMatthew Wilcox (Oracle) 	unsigned int		order)
1423d522d569SChristoph Hellwig {
1424d522d569SChristoph Hellwig 	if (!IS_DAX(file_inode(vmf->vma->vm_file)))
1425d522d569SChristoph Hellwig 		return VM_FAULT_FALLBACK;
1426d522d569SChristoph Hellwig 
1427d522d569SChristoph Hellwig 	/* DAX can shortcut the normal fault path on write faults! */
14281d024e7aSMatthew Wilcox (Oracle) 	return __xfs_filemap_fault(vmf, order,
1429b17164e2SMikulas Patocka 			xfs_is_write_fault(vmf));
1430d522d569SChristoph Hellwig }
1431d522d569SChristoph Hellwig 
143205edd888SSouptick Joarder static vm_fault_t
1433d522d569SChristoph Hellwig xfs_filemap_page_mkwrite(
1434d522d569SChristoph Hellwig 	struct vm_fault		*vmf)
1435d522d569SChristoph Hellwig {
14361d024e7aSMatthew Wilcox (Oracle) 	return __xfs_filemap_fault(vmf, 0, true);
1437d522d569SChristoph Hellwig }
1438d522d569SChristoph Hellwig 
14393af49285SDave Chinner /*
14407b565c9fSJan Kara  * pfn_mkwrite was originally intended to ensure we capture time stamp updates
14417b565c9fSJan Kara  * on write faults. In reality, it needs to serialise against truncate and
14427b565c9fSJan Kara  * prepare memory for writing, so handle it as a standard write fault.
14433af49285SDave Chinner  */
144405edd888SSouptick Joarder static vm_fault_t
14453af49285SDave Chinner xfs_filemap_pfn_mkwrite(
14463af49285SDave Chinner 	struct vm_fault		*vmf)
14473af49285SDave Chinner {
14483af49285SDave Chinner 
14491d024e7aSMatthew Wilcox (Oracle) 	return __xfs_filemap_fault(vmf, 0, true);
14503af49285SDave Chinner }
14513af49285SDave Chinner 
14526b698edeSDave Chinner static const struct vm_operations_struct xfs_file_vm_ops = {
14536b698edeSDave Chinner 	.fault		= xfs_filemap_fault,
1454a2d58167SDave Jiang 	.huge_fault	= xfs_filemap_huge_fault,
1455945ea457SMatthew Wilcox (Oracle) 	.map_pages	= filemap_map_pages,
14566b698edeSDave Chinner 	.page_mkwrite	= xfs_filemap_page_mkwrite,
14573af49285SDave Chinner 	.pfn_mkwrite	= xfs_filemap_pfn_mkwrite,
14586b698edeSDave Chinner };
14596b698edeSDave Chinner 
14606b698edeSDave Chinner STATIC int
14616b698edeSDave Chinner xfs_file_mmap(
146230fa529eSChristoph Hellwig 	struct file		*file,
14636b698edeSDave Chinner 	struct vm_area_struct	*vma)
14646b698edeSDave Chinner {
146530fa529eSChristoph Hellwig 	struct inode		*inode = file_inode(file);
146630fa529eSChristoph Hellwig 	struct xfs_buftarg	*target = xfs_inode_buftarg(XFS_I(inode));
1467b21fec41SPankaj Gupta 
1468a39e596bSChristoph Hellwig 	/*
1469b21fec41SPankaj Gupta 	 * We don't support synchronous mappings for non-DAX files and
1470b21fec41SPankaj Gupta 	 * for DAX files if the underlying dax_device is not synchronous.
1471a39e596bSChristoph Hellwig 	 */
147230fa529eSChristoph Hellwig 	if (!daxdev_mapping_supported(vma, target->bt_daxdev))
1473a39e596bSChristoph Hellwig 		return -EOPNOTSUPP;
1474a39e596bSChristoph Hellwig 
147530fa529eSChristoph Hellwig 	file_accessed(file);
14766b698edeSDave Chinner 	vma->vm_ops = &xfs_file_vm_ops;
147730fa529eSChristoph Hellwig 	if (IS_DAX(inode))
14781c71222eSSuren Baghdasaryan 		vm_flags_set(vma, VM_HUGEPAGE);
14796b698edeSDave Chinner 	return 0;
1480075a924dSDave Chinner }
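The daxdev_mapping_supported() check above is what gates MAP_SYNC mappings. A minimal userspace sketch, assuming fd refers to a file on a DAX-capable device; older libc headers may need <linux/mman.h> for the flags, and the helper name is illustrative:

#define _GNU_SOURCE
#include <sys/mman.h>

/* Request a synchronous DAX mapping; fails unless the device supports it. */
static void *map_sync(int fd, size_t len)
{
	return mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
}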
1481075a924dSDave Chinner 
1482c59d87c4SChristoph Hellwig const struct file_operations xfs_file_operations = {
14833fe3e6b1SJeff Liu 	.llseek		= xfs_file_llseek,
1484b4f5d2c6SAl Viro 	.read_iter	= xfs_file_read_iter,
1485bf97f3bcSAl Viro 	.write_iter	= xfs_file_write_iter,
148654919f94SDavid Howells 	.splice_read	= xfs_file_splice_read,
14878d020765SAl Viro 	.splice_write	= iter_file_splice_write,
14883e08773cSChristoph Hellwig 	.iopoll		= iocb_bio_iopoll,
1489c59d87c4SChristoph Hellwig 	.unlocked_ioctl	= xfs_file_ioctl,
1490c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT
1491c59d87c4SChristoph Hellwig 	.compat_ioctl	= xfs_file_compat_ioctl,
1492c59d87c4SChristoph Hellwig #endif
1493c59d87c4SChristoph Hellwig 	.mmap		= xfs_file_mmap,
1494a39e596bSChristoph Hellwig 	.mmap_supported_flags = MAP_SYNC,
1495c59d87c4SChristoph Hellwig 	.open		= xfs_file_open,
1496c59d87c4SChristoph Hellwig 	.release	= xfs_file_release,
1497c59d87c4SChristoph Hellwig 	.fsync		= xfs_file_fsync,
1498dbe6ec81SToshi Kani 	.get_unmapped_area = thp_get_unmapped_area,
1499c59d87c4SChristoph Hellwig 	.fallocate	= xfs_file_fallocate,
150040144e49SJan Kara 	.fadvise	= xfs_file_fadvise,
15012e5dfc99SDarrick J. Wong 	.remap_file_range = xfs_file_remap_range,
1502c59d87c4SChristoph Hellwig };
1503c59d87c4SChristoph Hellwig 
1504c59d87c4SChristoph Hellwig const struct file_operations xfs_dir_file_operations = {
1505c59d87c4SChristoph Hellwig 	.open		= xfs_dir_open,
1506c59d87c4SChristoph Hellwig 	.read		= generic_read_dir,
15073b0a3c1aSAl Viro 	.iterate_shared	= xfs_file_readdir,
1508c59d87c4SChristoph Hellwig 	.llseek		= generic_file_llseek,
1509c59d87c4SChristoph Hellwig 	.unlocked_ioctl	= xfs_file_ioctl,
1510c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT
1511c59d87c4SChristoph Hellwig 	.compat_ioctl	= xfs_file_compat_ioctl,
1512c59d87c4SChristoph Hellwig #endif
15131da2f2dbSChristoph Hellwig 	.fsync		= xfs_dir_fsync,
1514c59d87c4SChristoph Hellwig };
1515