10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0
2c59d87c4SChristoph Hellwig /*
3c59d87c4SChristoph Hellwig * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4c59d87c4SChristoph Hellwig * All Rights Reserved.
5c59d87c4SChristoph Hellwig */
6c59d87c4SChristoph Hellwig #include "xfs.h"
7c59d87c4SChristoph Hellwig #include "xfs_fs.h"
870a9883cSDave Chinner #include "xfs_shared.h"
9a4fbe6abSDave Chinner #include "xfs_format.h"
10239880efSDave Chinner #include "xfs_log_format.h"
11239880efSDave Chinner #include "xfs_trans_resv.h"
12c59d87c4SChristoph Hellwig #include "xfs_mount.h"
13c59d87c4SChristoph Hellwig #include "xfs_inode.h"
14239880efSDave Chinner #include "xfs_trans.h"
15c59d87c4SChristoph Hellwig #include "xfs_inode_item.h"
16c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
17c24b5dfaSDave Chinner #include "xfs_bmap_util.h"
182b9ab5abSDave Chinner #include "xfs_dir2.h"
19c24b5dfaSDave Chinner #include "xfs_dir2_priv.h"
20c59d87c4SChristoph Hellwig #include "xfs_ioctl.h"
21c59d87c4SChristoph Hellwig #include "xfs_trace.h"
22239880efSDave Chinner #include "xfs_log.h"
23dc06f398SBrian Foster #include "xfs_icache.h"
24781355c6SChristoph Hellwig #include "xfs_pnfs.h"
2568a9f5e7SChristoph Hellwig #include "xfs_iomap.h"
260613f16cSDarrick J. Wong #include "xfs_reflink.h"
277531c9abSDarrick J. Wong #include "xfs_file.h"
28c59d87c4SChristoph Hellwig
29ea6c49b7SShiyang Ruan #include <linux/dax.h>
30c59d87c4SChristoph Hellwig #include <linux/falloc.h>
3166114cadSTejun Heo #include <linux/backing-dev.h>
32a39e596bSChristoph Hellwig #include <linux/mman.h>
3340144e49SJan Kara #include <linux/fadvise.h>
34f736d93dSChristoph Hellwig #include <linux/mount.h>
35c59d87c4SChristoph Hellwig
36c59d87c4SChristoph Hellwig static const struct vm_operations_struct xfs_file_vm_ops;
37c59d87c4SChristoph Hellwig
3825219dbfSDarrick J. Wong /*
3925219dbfSDarrick J. Wong * Decide if the given file range is aligned to the size of the fundamental
4025219dbfSDarrick J. Wong * allocation unit for the file.
4125219dbfSDarrick J. Wong */
42*c070b880SDarrick J. Wong bool
4325219dbfSDarrick J. Wong xfs_is_falloc_aligned(
4425219dbfSDarrick J. Wong struct xfs_inode *ip,
4525219dbfSDarrick J. Wong loff_t pos,
4625219dbfSDarrick J. Wong long long int len)
4725219dbfSDarrick J. Wong {
48*c070b880SDarrick J. Wong unsigned int alloc_unit = xfs_inode_alloc_unitsize(ip);
4925219dbfSDarrick J. Wong
50*c070b880SDarrick J. Wong if (!is_power_of_2(alloc_unit)) {
5125219dbfSDarrick J. Wong u32 mod;
5225219dbfSDarrick J. Wong
53*c070b880SDarrick J. Wong div_u64_rem(pos, alloc_unit, &mod);
5425219dbfSDarrick J. Wong if (mod)
5525219dbfSDarrick J. Wong return false;
56*c070b880SDarrick J. Wong div_u64_rem(len, alloc_unit, &mod);
5725219dbfSDarrick J. Wong return mod == 0;
5825219dbfSDarrick J. Wong }
5925219dbfSDarrick J. Wong
60*c070b880SDarrick J. Wong return !((pos | len) & (alloc_unit - 1));
6125219dbfSDarrick J. Wong }
6225219dbfSDarrick J. Wong
631da2f2dbSChristoph Hellwig /*
641da2f2dbSChristoph Hellwig * Fsync operations on directories are much simpler than on regular files,
651da2f2dbSChristoph Hellwig * as there is no file data to flush, and thus also no need for explicit
661da2f2dbSChristoph Hellwig * cache flush operations, and there are no non-transaction metadata updates
671da2f2dbSChristoph Hellwig * on directories either.
681da2f2dbSChristoph Hellwig */
691da2f2dbSChristoph Hellwig STATIC int
701da2f2dbSChristoph Hellwig xfs_dir_fsync(
711da2f2dbSChristoph Hellwig struct file *file,
721da2f2dbSChristoph Hellwig loff_t start,
731da2f2dbSChristoph Hellwig loff_t end,
741da2f2dbSChristoph Hellwig int datasync)
751da2f2dbSChristoph Hellwig {
761da2f2dbSChristoph Hellwig struct xfs_inode *ip = XFS_I(file->f_mapping->host);
771da2f2dbSChristoph Hellwig
781da2f2dbSChristoph Hellwig trace_xfs_dir_fsync(ip);
7954fbdd10SChristoph Hellwig return xfs_log_force_inode(ip);
801da2f2dbSChristoph Hellwig }
811da2f2dbSChristoph Hellwig
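/*
 * Return the CIL commit sequence that must be forced to cover this inode's
 * pinned changes, or zero if nothing needs flushing (the inode is not
 * pinned, or fdatasync finds only timestamp updates logged).
 */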
825f9b4b0dSDave Chinner static xfs_csn_t
835f9b4b0dSDave Chinner xfs_fsync_seq(
84f22c7f87SChristoph Hellwig struct xfs_inode *ip,
85f22c7f87SChristoph Hellwig bool datasync)
86f22c7f87SChristoph Hellwig {
87f22c7f87SChristoph Hellwig if (!xfs_ipincount(ip))
88f22c7f87SChristoph Hellwig return 0;
89f22c7f87SChristoph Hellwig if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
90f22c7f87SChristoph Hellwig return 0;
915f9b4b0dSDave Chinner return ip->i_itemp->ili_commit_seq;
92f22c7f87SChristoph Hellwig }
93f22c7f87SChristoph Hellwig
94f22c7f87SChristoph Hellwig /*
95f22c7f87SChristoph Hellwig * All metadata updates are logged, which means that we just have to flush the
96f22c7f87SChristoph Hellwig * log up to the latest LSN that touched the inode.
97f22c7f87SChristoph Hellwig *
98f22c7f87SChristoph Hellwig * If we have concurrent fsync/fdatasync() calls, we need them to all block on
99f22c7f87SChristoph Hellwig * the log force before we clear the ili_fsync_fields field. This ensures that
100f22c7f87SChristoph Hellwig * we don't get a racing sync operation that does not wait for the metadata to
101f22c7f87SChristoph Hellwig * hit the journal before returning. If we race with clearing ili_fsync_fields,
102f22c7f87SChristoph Hellwig * then all that will happen is the log force will do nothing as the lsn will
103f22c7f87SChristoph Hellwig * already be on disk. We can't race with setting ili_fsync_fields because that
104f22c7f87SChristoph Hellwig * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
105f22c7f87SChristoph Hellwig * shared until after the ili_fsync_fields is cleared.
106f22c7f87SChristoph Hellwig */
107f22c7f87SChristoph Hellwig static int
108f22c7f87SChristoph Hellwig xfs_fsync_flush_log(
109f22c7f87SChristoph Hellwig struct xfs_inode *ip,
110f22c7f87SChristoph Hellwig bool datasync,
111f22c7f87SChristoph Hellwig int *log_flushed)
112f22c7f87SChristoph Hellwig {
113f22c7f87SChristoph Hellwig int error = 0;
1145f9b4b0dSDave Chinner xfs_csn_t seq;
115f22c7f87SChristoph Hellwig
116f22c7f87SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_SHARED);
1175f9b4b0dSDave Chinner seq = xfs_fsync_seq(ip, datasync);
1185f9b4b0dSDave Chinner if (seq) {
1195f9b4b0dSDave Chinner error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
120f22c7f87SChristoph Hellwig log_flushed);
121f22c7f87SChristoph Hellwig
122f22c7f87SChristoph Hellwig spin_lock(&ip->i_itemp->ili_lock);
123f22c7f87SChristoph Hellwig ip->i_itemp->ili_fsync_fields = 0;
124f22c7f87SChristoph Hellwig spin_unlock(&ip->i_itemp->ili_lock);
125f22c7f87SChristoph Hellwig }
126f22c7f87SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED);
127f22c7f87SChristoph Hellwig return error;
128f22c7f87SChristoph Hellwig }
129f22c7f87SChristoph Hellwig
130c59d87c4SChristoph Hellwig STATIC int
131c59d87c4SChristoph Hellwig xfs_file_fsync(
132c59d87c4SChristoph Hellwig struct file *file,
133c59d87c4SChristoph Hellwig loff_t start,
134c59d87c4SChristoph Hellwig loff_t end,
135c59d87c4SChristoph Hellwig int datasync)
136c59d87c4SChristoph Hellwig {
137f22c7f87SChristoph Hellwig struct xfs_inode *ip = XFS_I(file->f_mapping->host);
138c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount;
1397d839e32SDarrick J. Wong int error, err2;
140c59d87c4SChristoph Hellwig int log_flushed = 0;
141c59d87c4SChristoph Hellwig
142c59d87c4SChristoph Hellwig trace_xfs_file_fsync(ip);
143c59d87c4SChristoph Hellwig
1441b180274SJeff Layton error = file_write_and_wait_range(file, start, end);
145c59d87c4SChristoph Hellwig if (error)
146c59d87c4SChristoph Hellwig return error;
147c59d87c4SChristoph Hellwig
14875c8c50fSDave Chinner if (xfs_is_shutdown(mp))
149b474c7aeSEric Sandeen return -EIO;
150c59d87c4SChristoph Hellwig
151c59d87c4SChristoph Hellwig xfs_iflags_clear(ip, XFS_ITRUNCATED);
152c59d87c4SChristoph Hellwig
153c59d87c4SChristoph Hellwig /*
1542291dab2SDave Chinner * If we have an RT and/or log subvolume we need to make sure to flush
1552291dab2SDave Chinner * the write cache of the device used for file data first. This is to
1562291dab2SDave Chinner * ensure newly written file data makes it to disk before logging the new
1572291dab2SDave Chinner * inode size in case of an extending write.
158c59d87c4SChristoph Hellwig */
159c59d87c4SChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip))
1607d839e32SDarrick J. Wong error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
161c59d87c4SChristoph Hellwig else if (mp->m_logdev_targp != mp->m_ddev_targp)
1627d839e32SDarrick J. Wong error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
163c59d87c4SChristoph Hellwig
164c59d87c4SChristoph Hellwig /*
165ae29e422SChristoph Hellwig * Any inode that has dirty modifications in the log is pinned. The
1667d839e32SDarrick J. Wong * racy check here for a pinned inode will not catch modifications
167ae29e422SChristoph Hellwig * that happen concurrently with the fsync call, but fsync semantics
168ae29e422SChristoph Hellwig * only require us to sync previously completed I/O.
169c59d87c4SChristoph Hellwig */
1707d839e32SDarrick J. Wong if (xfs_ipincount(ip)) {
1717d839e32SDarrick J. Wong err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
1727d839e32SDarrick J. Wong if (err2 && !error)
1737d839e32SDarrick J. Wong error = err2;
1747d839e32SDarrick J. Wong }
175b1037058SChristoph Hellwig
176c59d87c4SChristoph Hellwig /*
177c59d87c4SChristoph Hellwig * If we only have a single device, and the log force above was
178c59d87c4SChristoph Hellwig * a no-op, we might have to flush the data device cache here.
179c59d87c4SChristoph Hellwig * This can only happen for fdatasync/O_DSYNC if we were overwriting
180c59d87c4SChristoph Hellwig * an already allocated file and thus do not have any metadata to
181c59d87c4SChristoph Hellwig * commit.
182c59d87c4SChristoph Hellwig */
1832291dab2SDave Chinner if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
1847d839e32SDarrick J. Wong mp->m_logdev_targp == mp->m_ddev_targp) {
1857d839e32SDarrick J. Wong err2 = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
1867d839e32SDarrick J. Wong if (err2 && !error)
1877d839e32SDarrick J. Wong error = err2;
1887d839e32SDarrick J. Wong }
189c59d87c4SChristoph Hellwig
1902451337dSDave Chinner return error;
191c59d87c4SChristoph Hellwig }
192c59d87c4SChristoph Hellwig
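/*
 * Take the inode lock in the requested mode.  For IOCB_NOWAIT I/O, only
 * trylock and return -EAGAIN rather than sleeping on a contended lock.
 */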
193f50b8f47SChristoph Hellwig static int
194f50b8f47SChristoph Hellwig xfs_ilock_iocb(
195f50b8f47SChristoph Hellwig struct kiocb *iocb,
196f50b8f47SChristoph Hellwig unsigned int lock_mode)
197f50b8f47SChristoph Hellwig {
198f50b8f47SChristoph Hellwig struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
199f50b8f47SChristoph Hellwig
200f50b8f47SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) {
201f50b8f47SChristoph Hellwig if (!xfs_ilock_nowait(ip, lock_mode))
202f50b8f47SChristoph Hellwig return -EAGAIN;
203f50b8f47SChristoph Hellwig } else {
204f50b8f47SChristoph Hellwig xfs_ilock(ip, lock_mode);
205f50b8f47SChristoph Hellwig }
206f50b8f47SChristoph Hellwig
207f50b8f47SChristoph Hellwig return 0;
208f50b8f47SChristoph Hellwig }
209f50b8f47SChristoph Hellwig
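/*
 * Take the iolock for a write.  If a remap operation (XFS_IREMAPPING) is in
 * progress, a shared lock request is upgraded to exclusive so that the write
 * serialises against the remap.
 */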
210d7d84772SCatherine Hoang static int
211d7d84772SCatherine Hoang xfs_ilock_iocb_for_write(
212d7d84772SCatherine Hoang struct kiocb *iocb,
213d7d84772SCatherine Hoang unsigned int *lock_mode)
214d7d84772SCatherine Hoang {
215d7d84772SCatherine Hoang ssize_t ret;
216d7d84772SCatherine Hoang struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
217d7d84772SCatherine Hoang
218d7d84772SCatherine Hoang ret = xfs_ilock_iocb(iocb, *lock_mode);
219d7d84772SCatherine Hoang if (ret)
220d7d84772SCatherine Hoang return ret;
221d7d84772SCatherine Hoang
222d7d84772SCatherine Hoang if (*lock_mode == XFS_IOLOCK_EXCL)
223d7d84772SCatherine Hoang return 0;
224d7d84772SCatherine Hoang if (!xfs_iflags_test(ip, XFS_IREMAPPING))
225d7d84772SCatherine Hoang return 0;
226d7d84772SCatherine Hoang
227d7d84772SCatherine Hoang xfs_iunlock(ip, *lock_mode);
228d7d84772SCatherine Hoang *lock_mode = XFS_IOLOCK_EXCL;
229d7d84772SCatherine Hoang return xfs_ilock_iocb(iocb, *lock_mode);
230d7d84772SCatherine Hoang }
231d7d84772SCatherine Hoang
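/*
 * Decide which MMAPLOCK mode a write fault needs: shared normally, but
 * exclusive while a remap operation is in progress.  Returns the lock mode
 * that was actually taken.
 */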
232d7d84772SCatherine Hoang static unsigned int
233d7d84772SCatherine Hoang xfs_ilock_for_write_fault(
234d7d84772SCatherine Hoang struct xfs_inode *ip)
235d7d84772SCatherine Hoang {
236d7d84772SCatherine Hoang /* get a shared lock if no remapping in progress */
237d7d84772SCatherine Hoang xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
238d7d84772SCatherine Hoang if (!xfs_iflags_test(ip, XFS_IREMAPPING))
239d7d84772SCatherine Hoang return XFS_MMAPLOCK_SHARED;
240d7d84772SCatherine Hoang
241d7d84772SCatherine Hoang /* wait for remapping to complete */
242d7d84772SCatherine Hoang xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
243d7d84772SCatherine Hoang xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
244d7d84772SCatherine Hoang return XFS_MMAPLOCK_EXCL;
245d7d84772SCatherine Hoang }
246d7d84772SCatherine Hoang
247c59d87c4SChristoph Hellwig STATIC ssize_t
248ee1b218bSChristoph Hellwig xfs_file_dio_read(
249c59d87c4SChristoph Hellwig struct kiocb *iocb,
250b4f5d2c6SAl Viro struct iov_iter *to)
251c59d87c4SChristoph Hellwig {
252acdda3aaSChristoph Hellwig struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
253acdda3aaSChristoph Hellwig ssize_t ret;
254c59d87c4SChristoph Hellwig
2553e40b13cSChristoph Hellwig trace_xfs_file_direct_read(iocb, to);
256c59d87c4SChristoph Hellwig
2573e40b13cSChristoph Hellwig if (!iov_iter_count(to))
258f1285ff0SChristoph Hellwig return 0; /* skip atime */
259c59d87c4SChristoph Hellwig
260a447d7cdSChristoph Hellwig file_accessed(iocb->ki_filp);
261a447d7cdSChristoph Hellwig
262f50b8f47SChristoph Hellwig ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
263f50b8f47SChristoph Hellwig if (ret)
264f50b8f47SChristoph Hellwig return ret;
265786f847fSChristoph Hellwig ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0);
26665523218SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED);
267acdda3aaSChristoph Hellwig
26816d4d435SChristoph Hellwig return ret;
26916d4d435SChristoph Hellwig }
27016d4d435SChristoph Hellwig
271f021bd07SArnd Bergmann static noinline ssize_t
27216d4d435SChristoph Hellwig xfs_file_dax_read(
27316d4d435SChristoph Hellwig struct kiocb *iocb,
27416d4d435SChristoph Hellwig struct iov_iter *to)
27516d4d435SChristoph Hellwig {
2766c31f495SChristoph Hellwig struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
27716d4d435SChristoph Hellwig ssize_t ret = 0;
27816d4d435SChristoph Hellwig
2793e40b13cSChristoph Hellwig trace_xfs_file_dax_read(iocb, to);
28016d4d435SChristoph Hellwig
2813e40b13cSChristoph Hellwig if (!iov_iter_count(to))
28216d4d435SChristoph Hellwig return 0; /* skip atime */
28316d4d435SChristoph Hellwig
284f50b8f47SChristoph Hellwig ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
285f50b8f47SChristoph Hellwig if (ret)
286f50b8f47SChristoph Hellwig return ret;
287690c2a38SChristoph Hellwig ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
28865523218SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED);
289bbc5a740SChristoph Hellwig
290f1285ff0SChristoph Hellwig file_accessed(iocb->ki_filp);
291bbc5a740SChristoph Hellwig return ret;
292bbc5a740SChristoph Hellwig }
293bbc5a740SChristoph Hellwig
294bbc5a740SChristoph Hellwig STATIC ssize_t
295ee1b218bSChristoph Hellwig xfs_file_buffered_read(
296bbc5a740SChristoph Hellwig struct kiocb *iocb,
297bbc5a740SChristoph Hellwig struct iov_iter *to)
298bbc5a740SChristoph Hellwig {
299bbc5a740SChristoph Hellwig struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
300bbc5a740SChristoph Hellwig ssize_t ret;
301bbc5a740SChristoph Hellwig
3023e40b13cSChristoph Hellwig trace_xfs_file_buffered_read(iocb, to);
303bbc5a740SChristoph Hellwig
304f50b8f47SChristoph Hellwig ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
305f50b8f47SChristoph Hellwig if (ret)
306f50b8f47SChristoph Hellwig return ret;
307b4f5d2c6SAl Viro ret = generic_file_read_iter(iocb, to);
30865523218SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED);
309bbc5a740SChristoph Hellwig
310bbc5a740SChristoph Hellwig return ret;
311bbc5a740SChristoph Hellwig }
312bbc5a740SChristoph Hellwig
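/*
 * Top-level ->read_iter method: dispatch to the DAX, direct or buffered
 * read path and account the bytes read.
 */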
313bbc5a740SChristoph Hellwig STATIC ssize_t
314bbc5a740SChristoph Hellwig xfs_file_read_iter(
315bbc5a740SChristoph Hellwig struct kiocb *iocb,
316bbc5a740SChristoph Hellwig struct iov_iter *to)
317bbc5a740SChristoph Hellwig {
31816d4d435SChristoph Hellwig struct inode *inode = file_inode(iocb->ki_filp);
31916d4d435SChristoph Hellwig struct xfs_mount *mp = XFS_I(inode)->i_mount;
320bbc5a740SChristoph Hellwig ssize_t ret = 0;
321bbc5a740SChristoph Hellwig
322bbc5a740SChristoph Hellwig XFS_STATS_INC(mp, xs_read_calls);
323bbc5a740SChristoph Hellwig
32475c8c50fSDave Chinner if (xfs_is_shutdown(mp))
325bbc5a740SChristoph Hellwig return -EIO;
326bbc5a740SChristoph Hellwig
32716d4d435SChristoph Hellwig if (IS_DAX(inode))
32816d4d435SChristoph Hellwig ret = xfs_file_dax_read(iocb, to);
32916d4d435SChristoph Hellwig else if (iocb->ki_flags & IOCB_DIRECT)
330ee1b218bSChristoph Hellwig ret = xfs_file_dio_read(iocb, to);
331bbc5a740SChristoph Hellwig else
332ee1b218bSChristoph Hellwig ret = xfs_file_buffered_read(iocb, to);
333bbc5a740SChristoph Hellwig
334c59d87c4SChristoph Hellwig if (ret > 0)
335ff6d6af2SBill O'Donnell XFS_STATS_ADD(mp, xs_read_bytes, ret);
336c59d87c4SChristoph Hellwig return ret;
337c59d87c4SChristoph Hellwig }
338c59d87c4SChristoph Hellwig
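/*
 * Splice data from the page cache to a pipe, holding IOLOCK_SHARED for the
 * duration of the copy.
 */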
33954919f94SDavid Howells STATIC ssize_t
34054919f94SDavid Howells xfs_file_splice_read(
34154919f94SDavid Howells struct file *in,
34254919f94SDavid Howells loff_t *ppos,
34354919f94SDavid Howells struct pipe_inode_info *pipe,
34454919f94SDavid Howells size_t len,
34554919f94SDavid Howells unsigned int flags)
34654919f94SDavid Howells {
34754919f94SDavid Howells struct inode *inode = file_inode(in);
34854919f94SDavid Howells struct xfs_inode *ip = XFS_I(inode);
34954919f94SDavid Howells struct xfs_mount *mp = ip->i_mount;
35054919f94SDavid Howells ssize_t ret = 0;
35154919f94SDavid Howells
35254919f94SDavid Howells XFS_STATS_INC(mp, xs_read_calls);
35354919f94SDavid Howells
35454919f94SDavid Howells if (xfs_is_shutdown(mp))
35554919f94SDavid Howells return -EIO;
35654919f94SDavid Howells
35754919f94SDavid Howells trace_xfs_file_splice_read(ip, *ppos, len);
35854919f94SDavid Howells
35954919f94SDavid Howells xfs_ilock(ip, XFS_IOLOCK_SHARED);
36054919f94SDavid Howells ret = filemap_splice_read(in, ppos, pipe, len, flags);
36154919f94SDavid Howells xfs_iunlock(ip, XFS_IOLOCK_SHARED);
36254919f94SDavid Howells if (ret > 0)
36354919f94SDavid Howells XFS_STATS_ADD(mp, xs_read_bytes, ret);
36454919f94SDavid Howells return ret;
36554919f94SDavid Howells }
36654919f94SDavid Howells
367c59d87c4SChristoph Hellwig /*
368c59d87c4SChristoph Hellwig * Common pre-write limit and setup checks.
369c59d87c4SChristoph Hellwig *
3705bf1f262SChristoph Hellwig * Called with the iolock held either shared or exclusive according to
3715bf1f262SChristoph Hellwig * @iolock, and returns with it held. Might upgrade the iolock to exclusive
3725bf1f262SChristoph Hellwig * if called for a direct write beyond i_size.
373c59d87c4SChristoph Hellwig */
374c59d87c4SChristoph Hellwig STATIC ssize_t
375ee1b218bSChristoph Hellwig xfs_file_write_checks(
37699733fa3SAl Viro struct kiocb *iocb,
37799733fa3SAl Viro struct iov_iter *from,
378a1033753SDave Chinner unsigned int *iolock)
379c59d87c4SChristoph Hellwig {
38099733fa3SAl Viro struct file *file = iocb->ki_filp;
381c59d87c4SChristoph Hellwig struct inode *inode = file->f_mapping->host;
382c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
3833309dd04SAl Viro ssize_t error = 0;
38499733fa3SAl Viro size_t count = iov_iter_count(from);
3853136e8bbSBrian Foster bool drained_dio = false;
386f5c54717SChristoph Hellwig loff_t isize;
387c59d87c4SChristoph Hellwig
3887271d243SDave Chinner restart:
3893309dd04SAl Viro error = generic_write_checks(iocb, from);
3903309dd04SAl Viro if (error <= 0)
391c59d87c4SChristoph Hellwig return error;
392c59d87c4SChristoph Hellwig
393354be7e3SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) {
394354be7e3SChristoph Hellwig error = break_layout(inode, false);
395354be7e3SChristoph Hellwig if (error == -EWOULDBLOCK)
396354be7e3SChristoph Hellwig error = -EAGAIN;
397354be7e3SChristoph Hellwig } else {
39869eb5fa1SDan Williams error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
399354be7e3SChristoph Hellwig }
400354be7e3SChristoph Hellwig
401781355c6SChristoph Hellwig if (error)
402781355c6SChristoph Hellwig return error;
403781355c6SChristoph Hellwig
40465523218SChristoph Hellwig /*
40565523218SChristoph Hellwig * For changing security info in file_remove_privs() we need i_rwsem
40665523218SChristoph Hellwig * exclusively.
40765523218SChristoph Hellwig */
408a6de82caSJan Kara if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
40965523218SChristoph Hellwig xfs_iunlock(ip, *iolock);
410a6de82caSJan Kara *iolock = XFS_IOLOCK_EXCL;
411354be7e3SChristoph Hellwig error = xfs_ilock_iocb(iocb, *iolock);
412354be7e3SChristoph Hellwig if (error) {
413354be7e3SChristoph Hellwig *iolock = 0;
414354be7e3SChristoph Hellwig return error;
415354be7e3SChristoph Hellwig }
416a6de82caSJan Kara goto restart;
417a6de82caSJan Kara }
418977ec4ddSDave Chinner
419c59d87c4SChristoph Hellwig /*
420c59d87c4SChristoph Hellwig * If the offset is beyond the size of the file, we need to zero any
421c59d87c4SChristoph Hellwig * blocks that fall between the existing EOF and the start of this
422977ec4ddSDave Chinner * write. If zeroing is needed and we are currently holding the iolock
423977ec4ddSDave Chinner * shared, we need to update it to exclusive which implies having to
424977ec4ddSDave Chinner * redo all checks before.
425b9d59846SDave Chinner *
426977ec4ddSDave Chinner * We need to serialise against EOF updates that occur in IO completions
427977ec4ddSDave Chinner * here. We want to make sure that nobody is changing the size while we
428977ec4ddSDave Chinner * do this check until we have placed an IO barrier (i.e. hold the
429977ec4ddSDave Chinner * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
430977ec4ddSDave Chinner * spinlock effectively forms a memory barrier once we have the
431977ec4ddSDave Chinner * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
432977ec4ddSDave Chinner * hence be able to correctly determine if we need to run zeroing.
433977ec4ddSDave Chinner *
434977ec4ddSDave Chinner * We can do an unlocked check here safely as IO completion can only
435977ec4ddSDave Chinner * extend EOF. Truncate is locked out at this point, so the EOF can
436977ec4ddSDave Chinner * not move backwards, only forwards. Hence we only need to take the
437977ec4ddSDave Chinner * slow path and spin locks when we are at or beyond the current EOF.
438c59d87c4SChristoph Hellwig */
439977ec4ddSDave Chinner if (iocb->ki_pos <= i_size_read(inode))
440977ec4ddSDave Chinner goto out;
441977ec4ddSDave Chinner
442b9d59846SDave Chinner spin_lock(&ip->i_flags_lock);
443f5c54717SChristoph Hellwig isize = i_size_read(inode);
444f5c54717SChristoph Hellwig if (iocb->ki_pos > isize) {
445b9d59846SDave Chinner spin_unlock(&ip->i_flags_lock);
446354be7e3SChristoph Hellwig
447354be7e3SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT)
448354be7e3SChristoph Hellwig return -EAGAIN;
449354be7e3SChristoph Hellwig
4503136e8bbSBrian Foster if (!drained_dio) {
4517271d243SDave Chinner if (*iolock == XFS_IOLOCK_SHARED) {
45265523218SChristoph Hellwig xfs_iunlock(ip, *iolock);
4537271d243SDave Chinner *iolock = XFS_IOLOCK_EXCL;
45465523218SChristoph Hellwig xfs_ilock(ip, *iolock);
4553309dd04SAl Viro iov_iter_reexpand(from, count);
4563136e8bbSBrian Foster }
45740c63fbcSDave Chinner /*
45840c63fbcSDave Chinner * We now have an IO submission barrier in place, but
45940c63fbcSDave Chinner * AIO can do EOF updates during IO completion and hence
46040c63fbcSDave Chinner * we now need to wait for all of them to drain. Non-AIO
46140c63fbcSDave Chinner * DIO will have drained before we are given the
46240c63fbcSDave Chinner * XFS_IOLOCK_EXCL, and so for most cases this wait is a
46340c63fbcSDave Chinner * no-op.
46440c63fbcSDave Chinner */
46540c63fbcSDave Chinner inode_dio_wait(inode);
4663136e8bbSBrian Foster drained_dio = true;
4677271d243SDave Chinner goto restart;
4687271d243SDave Chinner }
469f5c54717SChristoph Hellwig
470f5c54717SChristoph Hellwig trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
471f1ba5fafSShiyang Ruan error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
472c59d87c4SChristoph Hellwig if (error)
473c59d87c4SChristoph Hellwig return error;
474b9d59846SDave Chinner } else
475b9d59846SDave Chinner spin_unlock(&ip->i_flags_lock);
476c59d87c4SChristoph Hellwig
477977ec4ddSDave Chinner out:
4781aa91d9cSStefan Roesch return kiocb_modified(iocb);
479c59d87c4SChristoph Hellwig }
480c59d87c4SChristoph Hellwig
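/*
 * Direct write completion: finish any copy-on-write remapping, convert
 * unwritten extents, and extend the in-core and on-disk file size if the
 * write went beyond the old EOF.
 */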
481acdda3aaSChristoph Hellwig static int
482acdda3aaSChristoph Hellwig xfs_dio_write_end_io(
483acdda3aaSChristoph Hellwig struct kiocb *iocb,
484acdda3aaSChristoph Hellwig ssize_t size,
4856fe7b990SMatthew Bobrowski int error,
486acdda3aaSChristoph Hellwig unsigned flags)
487acdda3aaSChristoph Hellwig {
488acdda3aaSChristoph Hellwig struct inode *inode = file_inode(iocb->ki_filp);
489acdda3aaSChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
490acdda3aaSChristoph Hellwig loff_t offset = iocb->ki_pos;
49173d30d48SChristoph Hellwig unsigned int nofs_flag;
492acdda3aaSChristoph Hellwig
493acdda3aaSChristoph Hellwig trace_xfs_end_io_direct_write(ip, offset, size);
494acdda3aaSChristoph Hellwig
49575c8c50fSDave Chinner if (xfs_is_shutdown(ip->i_mount))
496acdda3aaSChristoph Hellwig return -EIO;
497acdda3aaSChristoph Hellwig
4986fe7b990SMatthew Bobrowski if (error)
4996fe7b990SMatthew Bobrowski return error;
5006fe7b990SMatthew Bobrowski if (!size)
5016fe7b990SMatthew Bobrowski return 0;
502acdda3aaSChristoph Hellwig
503ed5c3e66SDave Chinner /*
504ed5c3e66SDave Chinner * Capture amount written on completion as we can't reliably account
505ed5c3e66SDave Chinner * for it on submission.
506ed5c3e66SDave Chinner */
507ed5c3e66SDave Chinner XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
508ed5c3e66SDave Chinner
50973d30d48SChristoph Hellwig /*
51073d30d48SChristoph Hellwig * We can allocate memory here while doing writeback on behalf of
51173d30d48SChristoph Hellwig * memory reclaim. To avoid memory allocation deadlocks set the
51273d30d48SChristoph Hellwig * task-wide nofs context for the following operations.
51373d30d48SChristoph Hellwig */
51473d30d48SChristoph Hellwig nofs_flag = memalloc_nofs_save();
51573d30d48SChristoph Hellwig
516ee70daabSEryu Guan if (flags & IOMAP_DIO_COW) {
517ee70daabSEryu Guan error = xfs_reflink_end_cow(ip, offset, size);
518ee70daabSEryu Guan if (error)
51973d30d48SChristoph Hellwig goto out;
520ee70daabSEryu Guan }
521ee70daabSEryu Guan
522ee70daabSEryu Guan /*
523ee70daabSEryu Guan * Unwritten conversion updates the in-core isize after extent
524ee70daabSEryu Guan * conversion but before updating the on-disk size. Updating isize any
525ee70daabSEryu Guan * earlier allows a racing dio read to find unwritten extents before
526ee70daabSEryu Guan * they are converted.
527ee70daabSEryu Guan */
52873d30d48SChristoph Hellwig if (flags & IOMAP_DIO_UNWRITTEN) {
52973d30d48SChristoph Hellwig error = xfs_iomap_write_unwritten(ip, offset, size, true);
53073d30d48SChristoph Hellwig goto out;
53173d30d48SChristoph Hellwig }
532ee70daabSEryu Guan
533acdda3aaSChristoph Hellwig /*
534acdda3aaSChristoph Hellwig * We need to update the in-core inode size here so that we don't end up
535acdda3aaSChristoph Hellwig * with the on-disk inode size being outside the in-core inode size. We
536acdda3aaSChristoph Hellwig * have no other method of updating EOF for AIO, so always do it here
537acdda3aaSChristoph Hellwig * if necessary.
538acdda3aaSChristoph Hellwig *
539acdda3aaSChristoph Hellwig * We need to lock the test/set EOF update as we can be racing with
540acdda3aaSChristoph Hellwig * other IO completions here to update the EOF. Failing to serialise
541acdda3aaSChristoph Hellwig * here can result in EOF moving backwards and Bad Things Happen when
542acdda3aaSChristoph Hellwig * that occurs.
543977ec4ddSDave Chinner *
544977ec4ddSDave Chinner * As IO completion only ever extends EOF, we can do an unlocked check
545977ec4ddSDave Chinner * here to avoid taking the spinlock. If we land within the current EOF,
546977ec4ddSDave Chinner * then we do not need to do an extending update at all, and we don't
547977ec4ddSDave Chinner * need to take the lock to check this. If we race with an update moving
548977ec4ddSDave Chinner * EOF, then we'll either still be beyond EOF and need to take the lock,
549977ec4ddSDave Chinner * or we'll be within EOF and we don't need to take it at all.
550acdda3aaSChristoph Hellwig */
551977ec4ddSDave Chinner if (offset + size <= i_size_read(inode))
552977ec4ddSDave Chinner goto out;
553977ec4ddSDave Chinner
554acdda3aaSChristoph Hellwig spin_lock(&ip->i_flags_lock);
555acdda3aaSChristoph Hellwig if (offset + size > i_size_read(inode)) {
556acdda3aaSChristoph Hellwig i_size_write(inode, offset + size);
557acdda3aaSChristoph Hellwig spin_unlock(&ip->i_flags_lock);
558acdda3aaSChristoph Hellwig error = xfs_setfilesize(ip, offset, size);
559ee70daabSEryu Guan } else {
560ee70daabSEryu Guan spin_unlock(&ip->i_flags_lock);
561ee70daabSEryu Guan }
562acdda3aaSChristoph Hellwig
56373d30d48SChristoph Hellwig out:
56473d30d48SChristoph Hellwig memalloc_nofs_restore(nofs_flag);
565acdda3aaSChristoph Hellwig return error;
566acdda3aaSChristoph Hellwig }
567acdda3aaSChristoph Hellwig
568838c4f3dSChristoph Hellwig static const struct iomap_dio_ops xfs_dio_write_ops = {
569838c4f3dSChristoph Hellwig .end_io = xfs_dio_write_end_io,
570838c4f3dSChristoph Hellwig };
571838c4f3dSChristoph Hellwig
572c59d87c4SChristoph Hellwig /*
573caa89dbcSDave Chinner * Handle block aligned direct I/O writes
574c59d87c4SChristoph Hellwig */
575caa89dbcSDave Chinner static noinline ssize_t
576caa89dbcSDave Chinner xfs_file_dio_write_aligned(
577caa89dbcSDave Chinner struct xfs_inode *ip,
578c59d87c4SChristoph Hellwig struct kiocb *iocb,
579b3188919SAl Viro struct iov_iter *from)
580c59d87c4SChristoph Hellwig {
581a1033753SDave Chinner unsigned int iolock = XFS_IOLOCK_SHARED;
582caa89dbcSDave Chinner ssize_t ret;
583c59d87c4SChristoph Hellwig
584d7d84772SCatherine Hoang ret = xfs_ilock_iocb_for_write(iocb, &iolock);
585caa89dbcSDave Chinner if (ret)
586caa89dbcSDave Chinner return ret;
587caa89dbcSDave Chinner ret = xfs_file_write_checks(iocb, from, &iolock);
588caa89dbcSDave Chinner if (ret)
589caa89dbcSDave Chinner goto out_unlock;
590c59d87c4SChristoph Hellwig
5910ee7a3f6SChristoph Hellwig /*
592caa89dbcSDave Chinner * We don't need to hold the IOLOCK exclusively across the IO, so demote
593caa89dbcSDave Chinner * the iolock back to shared if we had to take the exclusive lock in
594caa89dbcSDave Chinner * xfs_file_write_checks() for other reasons.
5950ee7a3f6SChristoph Hellwig */
596caa89dbcSDave Chinner if (iolock == XFS_IOLOCK_EXCL) {
597caa89dbcSDave Chinner xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
598caa89dbcSDave Chinner iolock = XFS_IOLOCK_SHARED;
599caa89dbcSDave Chinner }
600caa89dbcSDave Chinner trace_xfs_file_direct_write(iocb, from);
601caa89dbcSDave Chinner ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
602786f847fSChristoph Hellwig &xfs_dio_write_ops, 0, NULL, 0);
603caa89dbcSDave Chinner out_unlock:
604caa89dbcSDave Chinner if (iolock)
605caa89dbcSDave Chinner xfs_iunlock(ip, iolock);
606caa89dbcSDave Chinner return ret;
607caa89dbcSDave Chinner }
60854a4ef8aSChristoph Hellwig
60954a4ef8aSChristoph Hellwig /*
610caa89dbcSDave Chinner * Handle block unaligned direct I/O writes
611caa89dbcSDave Chinner *
612caa89dbcSDave Chinner * In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
613caa89dbcSDave Chinner * them to be done in parallel with reads and other direct I/O writes. However,
614caa89dbcSDave Chinner * if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
615caa89dbcSDave Chinner * to do sub-block zeroing and that requires serialisation against other direct
616caa89dbcSDave Chinner * I/O to the same block. In this case we need to serialise the submission of
617caa89dbcSDave Chinner * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
618ed1128c2SDave Chinner * In the case where sub-block zeroing is not required, we can do concurrent
619ed1128c2SDave Chinner * sub-block dios to the same block successfully.
620caa89dbcSDave Chinner *
621ed1128c2SDave Chinner * Optimistically submit the I/O using the shared lock first, but use the
622ed1128c2SDave Chinner * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
623ed1128c2SDave Chinner * if block allocation or partial block zeroing would be required. In that case
624ed1128c2SDave Chinner * we try again with the exclusive lock.
625caa89dbcSDave Chinner */
626caa89dbcSDave Chinner static noinline ssize_t
627caa89dbcSDave Chinner xfs_file_dio_write_unaligned(
628caa89dbcSDave Chinner struct xfs_inode *ip,
629caa89dbcSDave Chinner struct kiocb *iocb,
630caa89dbcSDave Chinner struct iov_iter *from)
631caa89dbcSDave Chinner {
632ed1128c2SDave Chinner size_t isize = i_size_read(VFS_I(ip));
633ed1128c2SDave Chinner size_t count = iov_iter_count(from);
634a1033753SDave Chinner unsigned int iolock = XFS_IOLOCK_SHARED;
635ed1128c2SDave Chinner unsigned int flags = IOMAP_DIO_OVERWRITE_ONLY;
636caa89dbcSDave Chinner ssize_t ret;
637caa89dbcSDave Chinner
638ed1128c2SDave Chinner /*
639ed1128c2SDave Chinner * Extending writes need exclusivity because of the sub-block zeroing
640ed1128c2SDave Chinner * that the DIO code always does for partial tail blocks beyond EOF, so
641ed1128c2SDave Chinner * don't even bother trying the fast path in this case.
642ed1128c2SDave Chinner */
643ed1128c2SDave Chinner if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
644caa89dbcSDave Chinner if (iocb->ki_flags & IOCB_NOWAIT)
645caa89dbcSDave Chinner return -EAGAIN;
64693e6aa43SKaixu Xia retry_exclusive:
647ed1128c2SDave Chinner iolock = XFS_IOLOCK_EXCL;
648ed1128c2SDave Chinner flags = IOMAP_DIO_FORCE_WAIT;
649ed1128c2SDave Chinner }
650ed1128c2SDave Chinner
651d7d84772SCatherine Hoang ret = xfs_ilock_iocb_for_write(iocb, &iolock);
652ed1128c2SDave Chinner if (ret)
653ed1128c2SDave Chinner return ret;
654caa89dbcSDave Chinner
655caa89dbcSDave Chinner /*
656caa89dbcSDave Chinner * We can't properly handle unaligned direct I/O to reflink files yet,
657caa89dbcSDave Chinner * as we can't unshare a partial block.
65854a4ef8aSChristoph Hellwig */
65966ae56a5SChristoph Hellwig if (xfs_is_cow_inode(ip)) {
660896f72d0SChristoph Hellwig trace_xfs_reflink_bounce_dio_write(iocb, from);
661caa89dbcSDave Chinner ret = -ENOTBLK;
662caa89dbcSDave Chinner goto out_unlock;
66329a5d29eSGoldwyn Rodrigues }
6640ee7a3f6SChristoph Hellwig
665ee1b218bSChristoph Hellwig ret = xfs_file_write_checks(iocb, from, &iolock);
666c59d87c4SChristoph Hellwig if (ret)
667caa89dbcSDave Chinner goto out_unlock;
668c59d87c4SChristoph Hellwig
669c59d87c4SChristoph Hellwig /*
670ed1128c2SDave Chinner * If we are doing exclusive unaligned I/O, this must be the only I/O
671ed1128c2SDave Chinner * in-flight. Otherwise we risk data corruption due to unwritten extent
672ed1128c2SDave Chinner * conversions from the AIO end_io handler. Wait for all other I/O to
673ed1128c2SDave Chinner * drain first.
674c59d87c4SChristoph Hellwig */
675ed1128c2SDave Chinner if (flags & IOMAP_DIO_FORCE_WAIT)
676caa89dbcSDave Chinner inode_dio_wait(VFS_I(ip));
677c59d87c4SChristoph Hellwig
6783e40b13cSChristoph Hellwig trace_xfs_file_direct_write(iocb, from);
679f150b423SChristoph Hellwig ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
680786f847fSChristoph Hellwig &xfs_dio_write_ops, flags, NULL, 0);
681ed1128c2SDave Chinner
682ed1128c2SDave Chinner /*
683ed1128c2SDave Chinner * Retry unaligned I/O with exclusive blocking semantics if the DIO
684ed1128c2SDave Chinner * layer rejected it for mapping or locking reasons. If we are doing
685ed1128c2SDave Chinner * nonblocking user I/O, propagate the error.
686ed1128c2SDave Chinner */
687ed1128c2SDave Chinner if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
688ed1128c2SDave Chinner ASSERT(flags & IOMAP_DIO_OVERWRITE_ONLY);
689ed1128c2SDave Chinner xfs_iunlock(ip, iolock);
690ed1128c2SDave Chinner goto retry_exclusive;
691ed1128c2SDave Chinner }
692ed1128c2SDave Chinner
693caa89dbcSDave Chinner out_unlock:
694354be7e3SChristoph Hellwig if (iolock)
69565523218SChristoph Hellwig xfs_iunlock(ip, iolock);
69616d4d435SChristoph Hellwig return ret;
69716d4d435SChristoph Hellwig }
69816d4d435SChristoph Hellwig
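/*
 * Direct write entry point: the I/O must be aligned to the device logical
 * sector size; block-unaligned writes take the serialised sub-block path,
 * everything else the concurrent aligned path.
 */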
699caa89dbcSDave Chinner static ssize_t
700caa89dbcSDave Chinner xfs_file_dio_write(
701caa89dbcSDave Chinner struct kiocb *iocb,
702caa89dbcSDave Chinner struct iov_iter *from)
703caa89dbcSDave Chinner {
704caa89dbcSDave Chinner struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
705caa89dbcSDave Chinner struct xfs_buftarg *target = xfs_inode_buftarg(ip);
706caa89dbcSDave Chinner size_t count = iov_iter_count(from);
707caa89dbcSDave Chinner
708caa89dbcSDave Chinner /* direct I/O must be aligned to device logical sector size */
709caa89dbcSDave Chinner if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
710caa89dbcSDave Chinner return -EINVAL;
711caa89dbcSDave Chinner if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
712caa89dbcSDave Chinner return xfs_file_dio_write_unaligned(ip, iocb, from);
713caa89dbcSDave Chinner return xfs_file_dio_write_aligned(ip, iocb, from);
714caa89dbcSDave Chinner }
715caa89dbcSDave Chinner
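/*
 * DAX writes take the iolock exclusively; if the write extended the file,
 * the new size is written back via xfs_setfilesize() before returning.
 */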
716f021bd07SArnd Bergmann static noinline ssize_t
71816d4d435SChristoph Hellwig xfs_file_dax_write(
71816d4d435SChristoph Hellwig struct kiocb *iocb,
71916d4d435SChristoph Hellwig struct iov_iter *from)
72016d4d435SChristoph Hellwig {
7216c31f495SChristoph Hellwig struct inode *inode = iocb->ki_filp->f_mapping->host;
72216d4d435SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
723a1033753SDave Chinner unsigned int iolock = XFS_IOLOCK_EXCL;
7246c31f495SChristoph Hellwig ssize_t ret, error = 0;
7256c31f495SChristoph Hellwig loff_t pos;
72616d4d435SChristoph Hellwig
727f50b8f47SChristoph Hellwig ret = xfs_ilock_iocb(iocb, iolock);
728f50b8f47SChristoph Hellwig if (ret)
729f50b8f47SChristoph Hellwig return ret;
730ee1b218bSChristoph Hellwig ret = xfs_file_write_checks(iocb, from, &iolock);
73116d4d435SChristoph Hellwig if (ret)
73216d4d435SChristoph Hellwig goto out;
73316d4d435SChristoph Hellwig
7346c31f495SChristoph Hellwig pos = iocb->ki_pos;
7358b2180b3SDave Chinner
7363e40b13cSChristoph Hellwig trace_xfs_file_dax_write(iocb, from);
737ea6c49b7SShiyang Ruan ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops);
7386c31f495SChristoph Hellwig if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
7396c31f495SChristoph Hellwig i_size_write(inode, iocb->ki_pos);
7406c31f495SChristoph Hellwig error = xfs_setfilesize(ip, pos, ret);
74116d4d435SChristoph Hellwig }
74216d4d435SChristoph Hellwig out:
743354be7e3SChristoph Hellwig if (iolock)
74465523218SChristoph Hellwig xfs_iunlock(ip, iolock);
745ed5c3e66SDave Chinner if (error)
746ed5c3e66SDave Chinner return error;
747ed5c3e66SDave Chinner
748ed5c3e66SDave Chinner if (ret > 0) {
749ed5c3e66SDave Chinner XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
750ed5c3e66SDave Chinner
751ed5c3e66SDave Chinner /* Handle various SYNC-type writes */
752ed5c3e66SDave Chinner ret = generic_write_sync(iocb, ret);
753ed5c3e66SDave Chinner }
754ed5c3e66SDave Chinner return ret;
755c59d87c4SChristoph Hellwig }
756c59d87c4SChristoph Hellwig
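/*
 * Buffered write path.  On EDQUOT or ENOSPC the write is retried once after
 * freeing speculative preallocations and, for ENOSPC, flushing dirty inodes.
 */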
757c59d87c4SChristoph Hellwig STATIC ssize_t
758ee1b218bSChristoph Hellwig xfs_file_buffered_write(
759c59d87c4SChristoph Hellwig struct kiocb *iocb,
760b3188919SAl Viro struct iov_iter *from)
761c59d87c4SChristoph Hellwig {
7622d9ac431SKaixu Xia struct inode *inode = iocb->ki_filp->f_mapping->host;
763c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
764c59d87c4SChristoph Hellwig ssize_t ret;
765a636b1d1SDarrick J. Wong bool cleared_space = false;
766a1033753SDave Chinner unsigned int iolock;
767c59d87c4SChristoph Hellwig
768c3155097SBrian Foster write_retry:
769c3155097SBrian Foster iolock = XFS_IOLOCK_EXCL;
7701aa91d9cSStefan Roesch ret = xfs_ilock_iocb(iocb, iolock);
7711aa91d9cSStefan Roesch if (ret)
7721aa91d9cSStefan Roesch return ret;
773c59d87c4SChristoph Hellwig
774ee1b218bSChristoph Hellwig ret = xfs_file_write_checks(iocb, from, &iolock);
775c59d87c4SChristoph Hellwig if (ret)
776d0606464SChristoph Hellwig goto out;
777c59d87c4SChristoph Hellwig
7783e40b13cSChristoph Hellwig trace_xfs_file_buffered_write(iocb, from);
779f150b423SChristoph Hellwig ret = iomap_file_buffered_write(iocb, from,
780f150b423SChristoph Hellwig &xfs_buffered_write_iomap_ops);
781dc06f398SBrian Foster
782c59d87c4SChristoph Hellwig /*
783dc06f398SBrian Foster * If we hit a space limit, try to free up some lingering preallocated
784dc06f398SBrian Foster * space before returning an error. In the case of ENOSPC, first try to
785dc06f398SBrian Foster * write back all dirty inodes to free up some of the excess reserved
786dc06f398SBrian Foster * metadata space. This reduces the chances that the eofblocks scan
787dc06f398SBrian Foster * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
788dc06f398SBrian Foster * also behaves as a filter to prevent too many eofblocks scans from
789111068f8SDarrick J. Wong * running at the same time. Use a synchronous scan to increase the
790111068f8SDarrick J. Wong * effectiveness of the scan.
791c59d87c4SChristoph Hellwig */
792a636b1d1SDarrick J. Wong if (ret == -EDQUOT && !cleared_space) {
793c3155097SBrian Foster xfs_iunlock(ip, iolock);
7942d53f66bSDarrick J. Wong xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
795111068f8SDarrick J. Wong cleared_space = true;
796dc06f398SBrian Foster goto write_retry;
797a636b1d1SDarrick J. Wong } else if (ret == -ENOSPC && !cleared_space) {
798b26b2bf1SDarrick J. Wong struct xfs_icwalk icw = {0};
799dc06f398SBrian Foster
800a636b1d1SDarrick J. Wong cleared_space = true;
8019aa05000SDave Chinner xfs_flush_inodes(ip->i_mount);
802c3155097SBrian Foster
803c3155097SBrian Foster xfs_iunlock(ip, iolock);
804b26b2bf1SDarrick J. Wong icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
805b26b2bf1SDarrick J. Wong xfs_blockgc_free_space(ip->i_mount, &icw);
806c59d87c4SChristoph Hellwig goto write_retry;
807c59d87c4SChristoph Hellwig }
808d0606464SChristoph Hellwig
809d0606464SChristoph Hellwig out:
810c3155097SBrian Foster if (iolock)
81165523218SChristoph Hellwig xfs_iunlock(ip, iolock);
812ed5c3e66SDave Chinner
813ed5c3e66SDave Chinner if (ret > 0) {
814ed5c3e66SDave Chinner XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
815ed5c3e66SDave Chinner /* Handle various SYNC-type writes */
816ed5c3e66SDave Chinner ret = generic_write_sync(iocb, ret);
817ed5c3e66SDave Chinner }
818c59d87c4SChristoph Hellwig return ret;
819c59d87c4SChristoph Hellwig }
820c59d87c4SChristoph Hellwig
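/*
 * Top-level ->write_iter method: dispatch to the DAX, direct or buffered
 * write path.  Direct I/O falls back to buffered mode only for reflink CoW.
 */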
821c59d87c4SChristoph Hellwig STATIC ssize_t
822bf97f3bcSAl Viro xfs_file_write_iter(
823c59d87c4SChristoph Hellwig struct kiocb *iocb,
824bf97f3bcSAl Viro struct iov_iter *from)
825c59d87c4SChristoph Hellwig {
8262d9ac431SKaixu Xia struct inode *inode = iocb->ki_filp->f_mapping->host;
827c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
828c59d87c4SChristoph Hellwig ssize_t ret;
829bf97f3bcSAl Viro size_t ocount = iov_iter_count(from);
830c59d87c4SChristoph Hellwig
831ff6d6af2SBill O'Donnell XFS_STATS_INC(ip->i_mount, xs_write_calls);
832c59d87c4SChristoph Hellwig
833c59d87c4SChristoph Hellwig if (ocount == 0)
834c59d87c4SChristoph Hellwig return 0;
835c59d87c4SChristoph Hellwig
83675c8c50fSDave Chinner if (xfs_is_shutdown(ip->i_mount))
837bf97f3bcSAl Viro return -EIO;
838c59d87c4SChristoph Hellwig
83916d4d435SChristoph Hellwig if (IS_DAX(inode))
840ed5c3e66SDave Chinner return xfs_file_dax_write(iocb, from);
841ed5c3e66SDave Chinner
842ed5c3e66SDave Chinner if (iocb->ki_flags & IOCB_DIRECT) {
8430613f16cSDarrick J. Wong /*
8440613f16cSDarrick J. Wong * Allow a directio write to fall back to a buffered
8450613f16cSDarrick J. Wong * write *only* in the case that we're doing a reflink
8460613f16cSDarrick J. Wong * CoW. In all other directio scenarios we do not
8470613f16cSDarrick J. Wong * allow an operation to fall back to buffered mode.
8480613f16cSDarrick J. Wong */
849ee1b218bSChristoph Hellwig ret = xfs_file_dio_write(iocb, from);
85080e543aeSChristoph Hellwig if (ret != -ENOTBLK)
851c59d87c4SChristoph Hellwig return ret;
852c59d87c4SChristoph Hellwig }
853c59d87c4SChristoph Hellwig
854ee1b218bSChristoph Hellwig return xfs_file_buffered_write(iocb, from);
855ed5c3e66SDave Chinner }
856ed5c3e66SDave Chinner
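/*
 * Sleep helper used while waiting for busy DAX pages: drop XFS_MMAPLOCK_EXCL
 * around the schedule() so the page can be released, then retake it.
 */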
857d6dc57e2SDan Williams static void
858d6dc57e2SDan Williams xfs_wait_dax_page(
859e25ff835SDave Jiang struct inode *inode)
860d6dc57e2SDan Williams {
861d6dc57e2SDan Williams struct xfs_inode *ip = XFS_I(inode);
862d6dc57e2SDan Williams
863d6dc57e2SDan Williams xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
864d6dc57e2SDan Williams schedule();
865d6dc57e2SDan Williams xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
866d6dc57e2SDan Williams }
867d6dc57e2SDan Williams
86813f9e267SShiyang Ruan int
869d6dc57e2SDan Williams xfs_break_dax_layouts(
870d6dc57e2SDan Williams struct inode *inode,
871e25ff835SDave Jiang bool *retry)
872d6dc57e2SDan Williams {
873d6dc57e2SDan Williams struct page *page;
874d6dc57e2SDan Williams
875d6dc57e2SDan Williams ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
876d6dc57e2SDan Williams
877d6dc57e2SDan Williams page = dax_layout_busy_page(inode->i_mapping);
878d6dc57e2SDan Williams if (!page)
879d6dc57e2SDan Williams return 0;
880d6dc57e2SDan Williams
881e25ff835SDave Jiang *retry = true;
882d6dc57e2SDan Williams return ___wait_var_event(&page->_refcount,
883d6dc57e2SDan Williams atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
884e25ff835SDave Jiang 0, 0, xfs_wait_dax_page(inode));
885d6dc57e2SDan Williams }
886d6dc57e2SDan Williams
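/*
 * Break any mappings that conflict with the operation described by @reason:
 * busy DAX page references for BREAK_UNMAP, plus any leased layouts in all
 * cases.  Loops until there is nothing left to break or an error occurs.
 */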
88769eb5fa1SDan Williams int
88869eb5fa1SDan Williams xfs_break_layouts(
88969eb5fa1SDan Williams struct inode *inode,
89069eb5fa1SDan Williams uint *iolock,
89169eb5fa1SDan Williams enum layout_break_reason reason)
89269eb5fa1SDan Williams {
89369eb5fa1SDan Williams bool retry;
894d6dc57e2SDan Williams int error;
89569eb5fa1SDan Williams
89669eb5fa1SDan Williams ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
89769eb5fa1SDan Williams
898d6dc57e2SDan Williams do {
899d6dc57e2SDan Williams retry = false;
90069eb5fa1SDan Williams switch (reason) {
90169eb5fa1SDan Williams case BREAK_UNMAP:
902a4722a64SEric Sandeen error = xfs_break_dax_layouts(inode, &retry);
903d6dc57e2SDan Williams if (error || retry)
904d6dc57e2SDan Williams break;
90553004ee7SGustavo A. R. Silva fallthrough;
90669eb5fa1SDan Williams case BREAK_WRITE:
907d6dc57e2SDan Williams error = xfs_break_leased_layouts(inode, iolock, &retry);
908d6dc57e2SDan Williams break;
90969eb5fa1SDan Williams default:
91069eb5fa1SDan Williams WARN_ON_ONCE(1);
911d6dc57e2SDan Williams error = -EINVAL;
91269eb5fa1SDan Williams }
913d6dc57e2SDan Williams } while (error == 0 && retry);
914d6dc57e2SDan Williams
915d6dc57e2SDan Williams return error;
91669eb5fa1SDan Williams }
91769eb5fa1SDan Williams
918cea267c2SDave Chinner /* Does this file, inode, or mount want synchronous writes? */
919cea267c2SDave Chinner static inline bool xfs_file_sync_writes(struct file *filp)
920cea267c2SDave Chinner {
921cea267c2SDave Chinner struct xfs_inode *ip = XFS_I(file_inode(filp));
922cea267c2SDave Chinner
923cea267c2SDave Chinner if (xfs_has_wsync(ip->i_mount))
924cea267c2SDave Chinner return true;
925cea267c2SDave Chinner if (filp->f_flags & (__O_SYNC | O_DSYNC))
926cea267c2SDave Chinner return true;
927cea267c2SDave Chinner if (IS_SYNC(file_inode(filp)))
928cea267c2SDave Chinner return true;
929cea267c2SDave Chinner
930cea267c2SDave Chinner return false;
931cea267c2SDave Chinner }
932cea267c2SDave Chinner
933a904b1caSNamjae Jeon #define XFS_FALLOC_FL_SUPPORTED \
934a904b1caSNamjae Jeon (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
935a904b1caSNamjae Jeon FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
93698cc2db5SDarrick J. Wong FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
937a904b1caSNamjae Jeon
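/*
 * ->fallocate handler: break layouts, drain AIO/DIO and flush the affected
 * range, then perform the requested punch/collapse/insert/zero/unshare or
 * preallocation operation and any resulting file size update.
 */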
938c59d87c4SChristoph Hellwig STATIC long
939c59d87c4SChristoph Hellwig xfs_file_fallocate(
940c59d87c4SChristoph Hellwig struct file *file,
941c59d87c4SChristoph Hellwig int mode,
942c59d87c4SChristoph Hellwig loff_t offset,
943c59d87c4SChristoph Hellwig loff_t len)
944c59d87c4SChristoph Hellwig {
945496ad9aaSAl Viro struct inode *inode = file_inode(file);
94683aee9e4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
947c59d87c4SChristoph Hellwig long error;
948c63a8eaeSDan Williams uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
949c59d87c4SChristoph Hellwig loff_t new_size = 0;
950749f24f3SThomas Meyer bool do_file_insert = false;
951c59d87c4SChristoph Hellwig
95283aee9e4SChristoph Hellwig if (!S_ISREG(inode->i_mode))
95383aee9e4SChristoph Hellwig return -EINVAL;
954a904b1caSNamjae Jeon if (mode & ~XFS_FALLOC_FL_SUPPORTED)
955c59d87c4SChristoph Hellwig return -EOPNOTSUPP;
956c59d87c4SChristoph Hellwig
957781355c6SChristoph Hellwig xfs_ilock(ip, iolock);
95869eb5fa1SDan Williams error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
959781355c6SChristoph Hellwig if (error)
960781355c6SChristoph Hellwig goto out_unlock;
961781355c6SChristoph Hellwig
962249bd908SDave Chinner /*
963249bd908SDave Chinner * Must wait for all AIO to complete before we continue as AIO can
964249bd908SDave Chinner * change the file size on completion without holding any locks we
965249bd908SDave Chinner * currently hold. We must do this first because AIO can update both
966249bd908SDave Chinner * the on disk and in memory inode sizes, and the operations that follow
967249bd908SDave Chinner * require the in-memory size to be fully up-to-date.
968249bd908SDave Chinner */
969249bd908SDave Chinner inode_dio_wait(inode);
970249bd908SDave Chinner
971249bd908SDave Chinner /*
972249bd908SDave Chinner * Now AIO and DIO has drained we flush and (if necessary) invalidate
973249bd908SDave Chinner * the cached range over the first operation we are about to run.
974249bd908SDave Chinner *
975249bd908SDave Chinner * We care about zero and collapse here because they both run a hole
976249bd908SDave Chinner * punch over the range first. Because that can zero data, and the range
977249bd908SDave Chinner * of invalidation for the shift operations is much larger, we still do
978249bd908SDave Chinner * the required flush for collapse in xfs_prepare_shift().
979249bd908SDave Chinner *
980249bd908SDave Chinner * Insert has the same range requirements as collapse, and we extend the
981249bd908SDave Chinner * file first which can zero data. Hence insert has the same
982249bd908SDave Chinner * flush/invalidate requirements as collapse and so they are both
983249bd908SDave Chinner * handled at the right time by xfs_prepare_shift().
984249bd908SDave Chinner */
985249bd908SDave Chinner if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
986249bd908SDave Chinner FALLOC_FL_COLLAPSE_RANGE)) {
987249bd908SDave Chinner error = xfs_flush_unmap_range(ip, offset, len);
988249bd908SDave Chinner if (error)
989249bd908SDave Chinner goto out_unlock;
990249bd908SDave Chinner }
991249bd908SDave Chinner
992fbe7e520SDave Chinner error = file_modified(file);
993fbe7e520SDave Chinner if (error)
994fbe7e520SDave Chinner goto out_unlock;
995fbe7e520SDave Chinner
99683aee9e4SChristoph Hellwig if (mode & FALLOC_FL_PUNCH_HOLE) {
99783aee9e4SChristoph Hellwig error = xfs_free_file_space(ip, offset, len);
99883aee9e4SChristoph Hellwig if (error)
99983aee9e4SChristoph Hellwig goto out_unlock;
1000e1d8fb88SNamjae Jeon } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
100125219dbfSDarrick J. Wong if (!xfs_is_falloc_aligned(ip, offset, len)) {
10022451337dSDave Chinner error = -EINVAL;
1003e1d8fb88SNamjae Jeon goto out_unlock;
1004e1d8fb88SNamjae Jeon }
1005e1d8fb88SNamjae Jeon
100623fffa92SLukas Czerner /*
100723fffa92SLukas Czerner * The collapse range must not reach or extend beyond EOF;
100823fffa92SLukas Czerner * that would effectively be a truncate operation.
100923fffa92SLukas Czerner */
101023fffa92SLukas Czerner if (offset + len >= i_size_read(inode)) {
10112451337dSDave Chinner error = -EINVAL;
101223fffa92SLukas Czerner goto out_unlock;
101323fffa92SLukas Czerner }
101423fffa92SLukas Czerner
1015e1d8fb88SNamjae Jeon new_size = i_size_read(inode) - len;
1016e1d8fb88SNamjae Jeon
1017e1d8fb88SNamjae Jeon error = xfs_collapse_file_space(ip, offset, len);
1018e1d8fb88SNamjae Jeon if (error)
1019e1d8fb88SNamjae Jeon goto out_unlock;
1020a904b1caSNamjae Jeon } else if (mode & FALLOC_FL_INSERT_RANGE) {
10217d83fb14SDarrick J. Wong loff_t isize = i_size_read(inode);
1022a904b1caSNamjae Jeon
102325219dbfSDarrick J. Wong if (!xfs_is_falloc_aligned(ip, offset, len)) {
1024a904b1caSNamjae Jeon error = -EINVAL;
1025a904b1caSNamjae Jeon goto out_unlock;
1026a904b1caSNamjae Jeon }
1027a904b1caSNamjae Jeon
10287d83fb14SDarrick J. Wong /*
10297d83fb14SDarrick J. Wong * New inode size must not exceed ->s_maxbytes, accounting for
10307d83fb14SDarrick J. Wong * possible signed overflow.
10317d83fb14SDarrick J. Wong */
10327d83fb14SDarrick J. Wong if (inode->i_sb->s_maxbytes - isize < len) {
1033a904b1caSNamjae Jeon error = -EFBIG;
1034a904b1caSNamjae Jeon goto out_unlock;
1035a904b1caSNamjae Jeon }
10367d83fb14SDarrick J. Wong new_size = isize + len;
1037a904b1caSNamjae Jeon
1038a904b1caSNamjae Jeon /* Offset should be less than i_size */
10397d83fb14SDarrick J. Wong if (offset >= isize) {
1040a904b1caSNamjae Jeon error = -EINVAL;
1041a904b1caSNamjae Jeon goto out_unlock;
1042a904b1caSNamjae Jeon }
1043749f24f3SThomas Meyer do_file_insert = true;
104483aee9e4SChristoph Hellwig } else {
1045c59d87c4SChristoph Hellwig if (!(mode & FALLOC_FL_KEEP_SIZE) &&
1046c59d87c4SChristoph Hellwig offset + len > i_size_read(inode)) {
1047c59d87c4SChristoph Hellwig new_size = offset + len;
10482451337dSDave Chinner error = inode_newsize_ok(inode, new_size);
1049c59d87c4SChristoph Hellwig if (error)
1050c59d87c4SChristoph Hellwig goto out_unlock;
1051c59d87c4SChristoph Hellwig }
1052c59d87c4SChristoph Hellwig
105366ae56a5SChristoph Hellwig if (mode & FALLOC_FL_ZERO_RANGE) {
1054360c09c0SChristoph Hellwig /*
1055360c09c0SChristoph Hellwig * Punch a hole and prealloc the range. We use a hole
1056360c09c0SChristoph Hellwig * punch rather than unwritten extent conversion for two
1057360c09c0SChristoph Hellwig * reasons:
1058360c09c0SChristoph Hellwig *
1059360c09c0SChristoph Hellwig * 1.) Hole punch handles partial block zeroing for us.
1060360c09c0SChristoph Hellwig * 2.) If prealloc returns ENOSPC, the file range is
1061360c09c0SChristoph Hellwig * still zero-valued by virtue of the hole punch.
1062360c09c0SChristoph Hellwig */
1063360c09c0SChristoph Hellwig unsigned int blksize = i_blocksize(inode);
1064360c09c0SChristoph Hellwig
1065360c09c0SChristoph Hellwig trace_xfs_zero_file_space(ip);
1066360c09c0SChristoph Hellwig
1067360c09c0SChristoph Hellwig error = xfs_free_file_space(ip, offset, len);
1068360c09c0SChristoph Hellwig if (error)
1069360c09c0SChristoph Hellwig goto out_unlock;
1070360c09c0SChristoph Hellwig
1071360c09c0SChristoph Hellwig len = round_up(offset + len, blksize) -
1072360c09c0SChristoph Hellwig round_down(offset, blksize);
1073360c09c0SChristoph Hellwig offset = round_down(offset, blksize);
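			/*
			 * Illustrative example: with blksize = 4096,
			 * offset = 1000 and len = 3000, the rounding above
			 * yields offset = 0 and len = 4096, so the whole
			 * containing block is preallocated below; the
			 * partial-block bytes were already zeroed by the
			 * hole punch.
			 */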
107466ae56a5SChristoph Hellwig } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
107598cc2db5SDarrick J. Wong error = xfs_reflink_unshare(ip, offset, len);
107698cc2db5SDarrick J. Wong if (error)
107798cc2db5SDarrick J. Wong goto out_unlock;
107866ae56a5SChristoph Hellwig } else {
107966ae56a5SChristoph Hellwig /*
108066ae56a5SChristoph Hellwig 			 * In always_cow mode we can't use preallocations and
108166ae56a5SChristoph Hellwig * thus should not create them.
108266ae56a5SChristoph Hellwig */
108366ae56a5SChristoph Hellwig if (xfs_is_always_cow_inode(ip)) {
108466ae56a5SChristoph Hellwig error = -EOPNOTSUPP;
108566ae56a5SChristoph Hellwig goto out_unlock;
108666ae56a5SChristoph Hellwig }
1087360c09c0SChristoph Hellwig }
108866ae56a5SChristoph Hellwig
1089360c09c0SChristoph Hellwig if (!xfs_is_always_cow_inode(ip)) {
10904d1b97f9SDarrick J. Wong error = xfs_alloc_file_space(ip, offset, len);
109183aee9e4SChristoph Hellwig if (error)
109283aee9e4SChristoph Hellwig goto out_unlock;
109383aee9e4SChristoph Hellwig }
1094fbe7e520SDave Chinner }
1095fbe7e520SDave Chinner
1096c59d87c4SChristoph Hellwig /* Change file size if needed */
1097c59d87c4SChristoph Hellwig if (new_size) {
1098c59d87c4SChristoph Hellwig struct iattr iattr;
1099c59d87c4SChristoph Hellwig
1100c59d87c4SChristoph Hellwig iattr.ia_valid = ATTR_SIZE;
1101c59d87c4SChristoph Hellwig iattr.ia_size = new_size;
1102c1632a0fSChristian Brauner error = xfs_vn_setattr_size(file_mnt_idmap(file),
1103f736d93dSChristoph Hellwig file_dentry(file), &iattr);
1104a904b1caSNamjae Jeon if (error)
1105a904b1caSNamjae Jeon goto out_unlock;
1106c59d87c4SChristoph Hellwig }
1107c59d87c4SChristoph Hellwig
1108a904b1caSNamjae Jeon /*
1109a904b1caSNamjae Jeon * Perform hole insertion now that the file size has been
1110a904b1caSNamjae Jeon * updated so that if we crash during the operation we don't
1111a904b1caSNamjae Jeon 	 * leave shifted extents past EOF and hence lose access to
1112a904b1caSNamjae Jeon * the data that is contained within them.
1113a904b1caSNamjae Jeon */
1114472c6e46SDave Chinner if (do_file_insert) {
1115a904b1caSNamjae Jeon error = xfs_insert_file_space(ip, offset, len);
1116472c6e46SDave Chinner if (error)
1117472c6e46SDave Chinner goto out_unlock;
1118472c6e46SDave Chinner }
1119472c6e46SDave Chinner
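	/*
	 * For synchronous writers, force the inode to the log so the space
	 * manipulation is persistent before fallocate returns.
	 */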
1120cea267c2SDave Chinner if (xfs_file_sync_writes(file))
1121472c6e46SDave Chinner error = xfs_log_force_inode(ip);
1122a904b1caSNamjae Jeon
1123c59d87c4SChristoph Hellwig out_unlock:
1124781355c6SChristoph Hellwig xfs_iunlock(ip, iolock);
11252451337dSDave Chinner return error;
1126c59d87c4SChristoph Hellwig }
1127c59d87c4SChristoph Hellwig
112840144e49SJan Kara STATIC int
112940144e49SJan Kara xfs_file_fadvise(
113040144e49SJan Kara struct file *file,
113140144e49SJan Kara loff_t start,
113240144e49SJan Kara loff_t end,
113340144e49SJan Kara int advice)
113440144e49SJan Kara {
113540144e49SJan Kara struct xfs_inode *ip = XFS_I(file_inode(file));
113640144e49SJan Kara int ret;
113740144e49SJan Kara int lockflags = 0;
113840144e49SJan Kara
113940144e49SJan Kara /*
114040144e49SJan Kara * Operations creating pages in page cache need protection from hole
114140144e49SJan Kara * punching and similar ops
114240144e49SJan Kara */
114340144e49SJan Kara if (advice == POSIX_FADV_WILLNEED) {
114440144e49SJan Kara lockflags = XFS_IOLOCK_SHARED;
114540144e49SJan Kara xfs_ilock(ip, lockflags);
114640144e49SJan Kara }
114740144e49SJan Kara ret = generic_fadvise(file, start, end, advice);
114840144e49SJan Kara if (lockflags)
114940144e49SJan Kara xfs_iunlock(ip, lockflags);
115040144e49SJan Kara return ret;
115140144e49SJan Kara }
11523fc9f5e4SDarrick J. Wong
1153da034bccSEric Biggers STATIC loff_t
11542e5dfc99SDarrick J. Wong xfs_file_remap_range(
11559fe26045SDarrick J. Wong struct file *file_in,
11569fe26045SDarrick J. Wong loff_t pos_in,
11579fe26045SDarrick J. Wong struct file *file_out,
11589fe26045SDarrick J. Wong loff_t pos_out,
115942ec3d4cSDarrick J. Wong loff_t len,
11602e5dfc99SDarrick J. Wong unsigned int remap_flags)
11619fe26045SDarrick J. Wong {
11623fc9f5e4SDarrick J. Wong struct inode *inode_in = file_inode(file_in);
11633fc9f5e4SDarrick J. Wong struct xfs_inode *src = XFS_I(inode_in);
11643fc9f5e4SDarrick J. Wong struct inode *inode_out = file_inode(file_out);
11653fc9f5e4SDarrick J. Wong struct xfs_inode *dest = XFS_I(inode_out);
11663fc9f5e4SDarrick J. Wong struct xfs_mount *mp = src->i_mount;
11673fc9f5e4SDarrick J. Wong loff_t remapped = 0;
11683fc9f5e4SDarrick J. Wong xfs_extlen_t cowextsize;
11693fc9f5e4SDarrick J. Wong int ret;
11703fc9f5e4SDarrick J. Wong
11712e5dfc99SDarrick J. Wong if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
11722e5dfc99SDarrick J. Wong return -EINVAL;
1173cc714660SDarrick J. Wong
117438c26bfdSDave Chinner if (!xfs_has_reflink(mp))
11753fc9f5e4SDarrick J. Wong return -EOPNOTSUPP;
11763fc9f5e4SDarrick J. Wong
117775c8c50fSDave Chinner if (xfs_is_shutdown(mp))
11783fc9f5e4SDarrick J. Wong return -EIO;
11793fc9f5e4SDarrick J. Wong
11803fc9f5e4SDarrick J. Wong /* Prepare and then clone file data. */
11813fc9f5e4SDarrick J. Wong ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
11823fc9f5e4SDarrick J. Wong &len, remap_flags);
1183451d34eeSDarrick J. Wong if (ret || len == 0)
11843fc9f5e4SDarrick J. Wong return ret;
11853fc9f5e4SDarrick J. Wong
11863fc9f5e4SDarrick J. Wong trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
11873fc9f5e4SDarrick J. Wong
11883fc9f5e4SDarrick J. Wong ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
11893fc9f5e4SDarrick J. Wong &remapped);
11903fc9f5e4SDarrick J. Wong if (ret)
11913fc9f5e4SDarrick J. Wong goto out_unlock;
11923fc9f5e4SDarrick J. Wong
11933fc9f5e4SDarrick J. Wong /*
11943fc9f5e4SDarrick J. Wong * Carry the cowextsize hint from src to dest if we're sharing the
11953fc9f5e4SDarrick J. Wong * entire source file to the entire destination file, the source file
11963fc9f5e4SDarrick J. Wong * has a cowextsize hint, and the destination file does not.
11973fc9f5e4SDarrick J. Wong */
11983fc9f5e4SDarrick J. Wong cowextsize = 0;
11993fc9f5e4SDarrick J. Wong if (pos_in == 0 && len == i_size_read(inode_in) &&
12003e09ab8fSChristoph Hellwig (src->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
12013fc9f5e4SDarrick J. Wong pos_out == 0 && len >= i_size_read(inode_out) &&
12023e09ab8fSChristoph Hellwig !(dest->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE))
1203b33ce57dSChristoph Hellwig cowextsize = src->i_cowextsize;
12043fc9f5e4SDarrick J. Wong
12053fc9f5e4SDarrick J. Wong ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
12063fc9f5e4SDarrick J. Wong remap_flags);
12075833112dSChristoph Hellwig if (ret)
12085833112dSChristoph Hellwig goto out_unlock;
12093fc9f5e4SDarrick J. Wong
12105ffce3ccSDarrick J. Wong if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
12115833112dSChristoph Hellwig xfs_log_force_inode(dest);
12123fc9f5e4SDarrick J. Wong out_unlock:
1213d7d84772SCatherine Hoang xfs_iunlock2_remapping(src, dest);
12143fc9f5e4SDarrick J. Wong if (ret)
12153fc9f5e4SDarrick J. Wong trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1216549f2fc3SDarrick J. Wong /*
1217549f2fc3SDarrick J. Wong * If the caller did not set CAN_SHORTEN, then it is not prepared to
1218549f2fc3SDarrick J. Wong * handle partial results -- either the whole remap succeeds, or we
1219549f2fc3SDarrick J. Wong * must say why it did not. In this case, any error should be returned
1220549f2fc3SDarrick J. Wong * to the caller.
1221549f2fc3SDarrick J. Wong */
1222549f2fc3SDarrick J. Wong if (ret && remapped < len && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
1223549f2fc3SDarrick J. Wong return ret;
12243fc9f5e4SDarrick J. Wong return remapped > 0 ? remapped : ret;
12259fe26045SDarrick J. Wong }
1226c59d87c4SChristoph Hellwig
1227c59d87c4SChristoph Hellwig STATIC int
1228c59d87c4SChristoph Hellwig xfs_file_open(
1229c59d87c4SChristoph Hellwig struct inode *inode,
1230c59d87c4SChristoph Hellwig struct file *file)
1231c59d87c4SChristoph Hellwig {
123275c8c50fSDave Chinner if (xfs_is_shutdown(XFS_M(inode->i_sb)))
1233c59d87c4SChristoph Hellwig return -EIO;
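	/*
	 * Advertise support for nowait I/O, async buffered reads/writes,
	 * parallel direct I/O writes and O_DIRECT before the generic open
	 * checks run.
	 */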
1234d8aeb44aSJens Axboe file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
1235b2943499SChristoph Hellwig FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
1236f3bf67c6SMatthew Wilcox (Oracle) return generic_file_open(inode, file);
1237c59d87c4SChristoph Hellwig }
1238c59d87c4SChristoph Hellwig
1239c59d87c4SChristoph Hellwig STATIC int
1240c59d87c4SChristoph Hellwig xfs_dir_open(
1241c59d87c4SChristoph Hellwig struct inode *inode,
1242c59d87c4SChristoph Hellwig struct file *file)
1243c59d87c4SChristoph Hellwig {
1244c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
1245a1033753SDave Chinner unsigned int mode;
1246c59d87c4SChristoph Hellwig int error;
1247c59d87c4SChristoph Hellwig
1248c59d87c4SChristoph Hellwig error = xfs_file_open(inode, file);
1249c59d87c4SChristoph Hellwig if (error)
1250c59d87c4SChristoph Hellwig return error;
1251c59d87c4SChristoph Hellwig
1252c59d87c4SChristoph Hellwig /*
1253c59d87c4SChristoph Hellwig * If there are any blocks, read-ahead block 0 as we're almost
1254c59d87c4SChristoph Hellwig * certain to have the next operation be a read there.
1255c59d87c4SChristoph Hellwig */
1256309ecac8SChristoph Hellwig mode = xfs_ilock_data_map_shared(ip);
1257daf83964SChristoph Hellwig if (ip->i_df.if_nextents > 0)
125806566fdaSChristoph Hellwig error = xfs_dir3_data_readahead(ip, 0, 0);
1259c59d87c4SChristoph Hellwig xfs_iunlock(ip, mode);
12607a652bbeSDarrick J. Wong return error;
1261c59d87c4SChristoph Hellwig }
1262c59d87c4SChristoph Hellwig
1263c59d87c4SChristoph Hellwig STATIC int
1264c59d87c4SChristoph Hellwig xfs_file_release(
1265c59d87c4SChristoph Hellwig struct inode *inode,
1266c59d87c4SChristoph Hellwig struct file *filp)
1267c59d87c4SChristoph Hellwig {
12682451337dSDave Chinner return xfs_release(XFS_I(inode));
1269c59d87c4SChristoph Hellwig }
1270c59d87c4SChristoph Hellwig
1271c59d87c4SChristoph Hellwig STATIC int
1272c59d87c4SChristoph Hellwig xfs_file_readdir(
1273b8227554SAl Viro struct file *file,
1274b8227554SAl Viro struct dir_context *ctx)
1275c59d87c4SChristoph Hellwig {
1276b8227554SAl Viro struct inode *inode = file_inode(file);
1277c59d87c4SChristoph Hellwig xfs_inode_t *ip = XFS_I(inode);
1278c59d87c4SChristoph Hellwig size_t bufsize;
1279c59d87c4SChristoph Hellwig
1280c59d87c4SChristoph Hellwig /*
1281c59d87c4SChristoph Hellwig 	 * The Linux API doesn't pass the total size of the buffer we
1282c59d87c4SChristoph Hellwig 	 * read into down to the filesystem.  With the filldir concept
1283c59d87c4SChristoph Hellwig * it's not needed for correct information, but the XFS dir2 leaf
1284c59d87c4SChristoph Hellwig 	 * code wants an estimate of the buffer size to calculate its
1285c59d87c4SChristoph Hellwig * readahead window and size the buffers used for mapping to
1286c59d87c4SChristoph Hellwig * physical blocks.
1287c59d87c4SChristoph Hellwig *
1288c59d87c4SChristoph Hellwig 	 * Try to give it an estimate that's good enough; maybe at some
1289c59d87c4SChristoph Hellwig * point we can change the ->readdir prototype to include the
1290c59d87c4SChristoph Hellwig * buffer size. For now we use the current glibc buffer size.
1291c59d87c4SChristoph Hellwig */
129213d2c10bSChristoph Hellwig bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_disk_size);
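	/*
	 * Note that the estimate is clamped to the directory's on-disk size,
	 * so a tiny directory doesn't get a readahead window larger than the
	 * directory itself.
	 */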
1293c59d87c4SChristoph Hellwig
1294acb9553cSDarrick J. Wong return xfs_readdir(NULL, ip, ctx, bufsize);
12953fe3e6b1SJeff Liu }
12963fe3e6b1SJeff Liu
12973fe3e6b1SJeff Liu STATIC loff_t
12983fe3e6b1SJeff Liu xfs_file_llseek(
12993fe3e6b1SJeff Liu struct file *file,
13003fe3e6b1SJeff Liu loff_t offset,
130159f9c004SEric Sandeen int whence)
13023fe3e6b1SJeff Liu {
13039b2970aaSChristoph Hellwig struct inode *inode = file->f_mapping->host;
13049b2970aaSChristoph Hellwig
130575c8c50fSDave Chinner if (xfs_is_shutdown(XFS_I(inode)->i_mount))
13069b2970aaSChristoph Hellwig return -EIO;
13079b2970aaSChristoph Hellwig
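	/*
	 * SEEK_HOLE/SEEK_DATA are answered from the extent map via the iomap
	 * seek helpers; all other whence values fall back to the generic
	 * llseek code.
	 */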
130859f9c004SEric Sandeen switch (whence) {
13099b2970aaSChristoph Hellwig default:
131059f9c004SEric Sandeen return generic_file_llseek(file, offset, whence);
13113fe3e6b1SJeff Liu case SEEK_HOLE:
131260271ab7SChristoph Hellwig offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
13139b2970aaSChristoph Hellwig break;
131449c69591SEric Sandeen case SEEK_DATA:
131560271ab7SChristoph Hellwig offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
13169b2970aaSChristoph Hellwig break;
13173fe3e6b1SJeff Liu }
13189b2970aaSChristoph Hellwig
13199b2970aaSChristoph Hellwig if (offset < 0)
13209b2970aaSChristoph Hellwig return offset;
13219b2970aaSChristoph Hellwig return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
13223fe3e6b1SJeff Liu }
13233fe3e6b1SJeff Liu
1324ea6c49b7SShiyang Ruan #ifdef CONFIG_FS_DAX
132547ba8cc7SDarrick J. Wong static inline vm_fault_t
1326ea6c49b7SShiyang Ruan xfs_dax_fault(
1327ea6c49b7SShiyang Ruan struct vm_fault *vmf,
13281d024e7aSMatthew Wilcox (Oracle) unsigned int order,
1329ea6c49b7SShiyang Ruan bool write_fault,
1330ea6c49b7SShiyang Ruan pfn_t *pfn)
1331ea6c49b7SShiyang Ruan {
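	/*
	 * Only write faults that are not being satisfied from a private COW
	 * page need the write iomap ops; read faults and cow_page-backed
	 * write faults only need to map existing data.
	 */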
13321d024e7aSMatthew Wilcox (Oracle) return dax_iomap_fault(vmf, order, pfn, NULL,
1333ea6c49b7SShiyang Ruan (write_fault && !vmf->cow_page) ?
1334ea6c49b7SShiyang Ruan &xfs_dax_write_iomap_ops :
1335ea6c49b7SShiyang Ruan &xfs_read_iomap_ops);
1336ea6c49b7SShiyang Ruan }
1337ea6c49b7SShiyang Ruan #else
133847ba8cc7SDarrick J. Wong static inline vm_fault_t
1339ea6c49b7SShiyang Ruan xfs_dax_fault(
1340ea6c49b7SShiyang Ruan struct vm_fault *vmf,
13411d024e7aSMatthew Wilcox (Oracle) unsigned int order,
1342ea6c49b7SShiyang Ruan bool write_fault,
1343ea6c49b7SShiyang Ruan pfn_t *pfn)
1344ea6c49b7SShiyang Ruan {
134547ba8cc7SDarrick J. Wong ASSERT(0);
134647ba8cc7SDarrick J. Wong return VM_FAULT_SIGBUS;
1347ea6c49b7SShiyang Ruan }
1348ea6c49b7SShiyang Ruan #endif
1349ea6c49b7SShiyang Ruan
1350de0e8c20SDave Chinner /*
1351de0e8c20SDave Chinner * Locking for serialisation of IO during page faults. This results in a lock
1352de0e8c20SDave Chinner * ordering of:
1353de0e8c20SDave Chinner *
1354c1e8d7c6SMichel Lespinasse * mmap_lock (MM)
13556b698edeSDave Chinner * sb_start_pagefault(vfs, freeze)
13562433480aSJan Kara * invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
1357de0e8c20SDave Chinner * page_lock (MM)
1358de0e8c20SDave Chinner * i_lock (XFS - extent map serialisation)
1359de0e8c20SDave Chinner */
136005edd888SSouptick Joarder static vm_fault_t
1361d522d569SChristoph Hellwig __xfs_filemap_fault(
1362c791ace1SDave Jiang struct vm_fault *vmf,
13631d024e7aSMatthew Wilcox (Oracle) unsigned int order,
1364d522d569SChristoph Hellwig bool write_fault)
1365acd76e74SMatthew Wilcox {
1366f4200391SDave Jiang struct inode *inode = file_inode(vmf->vma->vm_file);
1367acd76e74SMatthew Wilcox struct xfs_inode *ip = XFS_I(inode);
136805edd888SSouptick Joarder vm_fault_t ret;
1369d7d84772SCatherine Hoang unsigned int lock_mode = 0;
1370acd76e74SMatthew Wilcox
13711d024e7aSMatthew Wilcox (Oracle) trace_xfs_filemap_fault(ip, order, write_fault);
1372acd76e74SMatthew Wilcox
1373d522d569SChristoph Hellwig if (write_fault) {
1374acd76e74SMatthew Wilcox sb_start_pagefault(inode->i_sb);
1375f4200391SDave Jiang file_update_time(vmf->vma->vm_file);
137613ad4fe3SDave Chinner }
137713ad4fe3SDave Chinner
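	/*
	 * DAX faults and write faults must be serialised against remapping
	 * operations such as truncate and hole punch, so take the mapping
	 * lock here; plain read faults on non-DAX files are covered by the
	 * page cache locking done in filemap_fault().
	 */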
1378d7d84772SCatherine Hoang if (IS_DAX(inode) || write_fault)
1379d7d84772SCatherine Hoang lock_mode = xfs_ilock_for_write_fault(XFS_I(inode));
1380d7d84772SCatherine Hoang
1381d522d569SChristoph Hellwig if (IS_DAX(inode)) {
1382a39e596bSChristoph Hellwig pfn_t pfn;
1383a39e596bSChristoph Hellwig
13841d024e7aSMatthew Wilcox (Oracle) ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
1385a39e596bSChristoph Hellwig if (ret & VM_FAULT_NEEDDSYNC)
13861d024e7aSMatthew Wilcox (Oracle) ret = dax_finish_sync_fault(vmf, order, pfn);
1387d7d84772SCatherine Hoang } else if (write_fault) {
1388d7d84772SCatherine Hoang ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
13892433480aSJan Kara } else {
1390d522d569SChristoph Hellwig ret = filemap_fault(vmf);
1391d522d569SChristoph Hellwig }
1392d7d84772SCatherine Hoang
1393d7d84772SCatherine Hoang if (lock_mode)
1394d7d84772SCatherine Hoang xfs_iunlock(XFS_I(inode), lock_mode);
139513ad4fe3SDave Chinner
1396d522d569SChristoph Hellwig if (write_fault)
1397acd76e74SMatthew Wilcox sb_end_pagefault(inode->i_sb);
1398acd76e74SMatthew Wilcox return ret;
1399acd76e74SMatthew Wilcox }
1400acd76e74SMatthew Wilcox
1401b17164e2SMikulas Patocka static inline bool
1402b17164e2SMikulas Patocka xfs_is_write_fault(
1403b17164e2SMikulas Patocka struct vm_fault *vmf)
1404b17164e2SMikulas Patocka {
1405b17164e2SMikulas Patocka return (vmf->flags & FAULT_FLAG_WRITE) &&
1406b17164e2SMikulas Patocka (vmf->vma->vm_flags & VM_SHARED);
1407b17164e2SMikulas Patocka }
1408b17164e2SMikulas Patocka
140905edd888SSouptick Joarder static vm_fault_t
1410d522d569SChristoph Hellwig xfs_filemap_fault(
1411d522d569SChristoph Hellwig struct vm_fault *vmf)
1412d522d569SChristoph Hellwig {
1413d522d569SChristoph Hellwig /* DAX can shortcut the normal fault path on write faults! */
14141d024e7aSMatthew Wilcox (Oracle) return __xfs_filemap_fault(vmf, 0,
1415d522d569SChristoph Hellwig IS_DAX(file_inode(vmf->vma->vm_file)) &&
1416b17164e2SMikulas Patocka xfs_is_write_fault(vmf));
1417d522d569SChristoph Hellwig }
1418d522d569SChristoph Hellwig
141905edd888SSouptick Joarder static vm_fault_t
1420d522d569SChristoph Hellwig xfs_filemap_huge_fault(
1421d522d569SChristoph Hellwig struct vm_fault *vmf,
14221d024e7aSMatthew Wilcox (Oracle) unsigned int order)
1423d522d569SChristoph Hellwig {
1424d522d569SChristoph Hellwig if (!IS_DAX(file_inode(vmf->vma->vm_file)))
1425d522d569SChristoph Hellwig return VM_FAULT_FALLBACK;
1426d522d569SChristoph Hellwig
1427d522d569SChristoph Hellwig /* DAX can shortcut the normal fault path on write faults! */
14281d024e7aSMatthew Wilcox (Oracle) return __xfs_filemap_fault(vmf, order,
1429b17164e2SMikulas Patocka xfs_is_write_fault(vmf));
1430d522d569SChristoph Hellwig }
1431d522d569SChristoph Hellwig
143205edd888SSouptick Joarder static vm_fault_t
1433d522d569SChristoph Hellwig xfs_filemap_page_mkwrite(
1434d522d569SChristoph Hellwig struct vm_fault *vmf)
1435d522d569SChristoph Hellwig {
14361d024e7aSMatthew Wilcox (Oracle) return __xfs_filemap_fault(vmf, 0, true);
1437d522d569SChristoph Hellwig }
1438d522d569SChristoph Hellwig
14393af49285SDave Chinner /*
14407b565c9fSJan Kara * pfn_mkwrite was originally intended to ensure we capture time stamp updates
14417b565c9fSJan Kara * on write faults. In reality, it needs to serialise against truncate and
14427b565c9fSJan Kara  * prepare memory for writing, so handle it as a standard write fault.
14433af49285SDave Chinner */
144405edd888SSouptick Joarder static vm_fault_t
14453af49285SDave Chinner xfs_filemap_pfn_mkwrite(
14463af49285SDave Chinner struct vm_fault *vmf)
14473af49285SDave Chinner {
14483af49285SDave Chinner
14491d024e7aSMatthew Wilcox (Oracle) return __xfs_filemap_fault(vmf, 0, true);
14503af49285SDave Chinner }
14513af49285SDave Chinner
14526b698edeSDave Chinner static const struct vm_operations_struct xfs_file_vm_ops = {
14536b698edeSDave Chinner .fault = xfs_filemap_fault,
1454a2d58167SDave Jiang .huge_fault = xfs_filemap_huge_fault,
1455945ea457SMatthew Wilcox (Oracle) .map_pages = filemap_map_pages,
14566b698edeSDave Chinner .page_mkwrite = xfs_filemap_page_mkwrite,
14573af49285SDave Chinner .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
14586b698edeSDave Chinner };
14596b698edeSDave Chinner
14606b698edeSDave Chinner STATIC int
14616b698edeSDave Chinner xfs_file_mmap(
146230fa529eSChristoph Hellwig struct file *file,
14636b698edeSDave Chinner struct vm_area_struct *vma)
14646b698edeSDave Chinner {
146530fa529eSChristoph Hellwig struct inode *inode = file_inode(file);
146630fa529eSChristoph Hellwig struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode));
1467b21fec41SPankaj Gupta
1468a39e596bSChristoph Hellwig /*
1469b21fec41SPankaj Gupta * We don't support synchronous mappings for non-DAX files and
1470b21fec41SPankaj Gupta 	 * for DAX files if the underlying dax_device is not synchronous.
1471a39e596bSChristoph Hellwig */
147230fa529eSChristoph Hellwig if (!daxdev_mapping_supported(vma, target->bt_daxdev))
1473a39e596bSChristoph Hellwig return -EOPNOTSUPP;
1474a39e596bSChristoph Hellwig
147530fa529eSChristoph Hellwig file_accessed(file);
14766b698edeSDave Chinner vma->vm_ops = &xfs_file_vm_ops;
147730fa529eSChristoph Hellwig if (IS_DAX(inode))
14781c71222eSSuren Baghdasaryan vm_flags_set(vma, VM_HUGEPAGE);
14796b698edeSDave Chinner return 0;
1480075a924dSDave Chinner }
1481075a924dSDave Chinner
1482c59d87c4SChristoph Hellwig const struct file_operations xfs_file_operations = {
14833fe3e6b1SJeff Liu .llseek = xfs_file_llseek,
1484b4f5d2c6SAl Viro .read_iter = xfs_file_read_iter,
1485bf97f3bcSAl Viro .write_iter = xfs_file_write_iter,
148654919f94SDavid Howells .splice_read = xfs_file_splice_read,
14878d020765SAl Viro .splice_write = iter_file_splice_write,
14883e08773cSChristoph Hellwig .iopoll = iocb_bio_iopoll,
1489c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl,
1490c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT
1491c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl,
1492c59d87c4SChristoph Hellwig #endif
1493c59d87c4SChristoph Hellwig .mmap = xfs_file_mmap,
1494a39e596bSChristoph Hellwig .mmap_supported_flags = MAP_SYNC,
1495c59d87c4SChristoph Hellwig .open = xfs_file_open,
1496c59d87c4SChristoph Hellwig .release = xfs_file_release,
1497c59d87c4SChristoph Hellwig .fsync = xfs_file_fsync,
1498dbe6ec81SToshi Kani .get_unmapped_area = thp_get_unmapped_area,
1499c59d87c4SChristoph Hellwig .fallocate = xfs_file_fallocate,
150040144e49SJan Kara .fadvise = xfs_file_fadvise,
15012e5dfc99SDarrick J. Wong .remap_file_range = xfs_file_remap_range,
1502c59d87c4SChristoph Hellwig };
1503c59d87c4SChristoph Hellwig
1504c59d87c4SChristoph Hellwig const struct file_operations xfs_dir_file_operations = {
1505c59d87c4SChristoph Hellwig .open = xfs_dir_open,
1506c59d87c4SChristoph Hellwig .read = generic_read_dir,
15073b0a3c1aSAl Viro .iterate_shared = xfs_file_readdir,
1508c59d87c4SChristoph Hellwig .llseek = generic_file_llseek,
1509c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl,
1510c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT
1511c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl,
1512c59d87c4SChristoph Hellwig #endif
15131da2f2dbSChristoph Hellwig .fsync = xfs_dir_fsync,
1514c59d87c4SChristoph Hellwig };
1515