10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 2c59d87c4SChristoph Hellwig /* 3c59d87c4SChristoph Hellwig * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4c59d87c4SChristoph Hellwig * All Rights Reserved. 5c59d87c4SChristoph Hellwig */ 6c59d87c4SChristoph Hellwig #include "xfs.h" 7c59d87c4SChristoph Hellwig #include "xfs_fs.h" 870a9883cSDave Chinner #include "xfs_shared.h" 9a4fbe6abSDave Chinner #include "xfs_format.h" 10239880efSDave Chinner #include "xfs_log_format.h" 11239880efSDave Chinner #include "xfs_trans_resv.h" 12c59d87c4SChristoph Hellwig #include "xfs_mount.h" 13c59d87c4SChristoph Hellwig #include "xfs_inode.h" 14239880efSDave Chinner #include "xfs_trans.h" 15c59d87c4SChristoph Hellwig #include "xfs_inode_item.h" 16c59d87c4SChristoph Hellwig #include "xfs_bmap.h" 17c24b5dfaSDave Chinner #include "xfs_bmap_util.h" 182b9ab5abSDave Chinner #include "xfs_dir2.h" 19c24b5dfaSDave Chinner #include "xfs_dir2_priv.h" 20c59d87c4SChristoph Hellwig #include "xfs_ioctl.h" 21c59d87c4SChristoph Hellwig #include "xfs_trace.h" 22239880efSDave Chinner #include "xfs_log.h" 23dc06f398SBrian Foster #include "xfs_icache.h" 24781355c6SChristoph Hellwig #include "xfs_pnfs.h" 2568a9f5e7SChristoph Hellwig #include "xfs_iomap.h" 260613f16cSDarrick J. Wong #include "xfs_reflink.h" 27c59d87c4SChristoph Hellwig 28c59d87c4SChristoph Hellwig #include <linux/falloc.h> 2966114cadSTejun Heo #include <linux/backing-dev.h> 30a39e596bSChristoph Hellwig #include <linux/mman.h> 3140144e49SJan Kara #include <linux/fadvise.h> 32c59d87c4SChristoph Hellwig 33c59d87c4SChristoph Hellwig static const struct vm_operations_struct xfs_file_vm_ops; 34c59d87c4SChristoph Hellwig 358add71caSChristoph Hellwig int 368add71caSChristoph Hellwig xfs_update_prealloc_flags( 378add71caSChristoph Hellwig struct xfs_inode *ip, 388add71caSChristoph Hellwig enum xfs_prealloc_flags flags) 398add71caSChristoph Hellwig { 408add71caSChristoph Hellwig struct xfs_trans *tp; 418add71caSChristoph Hellwig int error; 428add71caSChristoph Hellwig 43253f4911SChristoph Hellwig error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid, 44253f4911SChristoph Hellwig 0, 0, 0, &tp); 45253f4911SChristoph Hellwig if (error) 468add71caSChristoph Hellwig return error; 478add71caSChristoph Hellwig 488add71caSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 498add71caSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 508add71caSChristoph Hellwig 518add71caSChristoph Hellwig if (!(flags & XFS_PREALLOC_INVISIBLE)) { 52c19b3b05SDave Chinner VFS_I(ip)->i_mode &= ~S_ISUID; 53c19b3b05SDave Chinner if (VFS_I(ip)->i_mode & S_IXGRP) 54c19b3b05SDave Chinner VFS_I(ip)->i_mode &= ~S_ISGID; 558add71caSChristoph Hellwig xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 568add71caSChristoph Hellwig } 578add71caSChristoph Hellwig 588add71caSChristoph Hellwig if (flags & XFS_PREALLOC_SET) 598add71caSChristoph Hellwig ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 608add71caSChristoph Hellwig if (flags & XFS_PREALLOC_CLEAR) 618add71caSChristoph Hellwig ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; 628add71caSChristoph Hellwig 638add71caSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 648add71caSChristoph Hellwig if (flags & XFS_PREALLOC_SYNC) 658add71caSChristoph Hellwig xfs_trans_set_sync(tp); 6670393313SChristoph Hellwig return xfs_trans_commit(tp); 678add71caSChristoph Hellwig } 688add71caSChristoph Hellwig 691da2f2dbSChristoph Hellwig /* 701da2f2dbSChristoph Hellwig * Fsync operations on directories are much simpler than on 
regular files, 711da2f2dbSChristoph Hellwig * as there is no file data to flush, and thus also no need for explicit 721da2f2dbSChristoph Hellwig * cache flush operations, and there are no non-transaction metadata updates 731da2f2dbSChristoph Hellwig * on directories either. 741da2f2dbSChristoph Hellwig */ 751da2f2dbSChristoph Hellwig STATIC int 761da2f2dbSChristoph Hellwig xfs_dir_fsync( 771da2f2dbSChristoph Hellwig struct file *file, 781da2f2dbSChristoph Hellwig loff_t start, 791da2f2dbSChristoph Hellwig loff_t end, 801da2f2dbSChristoph Hellwig int datasync) 811da2f2dbSChristoph Hellwig { 821da2f2dbSChristoph Hellwig struct xfs_inode *ip = XFS_I(file->f_mapping->host); 831da2f2dbSChristoph Hellwig 841da2f2dbSChristoph Hellwig trace_xfs_dir_fsync(ip); 8554fbdd10SChristoph Hellwig return xfs_log_force_inode(ip); 861da2f2dbSChristoph Hellwig } 871da2f2dbSChristoph Hellwig 88c59d87c4SChristoph Hellwig STATIC int 89c59d87c4SChristoph Hellwig xfs_file_fsync( 90c59d87c4SChristoph Hellwig struct file *file, 91c59d87c4SChristoph Hellwig loff_t start, 92c59d87c4SChristoph Hellwig loff_t end, 93c59d87c4SChristoph Hellwig int datasync) 94c59d87c4SChristoph Hellwig { 95c59d87c4SChristoph Hellwig struct inode *inode = file->f_mapping->host; 96c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 97c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 98c59d87c4SChristoph Hellwig int error = 0; 99c59d87c4SChristoph Hellwig int log_flushed = 0; 100b1037058SChristoph Hellwig xfs_lsn_t lsn = 0; 101c59d87c4SChristoph Hellwig 102c59d87c4SChristoph Hellwig trace_xfs_file_fsync(ip); 103c59d87c4SChristoph Hellwig 1041b180274SJeff Layton error = file_write_and_wait_range(file, start, end); 105c59d87c4SChristoph Hellwig if (error) 106c59d87c4SChristoph Hellwig return error; 107c59d87c4SChristoph Hellwig 108c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 109b474c7aeSEric Sandeen return -EIO; 110c59d87c4SChristoph Hellwig 111c59d87c4SChristoph Hellwig xfs_iflags_clear(ip, XFS_ITRUNCATED); 112c59d87c4SChristoph Hellwig 113c59d87c4SChristoph Hellwig /* 1142291dab2SDave Chinner * If we have an RT and/or log subvolume we need to make sure to flush 1152291dab2SDave Chinner * the write cache the device used for file data first. This is to 1162291dab2SDave Chinner * ensure newly written file data make it to disk before logging the new 1172291dab2SDave Chinner * inode size in case of an extending write. 118c59d87c4SChristoph Hellwig */ 119c59d87c4SChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip)) 120c59d87c4SChristoph Hellwig xfs_blkdev_issue_flush(mp->m_rtdev_targp); 121c59d87c4SChristoph Hellwig else if (mp->m_logdev_targp != mp->m_ddev_targp) 122c59d87c4SChristoph Hellwig xfs_blkdev_issue_flush(mp->m_ddev_targp); 123c59d87c4SChristoph Hellwig 124c59d87c4SChristoph Hellwig /* 125fc0561ceSDave Chinner * All metadata updates are logged, which means that we just have to 126fc0561ceSDave Chinner * flush the log up to the latest LSN that touched the inode. If we have 127fc0561ceSDave Chinner * concurrent fsync/fdatasync() calls, we need them to all block on the 128fc0561ceSDave Chinner * log force before we clear the ili_fsync_fields field. This ensures 129fc0561ceSDave Chinner * that we don't get a racing sync operation that does not wait for the 130fc0561ceSDave Chinner * metadata to hit the journal before returning. 
If we race with 131fc0561ceSDave Chinner * clearing the ili_fsync_fields, then all that will happen is the log 132fc0561ceSDave Chinner * force will do nothing as the lsn will already be on disk. We can't 133fc0561ceSDave Chinner * race with setting ili_fsync_fields because that is done under 134fc0561ceSDave Chinner * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared 135fc0561ceSDave Chinner * until after the ili_fsync_fields is cleared. 136c59d87c4SChristoph Hellwig */ 137c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_SHARED); 1388f639ddeSChristoph Hellwig if (xfs_ipincount(ip)) { 1398f639ddeSChristoph Hellwig if (!datasync || 140fc0561ceSDave Chinner (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) 141b1037058SChristoph Hellwig lsn = ip->i_itemp->ili_last_lsn; 1428f639ddeSChristoph Hellwig } 143c59d87c4SChristoph Hellwig 144fc0561ceSDave Chinner if (lsn) { 145656de4ffSChristoph Hellwig error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 146fc0561ceSDave Chinner ip->i_itemp->ili_fsync_fields = 0; 147fc0561ceSDave Chinner } 148fc0561ceSDave Chinner xfs_iunlock(ip, XFS_ILOCK_SHARED); 149b1037058SChristoph Hellwig 150c59d87c4SChristoph Hellwig /* 151c59d87c4SChristoph Hellwig * If we only have a single device, and the log force about was 152c59d87c4SChristoph Hellwig * a no-op we might have to flush the data device cache here. 153c59d87c4SChristoph Hellwig * This can only happen for fdatasync/O_DSYNC if we were overwriting 154c59d87c4SChristoph Hellwig * an already allocated file and thus do not have any metadata to 155c59d87c4SChristoph Hellwig * commit. 156c59d87c4SChristoph Hellwig */ 1572291dab2SDave Chinner if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) && 1582291dab2SDave Chinner mp->m_logdev_targp == mp->m_ddev_targp) 159c59d87c4SChristoph Hellwig xfs_blkdev_issue_flush(mp->m_ddev_targp); 160c59d87c4SChristoph Hellwig 1612451337dSDave Chinner return error; 162c59d87c4SChristoph Hellwig } 163c59d87c4SChristoph Hellwig 164c59d87c4SChristoph Hellwig STATIC ssize_t 165bbc5a740SChristoph Hellwig xfs_file_dio_aio_read( 166c59d87c4SChristoph Hellwig struct kiocb *iocb, 167b4f5d2c6SAl Viro struct iov_iter *to) 168c59d87c4SChristoph Hellwig { 169acdda3aaSChristoph Hellwig struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp)); 170bbc5a740SChristoph Hellwig size_t count = iov_iter_count(to); 171acdda3aaSChristoph Hellwig ssize_t ret; 172c59d87c4SChristoph Hellwig 173bbc5a740SChristoph Hellwig trace_xfs_file_direct_read(ip, count, iocb->ki_pos); 174c59d87c4SChristoph Hellwig 175f1285ff0SChristoph Hellwig if (!count) 176f1285ff0SChristoph Hellwig return 0; /* skip atime */ 177c59d87c4SChristoph Hellwig 178a447d7cdSChristoph Hellwig file_accessed(iocb->ki_filp); 179a447d7cdSChristoph Hellwig 1807b53b868SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) { 1817b53b868SChristoph Hellwig if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) 1827b53b868SChristoph Hellwig return -EAGAIN; 1837b53b868SChristoph Hellwig } else { 18465523218SChristoph Hellwig xfs_ilock(ip, XFS_IOLOCK_SHARED); 1857b53b868SChristoph Hellwig } 186690c2a38SChristoph Hellwig ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 187690c2a38SChristoph Hellwig is_sync_kiocb(iocb)); 18865523218SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED); 189acdda3aaSChristoph Hellwig 19016d4d435SChristoph Hellwig return ret; 19116d4d435SChristoph Hellwig } 19216d4d435SChristoph Hellwig 193f021bd07SArnd Bergmann static noinline ssize_t 19416d4d435SChristoph Hellwig xfs_file_dax_read( 
19516d4d435SChristoph Hellwig struct kiocb *iocb, 19616d4d435SChristoph Hellwig struct iov_iter *to) 19716d4d435SChristoph Hellwig { 1986c31f495SChristoph Hellwig struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host); 19916d4d435SChristoph Hellwig size_t count = iov_iter_count(to); 20016d4d435SChristoph Hellwig ssize_t ret = 0; 20116d4d435SChristoph Hellwig 20216d4d435SChristoph Hellwig trace_xfs_file_dax_read(ip, count, iocb->ki_pos); 20316d4d435SChristoph Hellwig 20416d4d435SChristoph Hellwig if (!count) 20516d4d435SChristoph Hellwig return 0; /* skip atime */ 20616d4d435SChristoph Hellwig 207942491c9SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) { 208942491c9SChristoph Hellwig if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) 20929a5d29eSGoldwyn Rodrigues return -EAGAIN; 210942491c9SChristoph Hellwig } else { 21165523218SChristoph Hellwig xfs_ilock(ip, XFS_IOLOCK_SHARED); 21229a5d29eSGoldwyn Rodrigues } 213942491c9SChristoph Hellwig 214690c2a38SChristoph Hellwig ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops); 21565523218SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED); 216bbc5a740SChristoph Hellwig 217f1285ff0SChristoph Hellwig file_accessed(iocb->ki_filp); 218bbc5a740SChristoph Hellwig return ret; 219bbc5a740SChristoph Hellwig } 220bbc5a740SChristoph Hellwig 221bbc5a740SChristoph Hellwig STATIC ssize_t 222bbc5a740SChristoph Hellwig xfs_file_buffered_aio_read( 223bbc5a740SChristoph Hellwig struct kiocb *iocb, 224bbc5a740SChristoph Hellwig struct iov_iter *to) 225bbc5a740SChristoph Hellwig { 226bbc5a740SChristoph Hellwig struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp)); 227bbc5a740SChristoph Hellwig ssize_t ret; 228bbc5a740SChristoph Hellwig 229bbc5a740SChristoph Hellwig trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos); 230bbc5a740SChristoph Hellwig 231942491c9SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) { 232942491c9SChristoph Hellwig if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) 23391f9943eSChristoph Hellwig return -EAGAIN; 234942491c9SChristoph Hellwig } else { 23565523218SChristoph Hellwig xfs_ilock(ip, XFS_IOLOCK_SHARED); 23691f9943eSChristoph Hellwig } 237b4f5d2c6SAl Viro ret = generic_file_read_iter(iocb, to); 23865523218SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED); 239bbc5a740SChristoph Hellwig 240bbc5a740SChristoph Hellwig return ret; 241bbc5a740SChristoph Hellwig } 242bbc5a740SChristoph Hellwig 243bbc5a740SChristoph Hellwig STATIC ssize_t 244bbc5a740SChristoph Hellwig xfs_file_read_iter( 245bbc5a740SChristoph Hellwig struct kiocb *iocb, 246bbc5a740SChristoph Hellwig struct iov_iter *to) 247bbc5a740SChristoph Hellwig { 24816d4d435SChristoph Hellwig struct inode *inode = file_inode(iocb->ki_filp); 24916d4d435SChristoph Hellwig struct xfs_mount *mp = XFS_I(inode)->i_mount; 250bbc5a740SChristoph Hellwig ssize_t ret = 0; 251bbc5a740SChristoph Hellwig 252bbc5a740SChristoph Hellwig XFS_STATS_INC(mp, xs_read_calls); 253bbc5a740SChristoph Hellwig 254bbc5a740SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 255bbc5a740SChristoph Hellwig return -EIO; 256bbc5a740SChristoph Hellwig 25716d4d435SChristoph Hellwig if (IS_DAX(inode)) 25816d4d435SChristoph Hellwig ret = xfs_file_dax_read(iocb, to); 25916d4d435SChristoph Hellwig else if (iocb->ki_flags & IOCB_DIRECT) 260bbc5a740SChristoph Hellwig ret = xfs_file_dio_aio_read(iocb, to); 261bbc5a740SChristoph Hellwig else 262bbc5a740SChristoph Hellwig ret = xfs_file_buffered_aio_read(iocb, to); 263bbc5a740SChristoph Hellwig 264c59d87c4SChristoph Hellwig if (ret > 0) 265ff6d6af2SBill O'Donnell 
XFS_STATS_ADD(mp, xs_read_bytes, ret);
266c59d87c4SChristoph Hellwig return ret;
267c59d87c4SChristoph Hellwig }
268c59d87c4SChristoph Hellwig
269c59d87c4SChristoph Hellwig /*
270c59d87c4SChristoph Hellwig * Common pre-write limit and setup checks.
271c59d87c4SChristoph Hellwig *
2725bf1f262SChristoph Hellwig * Called with the iolock held either shared or exclusive according to
2735bf1f262SChristoph Hellwig * @iolock, and returns with it held. Might upgrade the iolock to exclusive
2745bf1f262SChristoph Hellwig * if called for a direct write beyond i_size.
275c59d87c4SChristoph Hellwig */
276c59d87c4SChristoph Hellwig STATIC ssize_t
277c59d87c4SChristoph Hellwig xfs_file_aio_write_checks(
27899733fa3SAl Viro struct kiocb *iocb,
27999733fa3SAl Viro struct iov_iter *from,
280c59d87c4SChristoph Hellwig int *iolock)
281c59d87c4SChristoph Hellwig {
28299733fa3SAl Viro struct file *file = iocb->ki_filp;
283c59d87c4SChristoph Hellwig struct inode *inode = file->f_mapping->host;
284c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode);
2853309dd04SAl Viro ssize_t error = 0;
28699733fa3SAl Viro size_t count = iov_iter_count(from);
2873136e8bbSBrian Foster bool drained_dio = false;
288f5c54717SChristoph Hellwig loff_t isize;
289c59d87c4SChristoph Hellwig
2907271d243SDave Chinner restart:
2913309dd04SAl Viro error = generic_write_checks(iocb, from);
2923309dd04SAl Viro if (error <= 0)
293c59d87c4SChristoph Hellwig return error;
294c59d87c4SChristoph Hellwig
29569eb5fa1SDan Williams error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
296781355c6SChristoph Hellwig if (error)
297781355c6SChristoph Hellwig return error;
298781355c6SChristoph Hellwig
29965523218SChristoph Hellwig /*
30065523218SChristoph Hellwig * For changing security info in file_remove_privs() we need i_rwsem
30165523218SChristoph Hellwig * exclusively.
30265523218SChristoph Hellwig */
303a6de82caSJan Kara if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
30465523218SChristoph Hellwig xfs_iunlock(ip, *iolock);
305a6de82caSJan Kara *iolock = XFS_IOLOCK_EXCL;
30665523218SChristoph Hellwig xfs_ilock(ip, *iolock);
307a6de82caSJan Kara goto restart;
308a6de82caSJan Kara }
309c59d87c4SChristoph Hellwig /*
310c59d87c4SChristoph Hellwig * If the offset is beyond the size of the file, we need to zero any
311c59d87c4SChristoph Hellwig * blocks that fall between the existing EOF and the start of this
3122813d682SChristoph Hellwig * write. If zeroing is needed and we are currently holding the
313467f7899SChristoph Hellwig * iolock shared, we need to update it to exclusive which implies
314467f7899SChristoph Hellwig * having to redo all checks before.
315b9d59846SDave Chinner *
316b9d59846SDave Chinner * We need to serialise against EOF updates that occur in IO
317b9d59846SDave Chinner * completions here. We want to make sure that nobody is changing the
318b9d59846SDave Chinner * size while we do this check until we have placed an IO barrier (i.e.
319b9d59846SDave Chinner * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
320b9d59846SDave Chinner * The spinlock effectively forms a memory barrier once we have the
321b9d59846SDave Chinner * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
322b9d59846SDave Chinner * and hence be able to correctly determine if we need to run zeroing.
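 *
 * As an illustrative example (the numbers are assumptions, not taken from
 * this code): if the in-core size is 4096 bytes and a write comes in at
 * ki_pos 8192, the range [4096, 8192) has to be zeroed first, which is
 * what the iomap_zero_range() call below does once the iolock is held
 * exclusive.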
323c59d87c4SChristoph Hellwig */ 324b9d59846SDave Chinner spin_lock(&ip->i_flags_lock); 325f5c54717SChristoph Hellwig isize = i_size_read(inode); 326f5c54717SChristoph Hellwig if (iocb->ki_pos > isize) { 327b9d59846SDave Chinner spin_unlock(&ip->i_flags_lock); 3283136e8bbSBrian Foster if (!drained_dio) { 3297271d243SDave Chinner if (*iolock == XFS_IOLOCK_SHARED) { 33065523218SChristoph Hellwig xfs_iunlock(ip, *iolock); 3317271d243SDave Chinner *iolock = XFS_IOLOCK_EXCL; 33265523218SChristoph Hellwig xfs_ilock(ip, *iolock); 3333309dd04SAl Viro iov_iter_reexpand(from, count); 3343136e8bbSBrian Foster } 33540c63fbcSDave Chinner /* 33640c63fbcSDave Chinner * We now have an IO submission barrier in place, but 33740c63fbcSDave Chinner * AIO can do EOF updates during IO completion and hence 33840c63fbcSDave Chinner * we now need to wait for all of them to drain. Non-AIO 33940c63fbcSDave Chinner * DIO will have drained before we are given the 34040c63fbcSDave Chinner * XFS_IOLOCK_EXCL, and so for most cases this wait is a 34140c63fbcSDave Chinner * no-op. 34240c63fbcSDave Chinner */ 34340c63fbcSDave Chinner inode_dio_wait(inode); 3443136e8bbSBrian Foster drained_dio = true; 3457271d243SDave Chinner goto restart; 3467271d243SDave Chinner } 347f5c54717SChristoph Hellwig 348f5c54717SChristoph Hellwig trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize); 349f5c54717SChristoph Hellwig error = iomap_zero_range(inode, isize, iocb->ki_pos - isize, 350f150b423SChristoph Hellwig NULL, &xfs_buffered_write_iomap_ops); 351c59d87c4SChristoph Hellwig if (error) 352c59d87c4SChristoph Hellwig return error; 353b9d59846SDave Chinner } else 354b9d59846SDave Chinner spin_unlock(&ip->i_flags_lock); 355c59d87c4SChristoph Hellwig 356c59d87c4SChristoph Hellwig /* 3578a9c9980SChristoph Hellwig * Updating the timestamps will grab the ilock again from 3588a9c9980SChristoph Hellwig * xfs_fs_dirty_inode, so we have to call it after dropping the 3598a9c9980SChristoph Hellwig * lock above. Eventually we should look into a way to avoid 3608a9c9980SChristoph Hellwig * the pointless lock roundtrip. 3618a9c9980SChristoph Hellwig */ 3628c3f406cSAmir Goldstein return file_modified(file); 363c59d87c4SChristoph Hellwig } 364c59d87c4SChristoph Hellwig 365acdda3aaSChristoph Hellwig static int 366acdda3aaSChristoph Hellwig xfs_dio_write_end_io( 367acdda3aaSChristoph Hellwig struct kiocb *iocb, 368acdda3aaSChristoph Hellwig ssize_t size, 3696fe7b990SMatthew Bobrowski int error, 370acdda3aaSChristoph Hellwig unsigned flags) 371acdda3aaSChristoph Hellwig { 372acdda3aaSChristoph Hellwig struct inode *inode = file_inode(iocb->ki_filp); 373acdda3aaSChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 374acdda3aaSChristoph Hellwig loff_t offset = iocb->ki_pos; 37573d30d48SChristoph Hellwig unsigned int nofs_flag; 376acdda3aaSChristoph Hellwig 377acdda3aaSChristoph Hellwig trace_xfs_end_io_direct_write(ip, offset, size); 378acdda3aaSChristoph Hellwig 379acdda3aaSChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 380acdda3aaSChristoph Hellwig return -EIO; 381acdda3aaSChristoph Hellwig 3826fe7b990SMatthew Bobrowski if (error) 3836fe7b990SMatthew Bobrowski return error; 3846fe7b990SMatthew Bobrowski if (!size) 3856fe7b990SMatthew Bobrowski return 0; 386acdda3aaSChristoph Hellwig 387ed5c3e66SDave Chinner /* 388ed5c3e66SDave Chinner * Capture amount written on completion as we can't reliably account 389ed5c3e66SDave Chinner * for it on submission. 
390ed5c3e66SDave Chinner */ 391ed5c3e66SDave Chinner XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); 392ed5c3e66SDave Chinner 39373d30d48SChristoph Hellwig /* 39473d30d48SChristoph Hellwig * We can allocate memory here while doing writeback on behalf of 39573d30d48SChristoph Hellwig * memory reclaim. To avoid memory allocation deadlocks set the 39673d30d48SChristoph Hellwig * task-wide nofs context for the following operations. 39773d30d48SChristoph Hellwig */ 39873d30d48SChristoph Hellwig nofs_flag = memalloc_nofs_save(); 39973d30d48SChristoph Hellwig 400ee70daabSEryu Guan if (flags & IOMAP_DIO_COW) { 401ee70daabSEryu Guan error = xfs_reflink_end_cow(ip, offset, size); 402ee70daabSEryu Guan if (error) 40373d30d48SChristoph Hellwig goto out; 404ee70daabSEryu Guan } 405ee70daabSEryu Guan 406ee70daabSEryu Guan /* 407ee70daabSEryu Guan * Unwritten conversion updates the in-core isize after extent 408ee70daabSEryu Guan * conversion but before updating the on-disk size. Updating isize any 409ee70daabSEryu Guan * earlier allows a racing dio read to find unwritten extents before 410ee70daabSEryu Guan * they are converted. 411ee70daabSEryu Guan */ 41273d30d48SChristoph Hellwig if (flags & IOMAP_DIO_UNWRITTEN) { 41373d30d48SChristoph Hellwig error = xfs_iomap_write_unwritten(ip, offset, size, true); 41473d30d48SChristoph Hellwig goto out; 41573d30d48SChristoph Hellwig } 416ee70daabSEryu Guan 417acdda3aaSChristoph Hellwig /* 418acdda3aaSChristoph Hellwig * We need to update the in-core inode size here so that we don't end up 419acdda3aaSChristoph Hellwig * with the on-disk inode size being outside the in-core inode size. We 420acdda3aaSChristoph Hellwig * have no other method of updating EOF for AIO, so always do it here 421acdda3aaSChristoph Hellwig * if necessary. 422acdda3aaSChristoph Hellwig * 423acdda3aaSChristoph Hellwig * We need to lock the test/set EOF update as we can be racing with 424acdda3aaSChristoph Hellwig * other IO completions here to update the EOF. Failing to serialise 425acdda3aaSChristoph Hellwig * here can result in EOF moving backwards and Bad Things Happen when 426acdda3aaSChristoph Hellwig * that occurs. 427acdda3aaSChristoph Hellwig */ 428acdda3aaSChristoph Hellwig spin_lock(&ip->i_flags_lock); 429acdda3aaSChristoph Hellwig if (offset + size > i_size_read(inode)) { 430acdda3aaSChristoph Hellwig i_size_write(inode, offset + size); 431acdda3aaSChristoph Hellwig spin_unlock(&ip->i_flags_lock); 432acdda3aaSChristoph Hellwig error = xfs_setfilesize(ip, offset, size); 433ee70daabSEryu Guan } else { 434ee70daabSEryu Guan spin_unlock(&ip->i_flags_lock); 435ee70daabSEryu Guan } 436acdda3aaSChristoph Hellwig 43773d30d48SChristoph Hellwig out: 43873d30d48SChristoph Hellwig memalloc_nofs_restore(nofs_flag); 439acdda3aaSChristoph Hellwig return error; 440acdda3aaSChristoph Hellwig } 441acdda3aaSChristoph Hellwig 442838c4f3dSChristoph Hellwig static const struct iomap_dio_ops xfs_dio_write_ops = { 443838c4f3dSChristoph Hellwig .end_io = xfs_dio_write_end_io, 444838c4f3dSChristoph Hellwig }; 445838c4f3dSChristoph Hellwig 446c59d87c4SChristoph Hellwig /* 447c59d87c4SChristoph Hellwig * xfs_file_dio_aio_write - handle direct IO writes 448c59d87c4SChristoph Hellwig * 449c59d87c4SChristoph Hellwig * Lock the inode appropriately to prepare for and issue a direct IO write. 450c59d87c4SChristoph Hellwig * By separating it from the buffered write path we remove all the tricky to 451c59d87c4SChristoph Hellwig * follow locking changes and looping. 
452c59d87c4SChristoph Hellwig * 453c59d87c4SChristoph Hellwig * If there are cached pages or we're extending the file, we need IOLOCK_EXCL 454c59d87c4SChristoph Hellwig * until we're sure the bytes at the new EOF have been zeroed and/or the cached 455c59d87c4SChristoph Hellwig * pages are flushed out. 456c59d87c4SChristoph Hellwig * 457c59d87c4SChristoph Hellwig * In most cases the direct IO writes will be done holding IOLOCK_SHARED 458c59d87c4SChristoph Hellwig * allowing them to be done in parallel with reads and other direct IO writes. 459c59d87c4SChristoph Hellwig * However, if the IO is not aligned to filesystem blocks, the direct IO layer 460c59d87c4SChristoph Hellwig * needs to do sub-block zeroing and that requires serialisation against other 461c59d87c4SChristoph Hellwig * direct IOs to the same block. In this case we need to serialise the 462c59d87c4SChristoph Hellwig * submission of the unaligned IOs so that we don't get racing block zeroing in 463c59d87c4SChristoph Hellwig * the dio layer. To avoid the problem with aio, we also need to wait for 464c59d87c4SChristoph Hellwig * outstanding IOs to complete so that unwritten extent conversion is completed 465c59d87c4SChristoph Hellwig * before we try to map the overlapping block. This is currently implemented by 4664a06fd26SChristoph Hellwig * hitting it with a big hammer (i.e. inode_dio_wait()). 467c59d87c4SChristoph Hellwig * 468c59d87c4SChristoph Hellwig * Returns with locks held indicated by @iolock and errors indicated by 469c59d87c4SChristoph Hellwig * negative return values. 470c59d87c4SChristoph Hellwig */ 471c59d87c4SChristoph Hellwig STATIC ssize_t 472c59d87c4SChristoph Hellwig xfs_file_dio_aio_write( 473c59d87c4SChristoph Hellwig struct kiocb *iocb, 474b3188919SAl Viro struct iov_iter *from) 475c59d87c4SChristoph Hellwig { 476c59d87c4SChristoph Hellwig struct file *file = iocb->ki_filp; 477c59d87c4SChristoph Hellwig struct address_space *mapping = file->f_mapping; 478c59d87c4SChristoph Hellwig struct inode *inode = mapping->host; 479c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 480c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 481c59d87c4SChristoph Hellwig ssize_t ret = 0; 482c59d87c4SChristoph Hellwig int unaligned_io = 0; 483d0606464SChristoph Hellwig int iolock; 484b3188919SAl Viro size_t count = iov_iter_count(from); 485f9acc19cSChristoph Hellwig struct xfs_buftarg *target = xfs_inode_buftarg(ip); 486c59d87c4SChristoph Hellwig 4877c71ee78SEric Sandeen /* DIO must be aligned to device logical sector size */ 48816d4d435SChristoph Hellwig if ((iocb->ki_pos | count) & target->bt_logical_sectormask) 489b474c7aeSEric Sandeen return -EINVAL; 490c59d87c4SChristoph Hellwig 4910ee7a3f6SChristoph Hellwig /* 4920ee7a3f6SChristoph Hellwig * Don't take the exclusive iolock here unless the I/O is unaligned to 4930ee7a3f6SChristoph Hellwig * the file system block size. We don't need to consider the EOF 4940ee7a3f6SChristoph Hellwig * extension case here because xfs_file_aio_write_checks() will relock 4950ee7a3f6SChristoph Hellwig * the inode as necessary for EOF zeroing cases and fill out the new 4960ee7a3f6SChristoph Hellwig * inode size as appropriate. 
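 *
 * A minimal sketch of what this means for callers (assumed userspace
 * example on a non-reflink inode, with 4096 byte filesystem blocks, a
 * 512 byte logical sector device and fd opened with O_DIRECT):
 *
 *	pwrite(fd, buf, 4096, 4096);	- block aligned, takes IOLOCK_SHARED
 *	pwrite(fd, buf, 512, 512);	- sub-block, sets unaligned_io and
 *					  takes IOLOCK_EXCL
 *
 * Both calls already satisfy the bt_logical_sectormask check above.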
4970ee7a3f6SChristoph Hellwig */ 49813712713SChristoph Hellwig if ((iocb->ki_pos & mp->m_blockmask) || 4990ee7a3f6SChristoph Hellwig ((iocb->ki_pos + count) & mp->m_blockmask)) { 500c59d87c4SChristoph Hellwig unaligned_io = 1; 50154a4ef8aSChristoph Hellwig 50254a4ef8aSChristoph Hellwig /* 50354a4ef8aSChristoph Hellwig * We can't properly handle unaligned direct I/O to reflink 50454a4ef8aSChristoph Hellwig * files yet, as we can't unshare a partial block. 50554a4ef8aSChristoph Hellwig */ 50666ae56a5SChristoph Hellwig if (xfs_is_cow_inode(ip)) { 50754a4ef8aSChristoph Hellwig trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count); 50854a4ef8aSChristoph Hellwig return -EREMCHG; 50954a4ef8aSChristoph Hellwig } 510d0606464SChristoph Hellwig iolock = XFS_IOLOCK_EXCL; 5110ee7a3f6SChristoph Hellwig } else { 512d0606464SChristoph Hellwig iolock = XFS_IOLOCK_SHARED; 513c58cb165SChristoph Hellwig } 514c59d87c4SChristoph Hellwig 515942491c9SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) { 5161fdeaea4SDarrick J. Wong /* unaligned dio always waits, bail */ 5171fdeaea4SDarrick J. Wong if (unaligned_io) 5181fdeaea4SDarrick J. Wong return -EAGAIN; 519942491c9SChristoph Hellwig if (!xfs_ilock_nowait(ip, iolock)) 52029a5d29eSGoldwyn Rodrigues return -EAGAIN; 521942491c9SChristoph Hellwig } else { 52265523218SChristoph Hellwig xfs_ilock(ip, iolock); 52329a5d29eSGoldwyn Rodrigues } 5240ee7a3f6SChristoph Hellwig 52599733fa3SAl Viro ret = xfs_file_aio_write_checks(iocb, from, &iolock); 526c59d87c4SChristoph Hellwig if (ret) 527d0606464SChristoph Hellwig goto out; 52899733fa3SAl Viro count = iov_iter_count(from); 529c59d87c4SChristoph Hellwig 530c59d87c4SChristoph Hellwig /* 5312032a8a2SBrian Foster * If we are doing unaligned IO, we can't allow any other overlapping IO 5322032a8a2SBrian Foster * in-flight at the same time or we risk data corruption. Wait for all 5332032a8a2SBrian Foster * other IO to drain before we submit. If the IO is aligned, demote the 5342032a8a2SBrian Foster * iolock if we had to take the exclusive lock in 5352032a8a2SBrian Foster * xfs_file_aio_write_checks() for other reasons. 536c59d87c4SChristoph Hellwig */ 53729a5d29eSGoldwyn Rodrigues if (unaligned_io) { 5384a06fd26SChristoph Hellwig inode_dio_wait(inode); 53929a5d29eSGoldwyn Rodrigues } else if (iolock == XFS_IOLOCK_EXCL) { 54065523218SChristoph Hellwig xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); 541d0606464SChristoph Hellwig iolock = XFS_IOLOCK_SHARED; 542c59d87c4SChristoph Hellwig } 543c59d87c4SChristoph Hellwig 5443176c3e0SChristoph Hellwig trace_xfs_file_direct_write(ip, count, iocb->ki_pos); 5452032a8a2SBrian Foster /* 546906753beSJan Kara * If unaligned, this is the only IO in-flight. Wait on it before we 547906753beSJan Kara * release the iolock to prevent subsequent overlapping IO. 5482032a8a2SBrian Foster */ 549f150b423SChristoph Hellwig ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops, 550f150b423SChristoph Hellwig &xfs_dio_write_ops, 551906753beSJan Kara is_sync_kiocb(iocb) || unaligned_io); 552d0606464SChristoph Hellwig out: 55365523218SChristoph Hellwig xfs_iunlock(ip, iolock); 554d0606464SChristoph Hellwig 5556b698edeSDave Chinner /* 55616d4d435SChristoph Hellwig * No fallback to buffered IO on errors for XFS, direct IO will either 55716d4d435SChristoph Hellwig * complete fully or fail. 
5586b698edeSDave Chinner */ 55916d4d435SChristoph Hellwig ASSERT(ret < 0 || ret == count); 56016d4d435SChristoph Hellwig return ret; 56116d4d435SChristoph Hellwig } 56216d4d435SChristoph Hellwig 563f021bd07SArnd Bergmann static noinline ssize_t 56416d4d435SChristoph Hellwig xfs_file_dax_write( 56516d4d435SChristoph Hellwig struct kiocb *iocb, 56616d4d435SChristoph Hellwig struct iov_iter *from) 56716d4d435SChristoph Hellwig { 5686c31f495SChristoph Hellwig struct inode *inode = iocb->ki_filp->f_mapping->host; 56916d4d435SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 57017879e8fSChristoph Hellwig int iolock = XFS_IOLOCK_EXCL; 5716c31f495SChristoph Hellwig ssize_t ret, error = 0; 5726c31f495SChristoph Hellwig size_t count; 5736c31f495SChristoph Hellwig loff_t pos; 57416d4d435SChristoph Hellwig 575942491c9SChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) { 576942491c9SChristoph Hellwig if (!xfs_ilock_nowait(ip, iolock)) 57729a5d29eSGoldwyn Rodrigues return -EAGAIN; 578942491c9SChristoph Hellwig } else { 57965523218SChristoph Hellwig xfs_ilock(ip, iolock); 58029a5d29eSGoldwyn Rodrigues } 58129a5d29eSGoldwyn Rodrigues 58216d4d435SChristoph Hellwig ret = xfs_file_aio_write_checks(iocb, from, &iolock); 58316d4d435SChristoph Hellwig if (ret) 58416d4d435SChristoph Hellwig goto out; 58516d4d435SChristoph Hellwig 5866c31f495SChristoph Hellwig pos = iocb->ki_pos; 5876c31f495SChristoph Hellwig count = iov_iter_count(from); 5888b2180b3SDave Chinner 5896c31f495SChristoph Hellwig trace_xfs_file_dax_write(ip, count, pos); 590f150b423SChristoph Hellwig ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops); 5916c31f495SChristoph Hellwig if (ret > 0 && iocb->ki_pos > i_size_read(inode)) { 5926c31f495SChristoph Hellwig i_size_write(inode, iocb->ki_pos); 5936c31f495SChristoph Hellwig error = xfs_setfilesize(ip, pos, ret); 59416d4d435SChristoph Hellwig } 59516d4d435SChristoph Hellwig out: 59665523218SChristoph Hellwig xfs_iunlock(ip, iolock); 597ed5c3e66SDave Chinner if (error) 598ed5c3e66SDave Chinner return error; 599ed5c3e66SDave Chinner 600ed5c3e66SDave Chinner if (ret > 0) { 601ed5c3e66SDave Chinner XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); 602ed5c3e66SDave Chinner 603ed5c3e66SDave Chinner /* Handle various SYNC-type writes */ 604ed5c3e66SDave Chinner ret = generic_write_sync(iocb, ret); 605ed5c3e66SDave Chinner } 606ed5c3e66SDave Chinner return ret; 607c59d87c4SChristoph Hellwig } 608c59d87c4SChristoph Hellwig 609c59d87c4SChristoph Hellwig STATIC ssize_t 610c59d87c4SChristoph Hellwig xfs_file_buffered_aio_write( 611c59d87c4SChristoph Hellwig struct kiocb *iocb, 612b3188919SAl Viro struct iov_iter *from) 613c59d87c4SChristoph Hellwig { 614c59d87c4SChristoph Hellwig struct file *file = iocb->ki_filp; 615c59d87c4SChristoph Hellwig struct address_space *mapping = file->f_mapping; 616c59d87c4SChristoph Hellwig struct inode *inode = mapping->host; 617c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 618c59d87c4SChristoph Hellwig ssize_t ret; 619c59d87c4SChristoph Hellwig int enospc = 0; 620c3155097SBrian Foster int iolock; 621c59d87c4SChristoph Hellwig 62291f9943eSChristoph Hellwig if (iocb->ki_flags & IOCB_NOWAIT) 62391f9943eSChristoph Hellwig return -EOPNOTSUPP; 62491f9943eSChristoph Hellwig 625c3155097SBrian Foster write_retry: 626c3155097SBrian Foster iolock = XFS_IOLOCK_EXCL; 62765523218SChristoph Hellwig xfs_ilock(ip, iolock); 628c59d87c4SChristoph Hellwig 62999733fa3SAl Viro ret = xfs_file_aio_write_checks(iocb, from, &iolock); 630c59d87c4SChristoph Hellwig if (ret) 
631d0606464SChristoph Hellwig goto out; 632c59d87c4SChristoph Hellwig 633c59d87c4SChristoph Hellwig /* We can write back this queue in page reclaim */ 634de1414a6SChristoph Hellwig current->backing_dev_info = inode_to_bdi(inode); 635c59d87c4SChristoph Hellwig 6363176c3e0SChristoph Hellwig trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); 637f150b423SChristoph Hellwig ret = iomap_file_buffered_write(iocb, from, 638f150b423SChristoph Hellwig &xfs_buffered_write_iomap_ops); 6390a64bc2cSAl Viro if (likely(ret >= 0)) 64099733fa3SAl Viro iocb->ki_pos += ret; 641dc06f398SBrian Foster 642c59d87c4SChristoph Hellwig /* 643dc06f398SBrian Foster * If we hit a space limit, try to free up some lingering preallocated 644dc06f398SBrian Foster * space before returning an error. In the case of ENOSPC, first try to 645dc06f398SBrian Foster * write back all dirty inodes to free up some of the excess reserved 646dc06f398SBrian Foster * metadata space. This reduces the chances that the eofblocks scan 647dc06f398SBrian Foster * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this 648dc06f398SBrian Foster * also behaves as a filter to prevent too many eofblocks scans from 649dc06f398SBrian Foster * running at the same time. 650c59d87c4SChristoph Hellwig */ 651dc06f398SBrian Foster if (ret == -EDQUOT && !enospc) { 652c3155097SBrian Foster xfs_iunlock(ip, iolock); 653dc06f398SBrian Foster enospc = xfs_inode_free_quota_eofblocks(ip); 654dc06f398SBrian Foster if (enospc) 655dc06f398SBrian Foster goto write_retry; 65683104d44SDarrick J. Wong enospc = xfs_inode_free_quota_cowblocks(ip); 65783104d44SDarrick J. Wong if (enospc) 65883104d44SDarrick J. Wong goto write_retry; 659c3155097SBrian Foster iolock = 0; 660dc06f398SBrian Foster } else if (ret == -ENOSPC && !enospc) { 661dc06f398SBrian Foster struct xfs_eofblocks eofb = {0}; 662dc06f398SBrian Foster 663c59d87c4SChristoph Hellwig enospc = 1; 6649aa05000SDave Chinner xfs_flush_inodes(ip->i_mount); 665c3155097SBrian Foster 666c3155097SBrian Foster xfs_iunlock(ip, iolock); 667dc06f398SBrian Foster eofb.eof_flags = XFS_EOF_FLAGS_SYNC; 668dc06f398SBrian Foster xfs_icache_free_eofblocks(ip->i_mount, &eofb); 669cf2cb784SBrian Foster xfs_icache_free_cowblocks(ip->i_mount, &eofb); 670c59d87c4SChristoph Hellwig goto write_retry; 671c59d87c4SChristoph Hellwig } 672d0606464SChristoph Hellwig 673c59d87c4SChristoph Hellwig current->backing_dev_info = NULL; 674d0606464SChristoph Hellwig out: 675c3155097SBrian Foster if (iolock) 67665523218SChristoph Hellwig xfs_iunlock(ip, iolock); 677ed5c3e66SDave Chinner 678ed5c3e66SDave Chinner if (ret > 0) { 679ed5c3e66SDave Chinner XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); 680ed5c3e66SDave Chinner /* Handle various SYNC-type writes */ 681ed5c3e66SDave Chinner ret = generic_write_sync(iocb, ret); 682ed5c3e66SDave Chinner } 683c59d87c4SChristoph Hellwig return ret; 684c59d87c4SChristoph Hellwig } 685c59d87c4SChristoph Hellwig 686c59d87c4SChristoph Hellwig STATIC ssize_t 687bf97f3bcSAl Viro xfs_file_write_iter( 688c59d87c4SChristoph Hellwig struct kiocb *iocb, 689bf97f3bcSAl Viro struct iov_iter *from) 690c59d87c4SChristoph Hellwig { 691c59d87c4SChristoph Hellwig struct file *file = iocb->ki_filp; 692c59d87c4SChristoph Hellwig struct address_space *mapping = file->f_mapping; 693c59d87c4SChristoph Hellwig struct inode *inode = mapping->host; 694c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 695c59d87c4SChristoph Hellwig ssize_t ret; 696bf97f3bcSAl Viro size_t ocount = 
iov_iter_count(from); 697c59d87c4SChristoph Hellwig 698ff6d6af2SBill O'Donnell XFS_STATS_INC(ip->i_mount, xs_write_calls); 699c59d87c4SChristoph Hellwig 700c59d87c4SChristoph Hellwig if (ocount == 0) 701c59d87c4SChristoph Hellwig return 0; 702c59d87c4SChristoph Hellwig 703bf97f3bcSAl Viro if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 704bf97f3bcSAl Viro return -EIO; 705c59d87c4SChristoph Hellwig 70616d4d435SChristoph Hellwig if (IS_DAX(inode)) 707ed5c3e66SDave Chinner return xfs_file_dax_write(iocb, from); 708ed5c3e66SDave Chinner 709ed5c3e66SDave Chinner if (iocb->ki_flags & IOCB_DIRECT) { 7100613f16cSDarrick J. Wong /* 7110613f16cSDarrick J. Wong * Allow a directio write to fall back to a buffered 7120613f16cSDarrick J. Wong * write *only* in the case that we're doing a reflink 7130613f16cSDarrick J. Wong * CoW. In all other directio scenarios we do not 7140613f16cSDarrick J. Wong * allow an operation to fall back to buffered mode. 7150613f16cSDarrick J. Wong */ 716bf97f3bcSAl Viro ret = xfs_file_dio_aio_write(iocb, from); 717ed5c3e66SDave Chinner if (ret != -EREMCHG) 718c59d87c4SChristoph Hellwig return ret; 719c59d87c4SChristoph Hellwig } 720c59d87c4SChristoph Hellwig 721ed5c3e66SDave Chinner return xfs_file_buffered_aio_write(iocb, from); 722ed5c3e66SDave Chinner } 723ed5c3e66SDave Chinner 724d6dc57e2SDan Williams static void 725d6dc57e2SDan Williams xfs_wait_dax_page( 726e25ff835SDave Jiang struct inode *inode) 727d6dc57e2SDan Williams { 728d6dc57e2SDan Williams struct xfs_inode *ip = XFS_I(inode); 729d6dc57e2SDan Williams 730d6dc57e2SDan Williams xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); 731d6dc57e2SDan Williams schedule(); 732d6dc57e2SDan Williams xfs_ilock(ip, XFS_MMAPLOCK_EXCL); 733d6dc57e2SDan Williams } 734d6dc57e2SDan Williams 735d6dc57e2SDan Williams static int 736d6dc57e2SDan Williams xfs_break_dax_layouts( 737d6dc57e2SDan Williams struct inode *inode, 738e25ff835SDave Jiang bool *retry) 739d6dc57e2SDan Williams { 740d6dc57e2SDan Williams struct page *page; 741d6dc57e2SDan Williams 742d6dc57e2SDan Williams ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); 743d6dc57e2SDan Williams 744d6dc57e2SDan Williams page = dax_layout_busy_page(inode->i_mapping); 745d6dc57e2SDan Williams if (!page) 746d6dc57e2SDan Williams return 0; 747d6dc57e2SDan Williams 748e25ff835SDave Jiang *retry = true; 749d6dc57e2SDan Williams return ___wait_var_event(&page->_refcount, 750d6dc57e2SDan Williams atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, 751e25ff835SDave Jiang 0, 0, xfs_wait_dax_page(inode)); 752d6dc57e2SDan Williams } 753d6dc57e2SDan Williams 75469eb5fa1SDan Williams int 75569eb5fa1SDan Williams xfs_break_layouts( 75669eb5fa1SDan Williams struct inode *inode, 75769eb5fa1SDan Williams uint *iolock, 75869eb5fa1SDan Williams enum layout_break_reason reason) 75969eb5fa1SDan Williams { 76069eb5fa1SDan Williams bool retry; 761d6dc57e2SDan Williams int error; 76269eb5fa1SDan Williams 76369eb5fa1SDan Williams ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)); 76469eb5fa1SDan Williams 765d6dc57e2SDan Williams do { 766d6dc57e2SDan Williams retry = false; 76769eb5fa1SDan Williams switch (reason) { 76869eb5fa1SDan Williams case BREAK_UNMAP: 769a4722a64SEric Sandeen error = xfs_break_dax_layouts(inode, &retry); 770d6dc57e2SDan Williams if (error || retry) 771d6dc57e2SDan Williams break; 77269eb5fa1SDan Williams /* fall through */ 77369eb5fa1SDan Williams case BREAK_WRITE: 774d6dc57e2SDan Williams error = xfs_break_leased_layouts(inode, iolock, &retry); 775d6dc57e2SDan Williams break; 
77669eb5fa1SDan Williams default: 77769eb5fa1SDan Williams WARN_ON_ONCE(1); 778d6dc57e2SDan Williams error = -EINVAL; 77969eb5fa1SDan Williams } 780d6dc57e2SDan Williams } while (error == 0 && retry); 781d6dc57e2SDan Williams 782d6dc57e2SDan Williams return error; 78369eb5fa1SDan Williams } 78469eb5fa1SDan Williams 785a904b1caSNamjae Jeon #define XFS_FALLOC_FL_SUPPORTED \ 786a904b1caSNamjae Jeon (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 787a904b1caSNamjae Jeon FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ 78898cc2db5SDarrick J. Wong FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE) 789a904b1caSNamjae Jeon 790c59d87c4SChristoph Hellwig STATIC long 791c59d87c4SChristoph Hellwig xfs_file_fallocate( 792c59d87c4SChristoph Hellwig struct file *file, 793c59d87c4SChristoph Hellwig int mode, 794c59d87c4SChristoph Hellwig loff_t offset, 795c59d87c4SChristoph Hellwig loff_t len) 796c59d87c4SChristoph Hellwig { 797496ad9aaSAl Viro struct inode *inode = file_inode(file); 79883aee9e4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 799c59d87c4SChristoph Hellwig long error; 8008add71caSChristoph Hellwig enum xfs_prealloc_flags flags = 0; 801c63a8eaeSDan Williams uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 802c59d87c4SChristoph Hellwig loff_t new_size = 0; 803749f24f3SThomas Meyer bool do_file_insert = false; 804c59d87c4SChristoph Hellwig 80583aee9e4SChristoph Hellwig if (!S_ISREG(inode->i_mode)) 80683aee9e4SChristoph Hellwig return -EINVAL; 807a904b1caSNamjae Jeon if (mode & ~XFS_FALLOC_FL_SUPPORTED) 808c59d87c4SChristoph Hellwig return -EOPNOTSUPP; 809c59d87c4SChristoph Hellwig 810781355c6SChristoph Hellwig xfs_ilock(ip, iolock); 81169eb5fa1SDan Williams error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); 812781355c6SChristoph Hellwig if (error) 813781355c6SChristoph Hellwig goto out_unlock; 814781355c6SChristoph Hellwig 815249bd908SDave Chinner /* 816249bd908SDave Chinner * Must wait for all AIO to complete before we continue as AIO can 817249bd908SDave Chinner * change the file size on completion without holding any locks we 818249bd908SDave Chinner * currently hold. We must do this first because AIO can update both 819249bd908SDave Chinner * the on disk and in memory inode sizes, and the operations that follow 820249bd908SDave Chinner * require the in-memory size to be fully up-to-date. 821249bd908SDave Chinner */ 822249bd908SDave Chinner inode_dio_wait(inode); 823249bd908SDave Chinner 824249bd908SDave Chinner /* 825249bd908SDave Chinner * Now AIO and DIO has drained we flush and (if necessary) invalidate 826249bd908SDave Chinner * the cached range over the first operation we are about to run. 827249bd908SDave Chinner * 828249bd908SDave Chinner * We care about zero and collapse here because they both run a hole 829249bd908SDave Chinner * punch over the range first. Because that can zero data, and the range 830249bd908SDave Chinner * of invalidation for the shift operations is much larger, we still do 831249bd908SDave Chinner * the required flush for collapse in xfs_prepare_shift(). 832249bd908SDave Chinner * 833249bd908SDave Chinner * Insert has the same range requirements as collapse, and we extend the 834249bd908SDave Chinner * file first which can zero data. Hence insert has the same 835249bd908SDave Chinner * flush/invalidate requirements as collapse and so they are both 836249bd908SDave Chinner * handled at the right time by xfs_prepare_shift(). 
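 *
 * For instance (assumed userspace call, not issued by this file), a
 * fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len)
 * goes through the xfs_flush_unmap_range() call below before
 * xfs_free_file_space() punches the hole.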
837249bd908SDave Chinner */ 838249bd908SDave Chinner if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE | 839249bd908SDave Chinner FALLOC_FL_COLLAPSE_RANGE)) { 840249bd908SDave Chinner error = xfs_flush_unmap_range(ip, offset, len); 841249bd908SDave Chinner if (error) 842249bd908SDave Chinner goto out_unlock; 843249bd908SDave Chinner } 844249bd908SDave Chinner 84583aee9e4SChristoph Hellwig if (mode & FALLOC_FL_PUNCH_HOLE) { 84683aee9e4SChristoph Hellwig error = xfs_free_file_space(ip, offset, len); 84783aee9e4SChristoph Hellwig if (error) 84883aee9e4SChristoph Hellwig goto out_unlock; 849e1d8fb88SNamjae Jeon } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { 85093407472SFabian Frederick unsigned int blksize_mask = i_blocksize(inode) - 1; 851e1d8fb88SNamjae Jeon 852e1d8fb88SNamjae Jeon if (offset & blksize_mask || len & blksize_mask) { 8532451337dSDave Chinner error = -EINVAL; 854e1d8fb88SNamjae Jeon goto out_unlock; 855e1d8fb88SNamjae Jeon } 856e1d8fb88SNamjae Jeon 85723fffa92SLukas Czerner /* 85823fffa92SLukas Czerner * There is no need to overlap collapse range with EOF, 85923fffa92SLukas Czerner * in which case it is effectively a truncate operation 86023fffa92SLukas Czerner */ 86123fffa92SLukas Czerner if (offset + len >= i_size_read(inode)) { 8622451337dSDave Chinner error = -EINVAL; 86323fffa92SLukas Czerner goto out_unlock; 86423fffa92SLukas Czerner } 86523fffa92SLukas Czerner 866e1d8fb88SNamjae Jeon new_size = i_size_read(inode) - len; 867e1d8fb88SNamjae Jeon 868e1d8fb88SNamjae Jeon error = xfs_collapse_file_space(ip, offset, len); 869e1d8fb88SNamjae Jeon if (error) 870e1d8fb88SNamjae Jeon goto out_unlock; 871a904b1caSNamjae Jeon } else if (mode & FALLOC_FL_INSERT_RANGE) { 87293407472SFabian Frederick unsigned int blksize_mask = i_blocksize(inode) - 1; 8737d83fb14SDarrick J. Wong loff_t isize = i_size_read(inode); 874a904b1caSNamjae Jeon 875a904b1caSNamjae Jeon if (offset & blksize_mask || len & blksize_mask) { 876a904b1caSNamjae Jeon error = -EINVAL; 877a904b1caSNamjae Jeon goto out_unlock; 878a904b1caSNamjae Jeon } 879a904b1caSNamjae Jeon 8807d83fb14SDarrick J. Wong /* 8817d83fb14SDarrick J. Wong * New inode size must not exceed ->s_maxbytes, accounting for 8827d83fb14SDarrick J. Wong * possible signed overflow. 8837d83fb14SDarrick J. Wong */ 8847d83fb14SDarrick J. Wong if (inode->i_sb->s_maxbytes - isize < len) { 885a904b1caSNamjae Jeon error = -EFBIG; 886a904b1caSNamjae Jeon goto out_unlock; 887a904b1caSNamjae Jeon } 8887d83fb14SDarrick J. Wong new_size = isize + len; 889a904b1caSNamjae Jeon 890a904b1caSNamjae Jeon /* Offset should be less than i_size */ 8917d83fb14SDarrick J. Wong if (offset >= isize) { 892a904b1caSNamjae Jeon error = -EINVAL; 893a904b1caSNamjae Jeon goto out_unlock; 894a904b1caSNamjae Jeon } 895749f24f3SThomas Meyer do_file_insert = true; 89683aee9e4SChristoph Hellwig } else { 8978add71caSChristoph Hellwig flags |= XFS_PREALLOC_SET; 8988add71caSChristoph Hellwig 899c59d87c4SChristoph Hellwig if (!(mode & FALLOC_FL_KEEP_SIZE) && 900c59d87c4SChristoph Hellwig offset + len > i_size_read(inode)) { 901c59d87c4SChristoph Hellwig new_size = offset + len; 9022451337dSDave Chinner error = inode_newsize_ok(inode, new_size); 903c59d87c4SChristoph Hellwig if (error) 904c59d87c4SChristoph Hellwig goto out_unlock; 905c59d87c4SChristoph Hellwig } 906c59d87c4SChristoph Hellwig 90766ae56a5SChristoph Hellwig if (mode & FALLOC_FL_ZERO_RANGE) { 908360c09c0SChristoph Hellwig /* 909360c09c0SChristoph Hellwig * Punch a hole and prealloc the range. 
We use a hole 910360c09c0SChristoph Hellwig * punch rather than unwritten extent conversion for two 911360c09c0SChristoph Hellwig * reasons: 912360c09c0SChristoph Hellwig * 913360c09c0SChristoph Hellwig * 1.) Hole punch handles partial block zeroing for us. 914360c09c0SChristoph Hellwig * 2.) If prealloc returns ENOSPC, the file range is 915360c09c0SChristoph Hellwig * still zero-valued by virtue of the hole punch. 916360c09c0SChristoph Hellwig */ 917360c09c0SChristoph Hellwig unsigned int blksize = i_blocksize(inode); 918360c09c0SChristoph Hellwig 919360c09c0SChristoph Hellwig trace_xfs_zero_file_space(ip); 920360c09c0SChristoph Hellwig 921360c09c0SChristoph Hellwig error = xfs_free_file_space(ip, offset, len); 922360c09c0SChristoph Hellwig if (error) 923360c09c0SChristoph Hellwig goto out_unlock; 924360c09c0SChristoph Hellwig 925360c09c0SChristoph Hellwig len = round_up(offset + len, blksize) - 926360c09c0SChristoph Hellwig round_down(offset, blksize); 927360c09c0SChristoph Hellwig offset = round_down(offset, blksize); 92866ae56a5SChristoph Hellwig } else if (mode & FALLOC_FL_UNSHARE_RANGE) { 92998cc2db5SDarrick J. Wong error = xfs_reflink_unshare(ip, offset, len); 93098cc2db5SDarrick J. Wong if (error) 93198cc2db5SDarrick J. Wong goto out_unlock; 93266ae56a5SChristoph Hellwig } else { 93366ae56a5SChristoph Hellwig /* 93466ae56a5SChristoph Hellwig * If always_cow mode we can't use preallocations and 93566ae56a5SChristoph Hellwig * thus should not create them. 93666ae56a5SChristoph Hellwig */ 93766ae56a5SChristoph Hellwig if (xfs_is_always_cow_inode(ip)) { 93866ae56a5SChristoph Hellwig error = -EOPNOTSUPP; 93966ae56a5SChristoph Hellwig goto out_unlock; 94066ae56a5SChristoph Hellwig } 941360c09c0SChristoph Hellwig } 94266ae56a5SChristoph Hellwig 943360c09c0SChristoph Hellwig if (!xfs_is_always_cow_inode(ip)) { 94483aee9e4SChristoph Hellwig error = xfs_alloc_file_space(ip, offset, len, 94583aee9e4SChristoph Hellwig XFS_BMAPI_PREALLOC); 94683aee9e4SChristoph Hellwig if (error) 94783aee9e4SChristoph Hellwig goto out_unlock; 94883aee9e4SChristoph Hellwig } 949360c09c0SChristoph Hellwig } 950c59d87c4SChristoph Hellwig 95183aee9e4SChristoph Hellwig if (file->f_flags & O_DSYNC) 9528add71caSChristoph Hellwig flags |= XFS_PREALLOC_SYNC; 9538add71caSChristoph Hellwig 9548add71caSChristoph Hellwig error = xfs_update_prealloc_flags(ip, flags); 955c59d87c4SChristoph Hellwig if (error) 956c59d87c4SChristoph Hellwig goto out_unlock; 957c59d87c4SChristoph Hellwig 958c59d87c4SChristoph Hellwig /* Change file size if needed */ 959c59d87c4SChristoph Hellwig if (new_size) { 960c59d87c4SChristoph Hellwig struct iattr iattr; 961c59d87c4SChristoph Hellwig 962c59d87c4SChristoph Hellwig iattr.ia_valid = ATTR_SIZE; 963c59d87c4SChristoph Hellwig iattr.ia_size = new_size; 96469bca807SJan Kara error = xfs_vn_setattr_size(file_dentry(file), &iattr); 965a904b1caSNamjae Jeon if (error) 966a904b1caSNamjae Jeon goto out_unlock; 967c59d87c4SChristoph Hellwig } 968c59d87c4SChristoph Hellwig 969a904b1caSNamjae Jeon /* 970a904b1caSNamjae Jeon * Perform hole insertion now that the file size has been 971a904b1caSNamjae Jeon * updated so that if we crash during the operation we don't 972a904b1caSNamjae Jeon * leave shifted extents past EOF and hence losing access to 973a904b1caSNamjae Jeon * the data that is contained within them. 
974a904b1caSNamjae Jeon */ 975a904b1caSNamjae Jeon if (do_file_insert) 976a904b1caSNamjae Jeon error = xfs_insert_file_space(ip, offset, len); 977a904b1caSNamjae Jeon 978c59d87c4SChristoph Hellwig out_unlock: 979781355c6SChristoph Hellwig xfs_iunlock(ip, iolock); 9802451337dSDave Chinner return error; 981c59d87c4SChristoph Hellwig } 982c59d87c4SChristoph Hellwig 98340144e49SJan Kara STATIC int 98440144e49SJan Kara xfs_file_fadvise( 98540144e49SJan Kara struct file *file, 98640144e49SJan Kara loff_t start, 98740144e49SJan Kara loff_t end, 98840144e49SJan Kara int advice) 98940144e49SJan Kara { 99040144e49SJan Kara struct xfs_inode *ip = XFS_I(file_inode(file)); 99140144e49SJan Kara int ret; 99240144e49SJan Kara int lockflags = 0; 99340144e49SJan Kara 99440144e49SJan Kara /* 99540144e49SJan Kara * Operations creating pages in page cache need protection from hole 99640144e49SJan Kara * punching and similar ops 99740144e49SJan Kara */ 99840144e49SJan Kara if (advice == POSIX_FADV_WILLNEED) { 99940144e49SJan Kara lockflags = XFS_IOLOCK_SHARED; 100040144e49SJan Kara xfs_ilock(ip, lockflags); 100140144e49SJan Kara } 100240144e49SJan Kara ret = generic_fadvise(file, start, end, advice); 100340144e49SJan Kara if (lockflags) 100440144e49SJan Kara xfs_iunlock(ip, lockflags); 100540144e49SJan Kara return ret; 100640144e49SJan Kara } 10073fc9f5e4SDarrick J. Wong 1008da034bccSEric Biggers STATIC loff_t 10092e5dfc99SDarrick J. Wong xfs_file_remap_range( 10109fe26045SDarrick J. Wong struct file *file_in, 10119fe26045SDarrick J. Wong loff_t pos_in, 10129fe26045SDarrick J. Wong struct file *file_out, 10139fe26045SDarrick J. Wong loff_t pos_out, 101442ec3d4cSDarrick J. Wong loff_t len, 10152e5dfc99SDarrick J. Wong unsigned int remap_flags) 10169fe26045SDarrick J. Wong { 10173fc9f5e4SDarrick J. Wong struct inode *inode_in = file_inode(file_in); 10183fc9f5e4SDarrick J. Wong struct xfs_inode *src = XFS_I(inode_in); 10193fc9f5e4SDarrick J. Wong struct inode *inode_out = file_inode(file_out); 10203fc9f5e4SDarrick J. Wong struct xfs_inode *dest = XFS_I(inode_out); 10213fc9f5e4SDarrick J. Wong struct xfs_mount *mp = src->i_mount; 10223fc9f5e4SDarrick J. Wong loff_t remapped = 0; 10233fc9f5e4SDarrick J. Wong xfs_extlen_t cowextsize; 10243fc9f5e4SDarrick J. Wong int ret; 10253fc9f5e4SDarrick J. Wong 10262e5dfc99SDarrick J. Wong if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 10272e5dfc99SDarrick J. Wong return -EINVAL; 1028cc714660SDarrick J. Wong 10293fc9f5e4SDarrick J. Wong if (!xfs_sb_version_hasreflink(&mp->m_sb)) 10303fc9f5e4SDarrick J. Wong return -EOPNOTSUPP; 10313fc9f5e4SDarrick J. Wong 10323fc9f5e4SDarrick J. Wong if (XFS_FORCED_SHUTDOWN(mp)) 10333fc9f5e4SDarrick J. Wong return -EIO; 10343fc9f5e4SDarrick J. Wong 10353fc9f5e4SDarrick J. Wong /* Prepare and then clone file data. */ 10363fc9f5e4SDarrick J. Wong ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out, 10373fc9f5e4SDarrick J. Wong &len, remap_flags); 10383fc9f5e4SDarrick J. Wong if (ret < 0 || len == 0) 10393fc9f5e4SDarrick J. Wong return ret; 10403fc9f5e4SDarrick J. Wong 10413fc9f5e4SDarrick J. Wong trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 10423fc9f5e4SDarrick J. Wong 10433fc9f5e4SDarrick J. Wong ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len, 10443fc9f5e4SDarrick J. Wong &remapped); 10453fc9f5e4SDarrick J. Wong if (ret) 10463fc9f5e4SDarrick J. Wong goto out_unlock; 10473fc9f5e4SDarrick J. Wong 10483fc9f5e4SDarrick J. Wong /* 10493fc9f5e4SDarrick J. 
Wong * Carry the cowextsize hint from src to dest if we're sharing the 10503fc9f5e4SDarrick J. Wong * entire source file to the entire destination file, the source file 10513fc9f5e4SDarrick J. Wong * has a cowextsize hint, and the destination file does not. 10523fc9f5e4SDarrick J. Wong */ 10533fc9f5e4SDarrick J. Wong cowextsize = 0; 10543fc9f5e4SDarrick J. Wong if (pos_in == 0 && len == i_size_read(inode_in) && 10553fc9f5e4SDarrick J. Wong (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && 10563fc9f5e4SDarrick J. Wong pos_out == 0 && len >= i_size_read(inode_out) && 10573fc9f5e4SDarrick J. Wong !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) 10583fc9f5e4SDarrick J. Wong cowextsize = src->i_d.di_cowextsize; 10593fc9f5e4SDarrick J. Wong 10603fc9f5e4SDarrick J. Wong ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, 10613fc9f5e4SDarrick J. Wong remap_flags); 10625833112dSChristoph Hellwig if (ret) 10635833112dSChristoph Hellwig goto out_unlock; 10643fc9f5e4SDarrick J. Wong 10655833112dSChristoph Hellwig if (mp->m_flags & XFS_MOUNT_WSYNC) 10665833112dSChristoph Hellwig xfs_log_force_inode(dest); 10673fc9f5e4SDarrick J. Wong out_unlock: 10683fc9f5e4SDarrick J. Wong xfs_reflink_remap_unlock(file_in, file_out); 10693fc9f5e4SDarrick J. Wong if (ret) 10703fc9f5e4SDarrick J. Wong trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); 10713fc9f5e4SDarrick J. Wong return remapped > 0 ? remapped : ret; 10729fe26045SDarrick J. Wong } 1073c59d87c4SChristoph Hellwig 1074c59d87c4SChristoph Hellwig STATIC int 1075c59d87c4SChristoph Hellwig xfs_file_open( 1076c59d87c4SChristoph Hellwig struct inode *inode, 1077c59d87c4SChristoph Hellwig struct file *file) 1078c59d87c4SChristoph Hellwig { 1079c59d87c4SChristoph Hellwig if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) 1080c59d87c4SChristoph Hellwig return -EFBIG; 1081c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) 1082c59d87c4SChristoph Hellwig return -EIO; 108391f9943eSChristoph Hellwig file->f_mode |= FMODE_NOWAIT; 1084c59d87c4SChristoph Hellwig return 0; 1085c59d87c4SChristoph Hellwig } 1086c59d87c4SChristoph Hellwig 1087c59d87c4SChristoph Hellwig STATIC int 1088c59d87c4SChristoph Hellwig xfs_dir_open( 1089c59d87c4SChristoph Hellwig struct inode *inode, 1090c59d87c4SChristoph Hellwig struct file *file) 1091c59d87c4SChristoph Hellwig { 1092c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1093c59d87c4SChristoph Hellwig int mode; 1094c59d87c4SChristoph Hellwig int error; 1095c59d87c4SChristoph Hellwig 1096c59d87c4SChristoph Hellwig error = xfs_file_open(inode, file); 1097c59d87c4SChristoph Hellwig if (error) 1098c59d87c4SChristoph Hellwig return error; 1099c59d87c4SChristoph Hellwig 1100c59d87c4SChristoph Hellwig /* 1101c59d87c4SChristoph Hellwig * If there are any blocks, read-ahead block 0 as we're almost 1102c59d87c4SChristoph Hellwig * certain to have the next operation be a read there. 1103c59d87c4SChristoph Hellwig */ 1104309ecac8SChristoph Hellwig mode = xfs_ilock_data_map_shared(ip); 1105*daf83964SChristoph Hellwig if (ip->i_df.if_nextents > 0) 110606566fdaSChristoph Hellwig error = xfs_dir3_data_readahead(ip, 0, 0); 1107c59d87c4SChristoph Hellwig xfs_iunlock(ip, mode); 11087a652bbeSDarrick J. 
1110c59d87c4SChristoph Hellwig
1111c59d87c4SChristoph Hellwig STATIC int
1112c59d87c4SChristoph Hellwig xfs_file_release(
1113c59d87c4SChristoph Hellwig struct inode *inode,
1114c59d87c4SChristoph Hellwig struct file *filp)
1115c59d87c4SChristoph Hellwig {
11162451337dSDave Chinner return xfs_release(XFS_I(inode));
1117c59d87c4SChristoph Hellwig }
1118c59d87c4SChristoph Hellwig
1119c59d87c4SChristoph Hellwig STATIC int
1120c59d87c4SChristoph Hellwig xfs_file_readdir(
1121b8227554SAl Viro struct file *file,
1122b8227554SAl Viro struct dir_context *ctx)
1123c59d87c4SChristoph Hellwig {
1124b8227554SAl Viro struct inode *inode = file_inode(file);
1125c59d87c4SChristoph Hellwig xfs_inode_t *ip = XFS_I(inode);
1126c59d87c4SChristoph Hellwig size_t bufsize;
1127c59d87c4SChristoph Hellwig
1128c59d87c4SChristoph Hellwig /*
1129c59d87c4SChristoph Hellwig  * The Linux API doesn't pass the total size of the buffer
1130c59d87c4SChristoph Hellwig  * we read into down to the filesystem. With the filldir concept
1131c59d87c4SChristoph Hellwig  * it's not needed for correct information, but the XFS dir2 leaf
1132c59d87c4SChristoph Hellwig  * code wants an estimate of the buffer size to calculate its
1133c59d87c4SChristoph Hellwig  * readahead window and size the buffers used for mapping to
1134c59d87c4SChristoph Hellwig  * physical blocks.
1135c59d87c4SChristoph Hellwig  *
1136c59d87c4SChristoph Hellwig  * Try to give it an estimate that's good enough, maybe at some
1137c59d87c4SChristoph Hellwig  * point we can change the ->readdir prototype to include the
1138c59d87c4SChristoph Hellwig  * buffer size. For now we use the current glibc buffer size.
1139c59d87c4SChristoph Hellwig  */
1140a5c46e5eSDarrick J. Wong bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);
1141c59d87c4SChristoph Hellwig
1142acb9553cSDarrick J. Wong return xfs_readdir(NULL, ip, ctx, bufsize);
11433fe3e6b1SJeff Liu }
11443fe3e6b1SJeff Liu
11453fe3e6b1SJeff Liu STATIC loff_t
11463fe3e6b1SJeff Liu xfs_file_llseek(
11473fe3e6b1SJeff Liu struct file *file,
11483fe3e6b1SJeff Liu loff_t offset,
114959f9c004SEric Sandeen int whence)
11503fe3e6b1SJeff Liu {
11519b2970aaSChristoph Hellwig struct inode *inode = file->f_mapping->host;
11529b2970aaSChristoph Hellwig
11539b2970aaSChristoph Hellwig if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
11549b2970aaSChristoph Hellwig return -EIO;
11559b2970aaSChristoph Hellwig
115659f9c004SEric Sandeen switch (whence) {
11579b2970aaSChristoph Hellwig default:
115859f9c004SEric Sandeen return generic_file_llseek(file, offset, whence);
11593fe3e6b1SJeff Liu case SEEK_HOLE:
116060271ab7SChristoph Hellwig offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
11619b2970aaSChristoph Hellwig break;
116249c69591SEric Sandeen case SEEK_DATA:
116360271ab7SChristoph Hellwig offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
11649b2970aaSChristoph Hellwig break;
11653fe3e6b1SJeff Liu }
11669b2970aaSChristoph Hellwig
11679b2970aaSChristoph Hellwig if (offset < 0)
11689b2970aaSChristoph Hellwig return offset;
11699b2970aaSChristoph Hellwig return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
11703fe3e6b1SJeff Liu }
11713fe3e6b1SJeff Liu
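The SEEK_HOLE/SEEK_DATA cases in xfs_file_llseek() above are driven by ordinary lseek() from userspace. A small sketch of how an application walks a file's data/hole layout through that interface (illustrative only, not part of this source file):

/* Illustrative userspace sketch: print the data extents of a file using the
 * SEEK_DATA/SEEK_HOLE interface served by xfs_file_llseek(). */
#define _GNU_SOURCE		/* SEEK_DATA, SEEK_HOLE */
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>

int dump_layout(const char *path)
{
	int fd = open(path, O_RDONLY);
	off_t pos = 0, hole;

	if (fd < 0)
		return -1;
	for (;;) {
		pos = lseek(fd, pos, SEEK_DATA);
		if (pos < 0)
			break;		/* ENXIO: no more data before EOF */
		hole = lseek(fd, pos, SEEK_HOLE);
		printf("data: [%lld, %lld)\n", (long long)pos, (long long)hole);
		pos = hole;
	}
	close(fd);
	return 0;
}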
1172de0e8c20SDave Chinner /*
1173de0e8c20SDave Chinner  * Locking for serialisation of IO during page faults. This results in a lock
1174de0e8c20SDave Chinner  * ordering of:
1175de0e8c20SDave Chinner  *
1176de0e8c20SDave Chinner  * mmap_sem (MM)
11776b698edeSDave Chinner  *   sb_start_pagefault(vfs, freeze)
117813ad4fe3SDave Chinner  *     i_mmaplock (XFS - truncate serialisation)
1179de0e8c20SDave Chinner  *       page_lock (MM)
1180de0e8c20SDave Chinner  *         i_lock (XFS - extent map serialisation)
1181de0e8c20SDave Chinner  */
118205edd888SSouptick Joarder static vm_fault_t
1183d522d569SChristoph Hellwig __xfs_filemap_fault(
1184c791ace1SDave Jiang struct vm_fault *vmf,
1185d522d569SChristoph Hellwig enum page_entry_size pe_size,
1186d522d569SChristoph Hellwig bool write_fault)
1187acd76e74SMatthew Wilcox {
1188f4200391SDave Jiang struct inode *inode = file_inode(vmf->vma->vm_file);
1189acd76e74SMatthew Wilcox struct xfs_inode *ip = XFS_I(inode);
119005edd888SSouptick Joarder vm_fault_t ret;
1191acd76e74SMatthew Wilcox
1192d522d569SChristoph Hellwig trace_xfs_filemap_fault(ip, pe_size, write_fault);
1193acd76e74SMatthew Wilcox
1194d522d569SChristoph Hellwig if (write_fault) {
1195acd76e74SMatthew Wilcox sb_start_pagefault(inode->i_sb);
1196f4200391SDave Jiang file_update_time(vmf->vma->vm_file);
119713ad4fe3SDave Chinner }
119813ad4fe3SDave Chinner
1199acd76e74SMatthew Wilcox xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1200d522d569SChristoph Hellwig if (IS_DAX(inode)) {
1201a39e596bSChristoph Hellwig pfn_t pfn;
1202a39e596bSChristoph Hellwig
1203690c2a38SChristoph Hellwig ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
1204690c2a38SChristoph Hellwig (write_fault && !vmf->cow_page) ?
1205f150b423SChristoph Hellwig &xfs_direct_write_iomap_ops :
1206f150b423SChristoph Hellwig &xfs_read_iomap_ops);
1207a39e596bSChristoph Hellwig if (ret & VM_FAULT_NEEDDSYNC)
1208a39e596bSChristoph Hellwig ret = dax_finish_sync_fault(vmf, pe_size, pfn);
1209d522d569SChristoph Hellwig } else {
1210d522d569SChristoph Hellwig if (write_fault)
1211f150b423SChristoph Hellwig ret = iomap_page_mkwrite(vmf,
1212f150b423SChristoph Hellwig &xfs_buffered_write_iomap_ops);
1213d522d569SChristoph Hellwig else
1214d522d569SChristoph Hellwig ret = filemap_fault(vmf);
1215d522d569SChristoph Hellwig }
1216acd76e74SMatthew Wilcox xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
121713ad4fe3SDave Chinner
1218d522d569SChristoph Hellwig if (write_fault)
1219acd76e74SMatthew Wilcox sb_end_pagefault(inode->i_sb);
1220acd76e74SMatthew Wilcox return ret;
1221acd76e74SMatthew Wilcox }
1222acd76e74SMatthew Wilcox
122305edd888SSouptick Joarder static vm_fault_t
1224d522d569SChristoph Hellwig xfs_filemap_fault(
1225d522d569SChristoph Hellwig struct vm_fault *vmf)
1226d522d569SChristoph Hellwig {
1227d522d569SChristoph Hellwig /* DAX can shortcut the normal fault path on write faults! */
1228d522d569SChristoph Hellwig return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
1229d522d569SChristoph Hellwig IS_DAX(file_inode(vmf->vma->vm_file)) &&
1230d522d569SChristoph Hellwig (vmf->flags & FAULT_FLAG_WRITE));
1231d522d569SChristoph Hellwig }
1232d522d569SChristoph Hellwig
123305edd888SSouptick Joarder static vm_fault_t
1234d522d569SChristoph Hellwig xfs_filemap_huge_fault(
1235d522d569SChristoph Hellwig struct vm_fault *vmf,
1236d522d569SChristoph Hellwig enum page_entry_size pe_size)
1237d522d569SChristoph Hellwig {
1238d522d569SChristoph Hellwig if (!IS_DAX(file_inode(vmf->vma->vm_file)))
1239d522d569SChristoph Hellwig return VM_FAULT_FALLBACK;
1240d522d569SChristoph Hellwig
1241d522d569SChristoph Hellwig /* DAX can shortcut the normal fault path on write faults! */
1242d522d569SChristoph Hellwig return __xfs_filemap_fault(vmf, pe_size,
1243d522d569SChristoph Hellwig (vmf->flags & FAULT_FLAG_WRITE));
1244d522d569SChristoph Hellwig }
1245d522d569SChristoph Hellwig
124605edd888SSouptick Joarder static vm_fault_t
1247d522d569SChristoph Hellwig xfs_filemap_page_mkwrite(
1248d522d569SChristoph Hellwig struct vm_fault *vmf)
1249d522d569SChristoph Hellwig {
1250d522d569SChristoph Hellwig return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
1251d522d569SChristoph Hellwig }
1252d522d569SChristoph Hellwig
12533af49285SDave Chinner /*
12547b565c9fSJan Kara  * pfn_mkwrite was originally intended to ensure we capture time stamp updates
12557b565c9fSJan Kara  * on write faults. In reality, it needs to serialise against truncate and
12567b565c9fSJan Kara  * prepare memory for writing, so handle it as a standard write fault.
12573af49285SDave Chinner  */
125805edd888SSouptick Joarder static vm_fault_t
12593af49285SDave Chinner xfs_filemap_pfn_mkwrite(
12603af49285SDave Chinner struct vm_fault *vmf)
12613af49285SDave Chinner {
12623af49285SDave Chinner
12637b565c9fSJan Kara return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
12643af49285SDave Chinner }
12653af49285SDave Chinner
12666b698edeSDave Chinner static const struct vm_operations_struct xfs_file_vm_ops = {
12676b698edeSDave Chinner .fault = xfs_filemap_fault,
1268a2d58167SDave Jiang .huge_fault = xfs_filemap_huge_fault,
12696b698edeSDave Chinner .map_pages = filemap_map_pages,
12706b698edeSDave Chinner .page_mkwrite = xfs_filemap_page_mkwrite,
12713af49285SDave Chinner .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
12726b698edeSDave Chinner };
12736b698edeSDave Chinner
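The write-fault path wired up in xfs_file_vm_ops above (->page_mkwrite, or ->fault with FAULT_FLAG_WRITE on DAX) is exercised whenever a shared mapping is first dirtied. A minimal userspace sketch (illustrative only; assumes the file is at least one page long):

/* Illustrative userspace sketch: dirty one byte through a shared mapping.
 * The first store to the clean page takes a write fault that XFS handles in
 * xfs_filemap_page_mkwrite()/__xfs_filemap_fault(). */
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int touch_mapped(const char *path)
{
	int fd = open(path, O_RDWR);
	char *p;

	if (fd < 0)
		return -1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return -1;
	p[0] = 'x';			/* write fault -> ->page_mkwrite */
	msync(p, 4096, MS_SYNC);	/* write the dirtied page back */
	munmap(p, 4096);
	close(fd);
	return 0;
}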
12746b698edeSDave Chinner STATIC int
12756b698edeSDave Chinner xfs_file_mmap(
127630fa529eSChristoph Hellwig struct file *file,
12776b698edeSDave Chinner struct vm_area_struct *vma)
12786b698edeSDave Chinner {
127930fa529eSChristoph Hellwig struct inode *inode = file_inode(file);
128030fa529eSChristoph Hellwig struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode));
1281b21fec41SPankaj Gupta
1282a39e596bSChristoph Hellwig /*
1283b21fec41SPankaj Gupta  * We don't support synchronous mappings for non-DAX files, or for
1284b21fec41SPankaj Gupta  * DAX files whose underlying dax_device is not synchronous.
1285a39e596bSChristoph Hellwig  */
128630fa529eSChristoph Hellwig if (!daxdev_mapping_supported(vma, target->bt_daxdev))
1287a39e596bSChristoph Hellwig return -EOPNOTSUPP;
1288a39e596bSChristoph Hellwig
128930fa529eSChristoph Hellwig file_accessed(file);
12906b698edeSDave Chinner vma->vm_ops = &xfs_file_vm_ops;
129130fa529eSChristoph Hellwig if (IS_DAX(inode))
1292e1fb4a08SDave Jiang vma->vm_flags |= VM_HUGEPAGE;
12936b698edeSDave Chinner return 0;
1294075a924dSDave Chinner }
1295075a924dSDave Chinner
1296c59d87c4SChristoph Hellwig const struct file_operations xfs_file_operations = {
12973fe3e6b1SJeff Liu .llseek = xfs_file_llseek,
1298b4f5d2c6SAl Viro .read_iter = xfs_file_read_iter,
1299bf97f3bcSAl Viro .write_iter = xfs_file_write_iter,
130082c156f8SAl Viro .splice_read = generic_file_splice_read,
13018d020765SAl Viro .splice_write = iter_file_splice_write,
130281214babSChristoph Hellwig .iopoll = iomap_dio_iopoll,
1303c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl,
1304c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT
1305c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl,
1306c59d87c4SChristoph Hellwig #endif
1307c59d87c4SChristoph Hellwig .mmap = xfs_file_mmap,
1308a39e596bSChristoph Hellwig .mmap_supported_flags = MAP_SYNC,
1309c59d87c4SChristoph Hellwig .open = xfs_file_open,
1310c59d87c4SChristoph Hellwig .release = xfs_file_release,
1311c59d87c4SChristoph Hellwig .fsync = xfs_file_fsync,
1312dbe6ec81SToshi Kani .get_unmapped_area = thp_get_unmapped_area,
1313c59d87c4SChristoph Hellwig .fallocate = xfs_file_fallocate,
131440144e49SJan Kara .fadvise = xfs_file_fadvise,
13152e5dfc99SDarrick J. Wong .remap_file_range = xfs_file_remap_range,
1316c59d87c4SChristoph Hellwig };
1317c59d87c4SChristoph Hellwig
1318c59d87c4SChristoph Hellwig const struct file_operations xfs_dir_file_operations = {
1319c59d87c4SChristoph Hellwig .open = xfs_dir_open,
1320c59d87c4SChristoph Hellwig .read = generic_read_dir,
13213b0a3c1aSAl Viro .iterate_shared = xfs_file_readdir,
1322c59d87c4SChristoph Hellwig .llseek = generic_file_llseek,
1323c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl,
1324c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT
1325c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl,
1326c59d87c4SChristoph Hellwig #endif
13271da2f2dbSChristoph Hellwig .fsync = xfs_dir_fsync,
1328c59d87c4SChristoph Hellwig };
1329
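The .mmap_supported_flags = MAP_SYNC entry in xfs_file_operations is what allows userspace to request a synchronous DAX mapping; xfs_file_mmap() then rejects the request with EOPNOTSUPP unless the backing dax_device can honour it. A minimal userspace sketch of requesting such a mapping (illustrative only; assumes an XFS filesystem mounted with DAX on a synchronous pmem device):

/* Illustrative userspace sketch: request a MAP_SYNC mapping.  If mmap()
 * succeeds, the file metadata needed to reach faulted-in pages is durable,
 * so flushing CPU caches is enough to persist stores to the mapping. */
#include <sys/mman.h>
#include <linux/mman.h>		/* MAP_SYNC, MAP_SHARED_VALIDATE */
#include <fcntl.h>
#include <stdio.h>

void *map_sync(const char *path, size_t len)
{
	int fd = open(path, O_RDWR);
	void *p;

	if (fd < 0)
		return NULL;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED) {
		/* EOPNOTSUPP: not DAX, or the dax_device is not synchronous. */
		perror("mmap(MAP_SYNC)");
		return NULL;
	}
	return p;
}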