/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_icache.h"
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"

#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
#include <linux/backing-dev.h>

static const struct vm_operations_struct xfs_file_vm_ops;

/*
 * Clear the specified ranges to zero through either the pagecache or DAX.
 * Holes and unwritten extents will be left as-is as they already are zeroed.
 */
int
xfs_zero_range(
	struct xfs_inode	*ip,
	xfs_off_t		pos,
	xfs_off_t		count,
	bool			*did_zero)
{
	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
}

int
xfs_update_prealloc_flags(
	struct xfs_inode	*ip,
	enum xfs_prealloc_flags	flags)
{
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
			0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	if (!(flags & XFS_PREALLOC_INVISIBLE)) {
		VFS_I(ip)->i_mode &= ~S_ISUID;
		if (VFS_I(ip)->i_mode & S_IXGRP)
			VFS_I(ip)->i_mode &= ~S_ISGID;
		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	if (flags & XFS_PREALLOC_SET)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	if (flags & XFS_PREALLOC_CLEAR)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	if (flags & XFS_PREALLOC_SYNC)
		xfs_trans_set_sync(tp);
	return xfs_trans_commit(tp);
}

/*
 * Fsync operations on directories are much simpler than on regular files,
 * as there is no file data to flush, and thus also no need for explicit
 * cache flush operations, and there are no non-transaction metadata updates
 * on directories either.
 */
STATIC int
xfs_dir_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_lsn_t		lsn = 0;

	trace_xfs_dir_fsync(ip);

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip))
		lsn = ip->i_itemp->ili_last_lsn;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!lsn)
		return 0;
	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}

STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * If we have an RT and/or log subvolume we need to make sure to flush
	 * the write cache the device used for file data first.  This is to
	 * ensure newly written file data make it to disk before logging the new
	 * inode size in case of an extending write.
	 */
	if (XFS_IS_REALTIME_INODE(ip))
		xfs_blkdev_issue_flush(mp->m_rtdev_targp);
	else if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	/*
	 * All metadata updates are logged, which means that we just have to
	 * flush the log up to the latest LSN that touched the inode. If we have
	 * concurrent fsync/fdatasync() calls, we need them to all block on the
	 * log force before we clear the ili_fsync_fields field. This ensures
	 * that we don't get a racing sync operation that does not wait for the
	 * metadata to hit the journal before returning. If we race with
	 * clearing the ili_fsync_fields, then all that will happen is the log
	 * force will do nothing as the lsn will already be on disk. We can't
	 * race with setting ili_fsync_fields because that is done under
	 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
	 * until after the ili_fsync_fields is cleared.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = ip->i_itemp->ili_last_lsn;
	}

	if (lsn) {
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
		ip->i_itemp->ili_fsync_fields = 0;
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op, we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
	    mp->m_logdev_targp == mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return error;
}

STATIC ssize_t
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	size_t			count = iov_iter_count(to);
	ssize_t			ret;

	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	file_accessed(iocb->ki_filp);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

static noinline ssize_t
xfs_file_dax_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
	size_t			count = iov_iter_count(to);
	ssize_t			ret = 0;

	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(iocb->ki_filp);
	return ret;
}

STATIC ssize_t
xfs_file_buffered_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	ssize_t			ret;

	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	ret = generic_file_read_iter(iocb, to);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

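/*
 * Dispatch a read to the DAX, direct or buffered I/O path based on the inode
 * flags and the iocb.  All three paths take the IOLOCK shared, so reads do
 * not exclude each other.
 */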
STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
	ssize_t			ret = 0;

	XFS_STATS_INC(mp, xs_read_calls);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (IS_DAX(inode))
		ret = xfs_file_dax_read(iocb, to);
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_read(iocb, to);
	else
		ret = xfs_file_buffered_aio_read(iocb, to);

	if (ret > 0)
		XFS_STATS_ADD(mp, xs_read_bytes, ret);
	return ret;
}

/*
 * Zero any on disk space between the current EOF and the new, larger EOF.
 *
 * This handles the normal case of zeroing the remainder of the last block in
 * the file and the unusual case of zeroing blocks out beyond the size of the
 * file.  This second case only happens with fixed size extents and when the
 * system crashes before the inode size was updated but after blocks were
 * allocated.
 *
 * Expects the iolock to be held exclusive, and will take the ilock internally.
 */
int					/* error (positive) */
xfs_zero_eof(
	struct xfs_inode	*ip,
	xfs_off_t		offset,		/* starting I/O offset */
	xfs_fsize_t		isize,		/* current inode size */
	bool			*did_zeroing)
{
	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(offset > isize);

	trace_xfs_zero_eof(ip, isize, offset - isize);
	return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
}

/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct kiocb		*iocb,
	struct iov_iter		*from,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			error = 0;
	size_t			count = iov_iter_count(from);
	bool			drained_dio = false;

restart:
	error = generic_write_checks(iocb, from);
	if (error <= 0)
		return error;

	error = xfs_break_layouts(inode, iolock);
	if (error)
		return error;

	/*
	 * For changing security info in file_remove_privs() we need i_rwsem
	 * exclusively.
	 */
	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
		xfs_iunlock(ip, *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		xfs_ilock(ip, *iolock);
		goto restart;
	}
	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which implies
	 * having to redo all checks before.
	 *
	 * We need to serialise against EOF updates that occur in IO
	 * completions here. We want to make sure that nobody is changing the
	 * size while we do this check until we have placed an IO barrier (i.e.
	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
	 * The spinlock effectively forms a memory barrier once we have the
	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
	 * and hence be able to correctly determine if we need to run zeroing.
	 */
	spin_lock(&ip->i_flags_lock);
	if (iocb->ki_pos > i_size_read(inode)) {
		bool	zero = false;

		spin_unlock(&ip->i_flags_lock);
		if (!drained_dio) {
			if (*iolock == XFS_IOLOCK_SHARED) {
				xfs_iunlock(ip, *iolock);
				*iolock = XFS_IOLOCK_EXCL;
				xfs_ilock(ip, *iolock);
				iov_iter_reexpand(from, count);
			}
			/*
			 * We now have an IO submission barrier in place, but
			 * AIO can do EOF updates during IO completion and hence
			 * we now need to wait for all of them to drain. Non-AIO
			 * DIO will have drained before we are given the
			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
			 * no-op.
			 */
			inode_dio_wait(inode);
			drained_dio = true;
			goto restart;
		}
		error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
		if (error)
			return error;
	} else
		spin_unlock(&ip->i_flags_lock);

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above.  Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
		error = file_update_time(file);
		if (error)
			return error;
	}

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	if (!IS_NOSEC(inode))
		return file_remove_privs(file);
	return 0;
}

static int
xfs_dio_write_end_io(
	struct kiocb		*iocb,
	ssize_t			size,
	unsigned		flags)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			offset = iocb->ki_pos;
	bool			update_size = false;
	int			error = 0;

	trace_xfs_end_io_direct_write(ip, offset, size);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (size <= 0)
		return size;

	/*
	 * We need to update the in-core inode size here so that we don't end up
	 * with the on-disk inode size being outside the in-core inode size. We
	 * have no other method of updating EOF for AIO, so always do it here
	 * if necessary.
	 *
	 * We need to lock the test/set EOF update as we can be racing with
	 * other IO completions here to update the EOF. Failing to serialise
	 * here can result in EOF moving backwards and Bad Things Happen when
	 * that occurs.
	 */
	spin_lock(&ip->i_flags_lock);
	if (offset + size > i_size_read(inode)) {
		i_size_write(inode, offset + size);
		update_size = true;
	}
	spin_unlock(&ip->i_flags_lock);

	if (flags & IOMAP_DIO_COW) {
		error = xfs_reflink_end_cow(ip, offset, size);
		if (error)
			return error;
	}

	if (flags & IOMAP_DIO_UNWRITTEN)
		error = xfs_iomap_write_unwritten(ip, offset, size);
	else if (update_size)
		error = xfs_setfilesize(ip, offset, size);

	return error;
}

/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/*
	 * Don't take the exclusive iolock here unless the I/O is unaligned to
	 * the file system block size.  We don't need to consider the EOF
	 * extension case here because xfs_file_aio_write_checks() will relock
	 * the inode as necessary for EOF zeroing cases and fill out the new
	 * inode size as appropriate.
	 */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;

		/*
		 * We can't properly handle unaligned direct I/O to reflink
		 * files yet, as we can't unshare a partial block.
		 */
		if (xfs_is_reflink_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
			return -EREMCHG;
		}
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	xfs_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to take the exclusive lock
	 * for other reasons in xfs_file_aio_write_checks.
	 */
	if (unaligned_io)
		inode_dio_wait(inode);
	else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
	xfs_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO on errors for XFS, direct IO will either
	 * complete fully or fail.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}

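/*
 * DAX writes are synchronous, so there is no I/O completion to update the
 * inode size for us.  Take the iolock exclusive, write through
 * dax_iomap_rw(), and update the in-core and on-disk inode size here if the
 * write extended the file.
 */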
static noinline ssize_t
xfs_file_dax_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			iolock = XFS_IOLOCK_EXCL;
	ssize_t			ret, error = 0;
	size_t			count;
	loff_t			pos;

	xfs_ilock(ip, iolock);
	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	pos = iocb->ki_pos;
	count = iov_iter_count(from);

	trace_xfs_file_dax_write(ip, count, pos);
	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		error = xfs_setfilesize(ip, pos, ret);
	}
out:
	xfs_iunlock(ip, iolock);
	return error ? error : ret;
}

STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	int			iolock;

write_retry:
	iolock = XFS_IOLOCK_EXCL;
	xfs_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
	if (likely(ret >= 0))
		iocb->ki_pos += ret;

	/*
	 * If we hit a space limit, try to free up some lingering preallocated
	 * space before returning an error. In the case of ENOSPC, first try to
	 * write back all dirty inodes to free up some of the excess reserved
	 * metadata space. This reduces the chances that the eofblocks scan
	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
	 * also behaves as a filter to prevent too many eofblocks scans from
	 * running at the same time.
	 */
	if (ret == -EDQUOT && !enospc) {
		xfs_iunlock(ip, iolock);
		enospc = xfs_inode_free_quota_eofblocks(ip);
		if (enospc)
			goto write_retry;
		enospc = xfs_inode_free_quota_cowblocks(ip);
		if (enospc)
			goto write_retry;
		iolock = 0;
	} else if (ret == -ENOSPC && !enospc) {
		struct xfs_eofblocks eofb = {0};

		enospc = 1;
		xfs_flush_inodes(ip->i_mount);

		xfs_iunlock(ip, iolock);
		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
		goto write_retry;
	}

	current->backing_dev_info = NULL;
out:
	if (iolock)
		xfs_iunlock(ip, iolock);
	return ret;
}

STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(ip->i_mount, xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (IS_DAX(inode))
		ret = xfs_file_dax_write(iocb, from);
	else if (iocb->ki_flags & IOCB_DIRECT) {
		/*
		 * Allow a directio write to fall back to a buffered
		 * write *only* in the case that we're doing a reflink
		 * CoW.  In all other directio scenarios we do not
		 * allow an operation to fall back to buffered mode.
		 */
		ret = xfs_file_dio_aio_write(iocb, from);
		if (ret == -EREMCHG)
			goto buffered;
	} else {
buffered:
		ret = xfs_file_buffered_aio_write(iocb, from);
	}

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

#define	XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
		 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)

STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	long			error;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL;
	loff_t			new_size = 0;
	bool			do_file_insert = 0;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock);
	if (error)
		goto out_unlock;

	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
	iolock |= XFS_MMAPLOCK_EXCL;

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;

		if (offset & blksize_mask || len & blksize_mask) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * There is no need to overlap collapse range with EOF,
		 * in which case it is effectively a truncate operation
		 */
		if (offset + len >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}

		new_size = i_size_read(inode) - len;

		error = xfs_collapse_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;

		new_size = i_size_read(inode) + len;
		if (offset & blksize_mask || len & blksize_mask) {
			error = -EINVAL;
			goto out_unlock;
		}

		/* check the new inode size does not wrap through zero */
		if (new_size > inode->i_sb->s_maxbytes) {
			error = -EFBIG;
			goto out_unlock;
		}

		/* Offset should be less than i_size */
		if (offset >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}
		do_file_insert = 1;
	} else {
		flags |= XFS_PREALLOC_SET;

		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = inode_newsize_ok(inode, new_size);
			if (error)
				goto out_unlock;
		}

		if (mode & FALLOC_FL_ZERO_RANGE)
			error = xfs_zero_file_space(ip, offset, len);
		else {
			if (mode & FALLOC_FL_UNSHARE_RANGE) {
				error = xfs_reflink_unshare(ip, offset, len);
				if (error)
					goto out_unlock;
			}
			error = xfs_alloc_file_space(ip, offset, len,
						     XFS_BMAPI_PREALLOC);
		}
		if (error)
			goto out_unlock;
	}

	if (file->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;

	error = xfs_update_prealloc_flags(ip, flags);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_vn_setattr_size(file_dentry(file), &iattr);
		if (error)
			goto out_unlock;
	}

	/*
	 * Perform hole insertion now that the file size has been
	 * updated so that if we crash during the operation we don't
	 * leave shifted extents past EOF and hence lose access to
	 * the data that is contained within them.
	 */
	if (do_file_insert)
		error = xfs_insert_file_space(ip, offset, len);

out_unlock:
	xfs_iunlock(ip, iolock);
	return error;
}

STATIC int
xfs_file_clone_range(
	struct file	*file_in,
	loff_t		pos_in,
	struct file	*file_out,
	loff_t		pos_out,
	u64		len)
{
	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
				     len, false);
}

STATIC ssize_t
xfs_file_dedupe_range(
	struct file	*src_file,
	u64		loff,
	u64		len,
	struct file	*dst_file,
	u64		dst_loff)
{
	int		error;

	error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
				     len, true);
	if (error)
		return error;
	return len;
}

STATIC int
xfs_file_open(
	struct inode	*inode,
	struct file	*file)
{
	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
		return -EIO;
	return 0;
}

STATIC int
xfs_dir_open(
	struct inode	*inode,
	struct file	*file)
{
	struct xfs_inode *ip = XFS_I(inode);
	int		mode;
	int		error;

	error = xfs_file_open(inode, file);
	if (error)
		return error;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.
	 */
	mode = xfs_ilock_data_map_shared(ip);
	if (ip->i_d.di_nextents > 0)
		error = xfs_dir3_data_readahead(ip, 0, -1);
	xfs_iunlock(ip, mode);
	return error;
}

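/*
 * Called when the last reference to the open file goes away; let
 * xfs_release() clean up post-EOF speculative preallocation.
 */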
STATIC int
xfs_file_release(
	struct inode	*inode,
	struct file	*filp)
{
	return xfs_release(XFS_I(inode));
}

STATIC int
xfs_file_readdir(
	struct file	*file,
	struct dir_context *ctx)
{
	struct inode	*inode = file_inode(file);
	xfs_inode_t	*ip = XFS_I(inode);
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass the total size of the buffer
	 * we read into down to the filesystem.  With the filldir concept
	 * it's not needed for correct information, but the XFS dir2 leaf
	 * code wants an estimate of the buffer size to calculate its
	 * readahead window and size the buffers used for mapping to
	 * physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some
	 * point we can change the ->readdir prototype to include the
	 * buffer size.  For now we use the current glibc buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);

	return xfs_readdir(ip, ctx, bufsize);
}

/*
 * This type is designed to indicate the type of offset we would like
 * to search from page cache for xfs_seek_hole_data().
 */
enum {
	HOLE_OFF = 0,
	DATA_OFF,
};

/*
 * Lookup the desired type of offset from the given page.
 *
 * On success, return true and the offset argument will point to the
 * start of the region that was found.  Otherwise this function will
 * return false and keep the offset argument unchanged.
 */
STATIC bool
xfs_lookup_buffer_offset(
	struct page		*page,
	loff_t			*offset,
	unsigned int		type)
{
	loff_t			lastoff = page_offset(page);
	bool			found = false;
	struct buffer_head	*bh, *head;

	bh = head = page_buffers(page);
	do {
		/*
		 * Unwritten extents that have data in the page
		 * cache covering them can be identified by the
		 * BH_Unwritten state flag.  Pages with multiple
		 * buffers might have a mix of holes, data and
		 * unwritten extents - any buffer with valid
		 * data in it should have BH_Uptodate flag set
		 * on it.
		 */
		if (buffer_unwritten(bh) ||
		    buffer_uptodate(bh)) {
			if (type == DATA_OFF)
				found = true;
		} else {
			if (type == HOLE_OFF)
				found = true;
		}

		if (found) {
			*offset = lastoff;
			break;
		}
		lastoff += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	return found;
}

/*
 * This routine is called to find out and return a data or hole offset
 * from the page cache for unwritten extents according to the desired
 * type for xfs_seek_hole_data().
 *
 * The argument offset is used to tell where we start to search from the
 * page cache.  Map is used to figure out the end points of the range to
 * lookup pages.
 *
 * Return true if the desired type of offset was found, and the argument
 * offset is filled with that address.  Otherwise, return false and keep
 * offset unchanged.
 */
STATIC bool
xfs_find_get_desired_pgoff(
	struct inode		*inode,
	struct xfs_bmbt_irec	*map,
	unsigned int		type,
	loff_t			*offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct pagevec		pvec;
	pgoff_t			index;
	pgoff_t			end;
	loff_t			endoff;
	loff_t			startoff = *offset;
	loff_t			lastoff = startoff;
	bool			found = false;

	pagevec_init(&pvec, 0);

	index = startoff >> PAGE_SHIFT;
	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
	end = endoff >> PAGE_SHIFT;
	do {
		int		want;
		unsigned	nr_pages;
		unsigned int	i;

		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  want);
		/*
		 * No page was mapped into the given range.  If we are
		 * searching holes and if this is the first time we got into
		 * the loop, it means that the given offset landed in a hole,
		 * return it.
		 *
		 * If we have already stepped through some block buffers to find
		 * holes but they all contain data, then the last
		 * offset is already updated and points to the end of the last
		 * mapped page; if it does not reach the endpoint of the
		 * search, there should be a hole between them.
		 */
		if (nr_pages == 0) {
			/* Data search found nothing */
			if (type == DATA_OFF)
				break;

			ASSERT(type == HOLE_OFF);
			if (lastoff == startoff || lastoff < endoff) {
				found = true;
				*offset = lastoff;
			}
			break;
		}

		/*
		 * At least we found one page.  If this is the first time we
		 * step into the loop, and if the first page index offset is
		 * greater than the given search offset, a hole was found.
		 */
		if (type == HOLE_OFF && lastoff == startoff &&
		    lastoff < page_offset(pvec.pages[0])) {
			found = true;
			break;
		}

		for (i = 0; i < nr_pages; i++) {
			struct page	*page = pvec.pages[i];
			loff_t		b_offset;

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL),
			 * or even swizzled back from swapper_space to tmpfs
			 * file mapping. However, page->index will not change
			 * because we have a reference on the page.
			 *
			 * Searching is done if the page index is out of range.
			 * If the current offset does not reach the end of
			 * the specified search range, there should be a hole
			 * between them.
			 */
			if (page->index > end) {
				if (type == HOLE_OFF && lastoff < endoff) {
					*offset = lastoff;
					found = true;
				}
				goto out;
			}

			lock_page(page);
			/*
			 * Page truncated or invalidated (page->mapping == NULL).
			 * We can freely skip it and proceed to check the next
			 * page.
			 */
			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			found = xfs_lookup_buffer_offset(page, &b_offset, type);
			if (found) {
				/*
				 * The found offset may be less than the
				 * starting point of the search if this is
				 * the first time we come here.
1136d126d43fSJeff Liu */
1137d126d43fSJeff Liu *offset = max_t(loff_t, startoff, b_offset);
1138d126d43fSJeff Liu unlock_page(page);
1139d126d43fSJeff Liu goto out;
1140d126d43fSJeff Liu }
1141d126d43fSJeff Liu
1142d126d43fSJeff Liu /*
1143d126d43fSJeff Liu * We were either searching for data but found nothing, or
1144d126d43fSJeff Liu * searching for a hole but found a data buffer. In either
1145d126d43fSJeff Liu * case the next page probably contains what we want, so
1146d126d43fSJeff Liu * update the last offset to point at it.
1147d126d43fSJeff Liu */
1148d126d43fSJeff Liu lastoff = page_offset(page) + PAGE_SIZE;
1149d126d43fSJeff Liu unlock_page(page);
1150d126d43fSJeff Liu }
1151d126d43fSJeff Liu
1152d126d43fSJeff Liu /*
1153d126d43fSJeff Liu * Fewer pages were returned than we asked for, so the search is
1154d126d43fSJeff Liu * done. A data search has found nothing, while a hole search has
1155d126d43fSJeff Liu * found a hole behind the last offset.
1156d126d43fSJeff Liu */
1157d126d43fSJeff Liu if (nr_pages < want) {
1158d126d43fSJeff Liu if (type == HOLE_OFF) {
1159d126d43fSJeff Liu *offset = lastoff;
1160d126d43fSJeff Liu found = true;
1161d126d43fSJeff Liu }
1162d126d43fSJeff Liu break;
1163d126d43fSJeff Liu }
1164d126d43fSJeff Liu
1165d126d43fSJeff Liu index = pvec.pages[i - 1]->index + 1;
1166d126d43fSJeff Liu pagevec_release(&pvec);
1167d126d43fSJeff Liu } while (index <= end);
1168d126d43fSJeff Liu
1169d126d43fSJeff Liu out:
1170d126d43fSJeff Liu pagevec_release(&pvec);
1171d126d43fSJeff Liu return found;
1172d126d43fSJeff Liu }
1173d126d43fSJeff Liu
11748aa7d37eSEric Sandeen /*
11758aa7d37eSEric Sandeen * The caller must lock the inode with xfs_ilock_data_map_shared();
11768aa7d37eSEric Sandeen * can we craft an appropriate ASSERT?
11778aa7d37eSEric Sandeen *
11788aa7d37eSEric Sandeen * The end argument exists because the VFS-level lseek interface is defined
11798aa7d37eSEric Sandeen * such that any offset past i_size shall return -ENXIO, but we use this for
11808aa7d37eSEric Sandeen * quota code which does not maintain i_size, and want to SEEK_DATA past i_size.
11818aa7d37eSEric Sandeen */
11828aa7d37eSEric Sandeen loff_t
11838aa7d37eSEric Sandeen __xfs_seek_hole_data(
11848aa7d37eSEric Sandeen struct inode *inode,
118549c69591SEric Sandeen loff_t start,
11868aa7d37eSEric Sandeen loff_t end,
118749c69591SEric Sandeen int whence)
11883fe3e6b1SJeff Liu {
11893fe3e6b1SJeff Liu struct xfs_inode *ip = XFS_I(inode);
11903fe3e6b1SJeff Liu struct xfs_mount *mp = ip->i_mount;
11913fe3e6b1SJeff Liu loff_t uninitialized_var(offset);
11923fe3e6b1SJeff Liu xfs_fileoff_t fsbno;
11938aa7d37eSEric Sandeen xfs_filblks_t lastbno;
11943fe3e6b1SJeff Liu int error;
11953fe3e6b1SJeff Liu
11968aa7d37eSEric Sandeen if (start >= end) {
11972451337dSDave Chinner error = -ENXIO;
11988aa7d37eSEric Sandeen goto out_error;
11993fe3e6b1SJeff Liu }
12003fe3e6b1SJeff Liu
120149c69591SEric Sandeen /*
120249c69591SEric Sandeen * Try to read extents from the first block indicated
120349c69591SEric Sandeen * by fsbno to the end block of the file.
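* Two extent mappings are requested from each xfs_bmapi_read() call
* below; getting only one mapping back is how the loop detects that
* the lookup has run into EOF.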
120449c69591SEric Sandeen */ 12053fe3e6b1SJeff Liu fsbno = XFS_B_TO_FSBT(mp, start); 12068aa7d37eSEric Sandeen lastbno = XFS_B_TO_FSB(mp, end); 1207b686d1f7SJeff Liu 1208b686d1f7SJeff Liu for (;;) { 1209b686d1f7SJeff Liu struct xfs_bmbt_irec map[2]; 1210b686d1f7SJeff Liu int nmap = 2; 1211b686d1f7SJeff Liu unsigned int i; 1212b686d1f7SJeff Liu 12138aa7d37eSEric Sandeen error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap, 1214b686d1f7SJeff Liu XFS_BMAPI_ENTIRE); 12153fe3e6b1SJeff Liu if (error) 12168aa7d37eSEric Sandeen goto out_error; 12173fe3e6b1SJeff Liu 1218b686d1f7SJeff Liu /* No extents at given offset, must be beyond EOF */ 1219b686d1f7SJeff Liu if (nmap == 0) { 12202451337dSDave Chinner error = -ENXIO; 12218aa7d37eSEric Sandeen goto out_error; 12223fe3e6b1SJeff Liu } 12233fe3e6b1SJeff Liu 1224b686d1f7SJeff Liu for (i = 0; i < nmap; i++) { 1225b686d1f7SJeff Liu offset = max_t(loff_t, start, 1226b686d1f7SJeff Liu XFS_FSB_TO_B(mp, map[i].br_startoff)); 1227b686d1f7SJeff Liu 122849c69591SEric Sandeen /* Landed in the hole we wanted? */ 122949c69591SEric Sandeen if (whence == SEEK_HOLE && 123049c69591SEric Sandeen map[i].br_startblock == HOLESTARTBLOCK) 123149c69591SEric Sandeen goto out; 123249c69591SEric Sandeen 123349c69591SEric Sandeen /* Landed in the data extent we wanted? */ 123449c69591SEric Sandeen if (whence == SEEK_DATA && 123549c69591SEric Sandeen (map[i].br_startblock == DELAYSTARTBLOCK || 123649c69591SEric Sandeen (map[i].br_state == XFS_EXT_NORM && 123749c69591SEric Sandeen !isnullstartblock(map[i].br_startblock)))) 1238b686d1f7SJeff Liu goto out; 1239b686d1f7SJeff Liu 1240b686d1f7SJeff Liu /* 124149c69591SEric Sandeen * Landed in an unwritten extent, try to search 124249c69591SEric Sandeen * for hole or data from page cache. 1243b686d1f7SJeff Liu */ 1244b686d1f7SJeff Liu if (map[i].br_state == XFS_EXT_UNWRITTEN) { 1245b686d1f7SJeff Liu if (xfs_find_get_desired_pgoff(inode, &map[i], 124649c69591SEric Sandeen whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF, 124749c69591SEric Sandeen &offset)) 1248b686d1f7SJeff Liu goto out; 1249b686d1f7SJeff Liu } 1250b686d1f7SJeff Liu } 1251b686d1f7SJeff Liu 1252b686d1f7SJeff Liu /* 125349c69591SEric Sandeen * We only received one extent out of the two requested. This 125449c69591SEric Sandeen * means we've hit EOF and didn't find what we are looking for. 1255b686d1f7SJeff Liu */ 1256b686d1f7SJeff Liu if (nmap == 1) { 125749c69591SEric Sandeen /* 125849c69591SEric Sandeen * If we were looking for a hole, set offset to 125949c69591SEric Sandeen * the end of the file (i.e., there is an implicit 126049c69591SEric Sandeen * hole at the end of any file). 126149c69591SEric Sandeen */ 126249c69591SEric Sandeen if (whence == SEEK_HOLE) { 12638aa7d37eSEric Sandeen offset = end; 1264b686d1f7SJeff Liu break; 1265b686d1f7SJeff Liu } 126649c69591SEric Sandeen /* 126749c69591SEric Sandeen * If we were looking for data, it's nowhere to be found 126849c69591SEric Sandeen */ 126949c69591SEric Sandeen ASSERT(whence == SEEK_DATA); 127049c69591SEric Sandeen error = -ENXIO; 12718aa7d37eSEric Sandeen goto out_error; 127249c69591SEric Sandeen } 1273b686d1f7SJeff Liu 1274b686d1f7SJeff Liu ASSERT(i > 1); 1275b686d1f7SJeff Liu 1276b686d1f7SJeff Liu /* 127749c69591SEric Sandeen * Nothing was found, proceed to the next round of search 127849c69591SEric Sandeen * if the next reading offset is not at or beyond EOF. 
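* The next round starts at the first block past the last extent
* returned by this lookup.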
1279b686d1f7SJeff Liu */ 1280b686d1f7SJeff Liu fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; 1281b686d1f7SJeff Liu start = XFS_FSB_TO_B(mp, fsbno); 12828aa7d37eSEric Sandeen if (start >= end) { 128349c69591SEric Sandeen if (whence == SEEK_HOLE) { 12848aa7d37eSEric Sandeen offset = end; 1285b686d1f7SJeff Liu break; 1286b686d1f7SJeff Liu } 128749c69591SEric Sandeen ASSERT(whence == SEEK_DATA); 128849c69591SEric Sandeen error = -ENXIO; 12898aa7d37eSEric Sandeen goto out_error; 129049c69591SEric Sandeen } 1291b686d1f7SJeff Liu } 1292b686d1f7SJeff Liu 1293b686d1f7SJeff Liu out: 1294b686d1f7SJeff Liu /* 129549c69591SEric Sandeen * If at this point we have found the hole we wanted, the returned 1296b686d1f7SJeff Liu * offset may be bigger than the file size as it may be aligned to 129749c69591SEric Sandeen * page boundary for unwritten extents. We need to deal with this 1298b686d1f7SJeff Liu * situation in particular. 1299b686d1f7SJeff Liu */ 130049c69591SEric Sandeen if (whence == SEEK_HOLE) 13018aa7d37eSEric Sandeen offset = min_t(loff_t, offset, end); 13028aa7d37eSEric Sandeen 13038aa7d37eSEric Sandeen return offset; 13048aa7d37eSEric Sandeen 13058aa7d37eSEric Sandeen out_error: 13068aa7d37eSEric Sandeen return error; 13078aa7d37eSEric Sandeen } 13088aa7d37eSEric Sandeen 13098aa7d37eSEric Sandeen STATIC loff_t 13108aa7d37eSEric Sandeen xfs_seek_hole_data( 13118aa7d37eSEric Sandeen struct file *file, 13128aa7d37eSEric Sandeen loff_t start, 13138aa7d37eSEric Sandeen int whence) 13148aa7d37eSEric Sandeen { 13158aa7d37eSEric Sandeen struct inode *inode = file->f_mapping->host; 13168aa7d37eSEric Sandeen struct xfs_inode *ip = XFS_I(inode); 13178aa7d37eSEric Sandeen struct xfs_mount *mp = ip->i_mount; 13188aa7d37eSEric Sandeen uint lock; 13198aa7d37eSEric Sandeen loff_t offset, end; 13208aa7d37eSEric Sandeen int error = 0; 13218aa7d37eSEric Sandeen 13228aa7d37eSEric Sandeen if (XFS_FORCED_SHUTDOWN(mp)) 13238aa7d37eSEric Sandeen return -EIO; 13248aa7d37eSEric Sandeen 13258aa7d37eSEric Sandeen lock = xfs_ilock_data_map_shared(ip); 13268aa7d37eSEric Sandeen 13278aa7d37eSEric Sandeen end = i_size_read(inode); 13288aa7d37eSEric Sandeen offset = __xfs_seek_hole_data(inode, start, end, whence); 13298aa7d37eSEric Sandeen if (offset < 0) { 13308aa7d37eSEric Sandeen error = offset; 13318aa7d37eSEric Sandeen goto out_unlock; 13328aa7d37eSEric Sandeen } 13338aa7d37eSEric Sandeen 133446a1c2c7SJie Liu offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 13353fe3e6b1SJeff Liu 13363fe3e6b1SJeff Liu out_unlock: 133701f4f327SChristoph Hellwig xfs_iunlock(ip, lock); 13383fe3e6b1SJeff Liu 13393fe3e6b1SJeff Liu if (error) 13402451337dSDave Chinner return error; 13413fe3e6b1SJeff Liu return offset; 13423fe3e6b1SJeff Liu } 13433fe3e6b1SJeff Liu 13443fe3e6b1SJeff Liu STATIC loff_t 13453fe3e6b1SJeff Liu xfs_file_llseek( 13463fe3e6b1SJeff Liu struct file *file, 13473fe3e6b1SJeff Liu loff_t offset, 134859f9c004SEric Sandeen int whence) 13493fe3e6b1SJeff Liu { 135059f9c004SEric Sandeen switch (whence) { 13513fe3e6b1SJeff Liu case SEEK_END: 13523fe3e6b1SJeff Liu case SEEK_CUR: 13533fe3e6b1SJeff Liu case SEEK_SET: 135459f9c004SEric Sandeen return generic_file_llseek(file, offset, whence); 13553fe3e6b1SJeff Liu case SEEK_HOLE: 135649c69591SEric Sandeen case SEEK_DATA: 135759f9c004SEric Sandeen return xfs_seek_hole_data(file, offset, whence); 13583fe3e6b1SJeff Liu default: 13593fe3e6b1SJeff Liu return -EINVAL; 13603fe3e6b1SJeff Liu } 13613fe3e6b1SJeff Liu } 13623fe3e6b1SJeff Liu 1363de0e8c20SDave Chinner /* 
1364de0e8c20SDave Chinner * Locking for serialisation of IO during page faults. This results in a lock 1365de0e8c20SDave Chinner * ordering of: 1366de0e8c20SDave Chinner * 1367de0e8c20SDave Chinner * mmap_sem (MM) 13686b698edeSDave Chinner * sb_start_pagefault(vfs, freeze) 136913ad4fe3SDave Chinner * i_mmaplock (XFS - truncate serialisation) 1370de0e8c20SDave Chinner * page_lock (MM) 1371de0e8c20SDave Chinner * i_lock (XFS - extent map serialisation) 1372de0e8c20SDave Chinner */ 1373de0e8c20SDave Chinner 1374075a924dSDave Chinner /* 1375075a924dSDave Chinner * mmap()d file has taken write protection fault and is being made writable. We 1376075a924dSDave Chinner * can set the page state up correctly for a writable page, which means we can 1377075a924dSDave Chinner * do correct delalloc accounting (ENOSPC checking!) and unwritten extent 1378075a924dSDave Chinner * mapping. 1379075a924dSDave Chinner */ 1380075a924dSDave Chinner STATIC int 1381075a924dSDave Chinner xfs_filemap_page_mkwrite( 1382075a924dSDave Chinner struct vm_fault *vmf) 1383075a924dSDave Chinner { 138411bac800SDave Jiang struct inode *inode = file_inode(vmf->vma->vm_file); 1385ec56b1f1SDave Chinner int ret; 1386075a924dSDave Chinner 13876b698edeSDave Chinner trace_xfs_filemap_page_mkwrite(XFS_I(inode)); 1388075a924dSDave Chinner 13896b698edeSDave Chinner sb_start_pagefault(inode->i_sb); 139011bac800SDave Jiang file_update_time(vmf->vma->vm_file); 13916b698edeSDave Chinner xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1392075a924dSDave Chinner 13936b698edeSDave Chinner if (IS_DAX(inode)) { 139411bac800SDave Jiang ret = dax_iomap_fault(vmf, &xfs_iomap_ops); 13956b698edeSDave Chinner } else { 139611bac800SDave Jiang ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); 13976b698edeSDave Chinner ret = block_page_mkwrite_return(ret); 13986b698edeSDave Chinner } 1399ec56b1f1SDave Chinner 14006b698edeSDave Chinner xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 14016b698edeSDave Chinner sb_end_pagefault(inode->i_sb); 14026b698edeSDave Chinner 14036b698edeSDave Chinner return ret; 14046b698edeSDave Chinner } 14056b698edeSDave Chinner 14066b698edeSDave Chinner STATIC int 14076b698edeSDave Chinner xfs_filemap_fault( 14086b698edeSDave Chinner struct vm_fault *vmf) 14096b698edeSDave Chinner { 141011bac800SDave Jiang struct inode *inode = file_inode(vmf->vma->vm_file); 14116b698edeSDave Chinner int ret; 14126b698edeSDave Chinner 1413b2442c5aSDave Chinner trace_xfs_filemap_fault(XFS_I(inode)); 14146b698edeSDave Chinner 14156b698edeSDave Chinner /* DAX can shortcut the normal fault path on write faults! */ 1416b2442c5aSDave Chinner if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) 141711bac800SDave Jiang return xfs_filemap_page_mkwrite(vmf); 1418075a924dSDave Chinner 1419b2442c5aSDave Chinner xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1420acdda3aaSChristoph Hellwig if (IS_DAX(inode)) 142111bac800SDave Jiang ret = dax_iomap_fault(vmf, &xfs_iomap_ops); 1422acdda3aaSChristoph Hellwig else 142311bac800SDave Jiang ret = filemap_fault(vmf); 1424b2442c5aSDave Chinner xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1425075a924dSDave Chinner 14266b698edeSDave Chinner return ret; 14276b698edeSDave Chinner } 14286b698edeSDave Chinner 142913ad4fe3SDave Chinner /* 143013ad4fe3SDave Chinner * Similar to xfs_filemap_fault(), the DAX fault path can call into here on 143113ad4fe3SDave Chinner * both read and write faults. Hence we need to handle both cases. 
There is no
1432*a2d58167SDave Jiang * ->huge_mkwrite callout for huge pages, so we have a single function here
143313ad4fe3SDave Chinner * to handle both cases. vmf->flags carries the information on the type of
143413ad4fe3SDave Chinner * fault occurring.
143513ad4fe3SDave Chinner */
1436acd76e74SMatthew Wilcox STATIC int
1437*a2d58167SDave Jiang xfs_filemap_huge_fault(
1438d8a849e1SDave Jiang struct vm_fault *vmf)
1439acd76e74SMatthew Wilcox {
1440f4200391SDave Jiang struct inode *inode = file_inode(vmf->vma->vm_file);
1441acd76e74SMatthew Wilcox struct xfs_inode *ip = XFS_I(inode);
1442acd76e74SMatthew Wilcox int ret;
1443acd76e74SMatthew Wilcox
1444acd76e74SMatthew Wilcox if (!IS_DAX(inode))
1445acd76e74SMatthew Wilcox return VM_FAULT_FALLBACK;
1446acd76e74SMatthew Wilcox
1447*a2d58167SDave Jiang trace_xfs_filemap_huge_fault(ip);
1448acd76e74SMatthew Wilcox
1449d8a849e1SDave Jiang if (vmf->flags & FAULT_FLAG_WRITE) {
1450acd76e74SMatthew Wilcox sb_start_pagefault(inode->i_sb);
1451f4200391SDave Jiang file_update_time(vmf->vma->vm_file);
145213ad4fe3SDave Chinner }
145313ad4fe3SDave Chinner
1454acd76e74SMatthew Wilcox xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1455*a2d58167SDave Jiang ret = dax_iomap_fault(vmf, &xfs_iomap_ops);
1456acd76e74SMatthew Wilcox xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
145713ad4fe3SDave Chinner
1458d8a849e1SDave Jiang if (vmf->flags & FAULT_FLAG_WRITE)
1459acd76e74SMatthew Wilcox sb_end_pagefault(inode->i_sb);
1460acd76e74SMatthew Wilcox
1461acd76e74SMatthew Wilcox return ret;
1462acd76e74SMatthew Wilcox }
1463acd76e74SMatthew Wilcox
14643af49285SDave Chinner /*
14653af49285SDave Chinner * pfn_mkwrite was originally intended to ensure we capture time stamp
14663af49285SDave Chinner * updates on write faults. In reality, it is needed to serialise against
14675eb88dcaSRoss Zwisler * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
14685eb88dcaSRoss Zwisler * to ensure we serialise the fault barrier in place.
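* The handler below checks the faulting offset against i_size under
* that lock and, for DAX mappings, lets dax_pfn_mkwrite() do the rest.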
14693af49285SDave Chinner */ 14703af49285SDave Chinner static int 14713af49285SDave Chinner xfs_filemap_pfn_mkwrite( 14723af49285SDave Chinner struct vm_fault *vmf) 14733af49285SDave Chinner { 14743af49285SDave Chinner 147511bac800SDave Jiang struct inode *inode = file_inode(vmf->vma->vm_file); 14763af49285SDave Chinner struct xfs_inode *ip = XFS_I(inode); 14773af49285SDave Chinner int ret = VM_FAULT_NOPAGE; 14783af49285SDave Chinner loff_t size; 14793af49285SDave Chinner 14803af49285SDave Chinner trace_xfs_filemap_pfn_mkwrite(ip); 14813af49285SDave Chinner 14823af49285SDave Chinner sb_start_pagefault(inode->i_sb); 148311bac800SDave Jiang file_update_time(vmf->vma->vm_file); 14843af49285SDave Chinner 14853af49285SDave Chinner /* check if the faulting page hasn't raced with truncate */ 14863af49285SDave Chinner xfs_ilock(ip, XFS_MMAPLOCK_SHARED); 14873af49285SDave Chinner size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; 14883af49285SDave Chinner if (vmf->pgoff >= size) 14893af49285SDave Chinner ret = VM_FAULT_SIGBUS; 14905eb88dcaSRoss Zwisler else if (IS_DAX(inode)) 149111bac800SDave Jiang ret = dax_pfn_mkwrite(vmf); 14923af49285SDave Chinner xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 14933af49285SDave Chinner sb_end_pagefault(inode->i_sb); 14943af49285SDave Chinner return ret; 14953af49285SDave Chinner 14963af49285SDave Chinner } 14973af49285SDave Chinner 14986b698edeSDave Chinner static const struct vm_operations_struct xfs_file_vm_ops = { 14996b698edeSDave Chinner .fault = xfs_filemap_fault, 1500*a2d58167SDave Jiang .huge_fault = xfs_filemap_huge_fault, 15016b698edeSDave Chinner .map_pages = filemap_map_pages, 15026b698edeSDave Chinner .page_mkwrite = xfs_filemap_page_mkwrite, 15033af49285SDave Chinner .pfn_mkwrite = xfs_filemap_pfn_mkwrite, 15046b698edeSDave Chinner }; 15056b698edeSDave Chinner 15066b698edeSDave Chinner STATIC int 15076b698edeSDave Chinner xfs_file_mmap( 15086b698edeSDave Chinner struct file *filp, 15096b698edeSDave Chinner struct vm_area_struct *vma) 15106b698edeSDave Chinner { 15116b698edeSDave Chinner file_accessed(filp); 15126b698edeSDave Chinner vma->vm_ops = &xfs_file_vm_ops; 15136b698edeSDave Chinner if (IS_DAX(file_inode(filp))) 1514acd76e74SMatthew Wilcox vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 15156b698edeSDave Chinner return 0; 1516075a924dSDave Chinner } 1517075a924dSDave Chinner 1518c59d87c4SChristoph Hellwig const struct file_operations xfs_file_operations = { 15193fe3e6b1SJeff Liu .llseek = xfs_file_llseek, 1520b4f5d2c6SAl Viro .read_iter = xfs_file_read_iter, 1521bf97f3bcSAl Viro .write_iter = xfs_file_write_iter, 152282c156f8SAl Viro .splice_read = generic_file_splice_read, 15238d020765SAl Viro .splice_write = iter_file_splice_write, 1524c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl, 1525c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT 1526c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl, 1527c59d87c4SChristoph Hellwig #endif 1528c59d87c4SChristoph Hellwig .mmap = xfs_file_mmap, 1529c59d87c4SChristoph Hellwig .open = xfs_file_open, 1530c59d87c4SChristoph Hellwig .release = xfs_file_release, 1531c59d87c4SChristoph Hellwig .fsync = xfs_file_fsync, 1532dbe6ec81SToshi Kani .get_unmapped_area = thp_get_unmapped_area, 1533c59d87c4SChristoph Hellwig .fallocate = xfs_file_fallocate, 15349fe26045SDarrick J. Wong .clone_file_range = xfs_file_clone_range, 1535cc714660SDarrick J. 
Wong .dedupe_file_range = xfs_file_dedupe_range, 1536c59d87c4SChristoph Hellwig }; 1537c59d87c4SChristoph Hellwig 1538c59d87c4SChristoph Hellwig const struct file_operations xfs_dir_file_operations = { 1539c59d87c4SChristoph Hellwig .open = xfs_dir_open, 1540c59d87c4SChristoph Hellwig .read = generic_read_dir, 15413b0a3c1aSAl Viro .iterate_shared = xfs_file_readdir, 1542c59d87c4SChristoph Hellwig .llseek = generic_file_llseek, 1543c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl, 1544c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT 1545c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl, 1546c59d87c4SChristoph Hellwig #endif 15471da2f2dbSChristoph Hellwig .fsync = xfs_dir_fsync, 1548c59d87c4SChristoph Hellwig }; 1549
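Illustrative userspace sketch (not part of the kernel source above): the SEEK_HOLE/SEEK_DATA support implemented by xfs_seek_hole_data() is reached through plain lseek(). The short program below is an assumption-laden example rather than anything from this file; it simply walks the file named on the command line and prints each data extent, relying only on the documented lseek() semantics (SEEK_DATA finds the next data offset, SEEK_HOLE the next hole, and -ENXIO is returned once the offset is at or beyond EOF).

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	off_t data, hole, end;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	end = lseek(fd, 0, SEEK_END);

	/* Walk the file: find the next data offset, then the hole after it. */
	data = lseek(fd, 0, SEEK_DATA);
	while (data >= 0 && data < end) {
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0) {
			perror("lseek");
			break;
		}
		printf("data: [%lld, %lld)\n", (long long)data, (long long)hole);
		data = lseek(fd, hole, SEEK_DATA);
	}

	/* -1 with ENXIO just means no more data before EOF. */
	if (data < 0 && errno != ENXIO)
		perror("lseek");

	close(fd);
	return 0;
}

On a sparse file this prints one [start, end) pair per data extent, with holes and the implicit hole at EOF skipped over, which is the behaviour the kernel-side seek loop above provides.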