/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_dinode.h"

#include <linux/aio.h>
#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>

static const struct vm_operations_struct xfs_file_vm_ops;

/*
 * Locking primitives for read and write IO paths to ensure we consistently use
 * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
 */
static inline void
xfs_rw_ilock(
	struct xfs_inode	*ip,
	int			type)
{
	if (type & XFS_IOLOCK_EXCL)
		mutex_lock(&VFS_I(ip)->i_mutex);
	xfs_ilock(ip, type);
}

static inline void
xfs_rw_iunlock(
	struct xfs_inode	*ip,
	int			type)
{
	xfs_iunlock(ip, type);
	if (type & XFS_IOLOCK_EXCL)
		mutex_unlock(&VFS_I(ip)->i_mutex);
}

static inline void
xfs_rw_ilock_demote(
	struct xfs_inode	*ip,
	int			type)
{
	xfs_ilock_demote(ip, type);
	if (type & XFS_IOLOCK_EXCL)
		mutex_unlock(&VFS_I(ip)->i_mutex);
}

/*
 * xfs_iozero
 *
 * xfs_iozero clears the specified range of the buffer supplied,
 * and marks all the affected blocks as valid and modified.  If
 * an affected block is not allocated, it will be allocated.  If
 * an affected block is not completely overwritten, and is not
 * valid before the operation, it will be read from disk before
 * being partially zeroed.
 */
int
xfs_iozero(
	struct xfs_inode	*ip,	/* inode			*/
	loff_t			pos,	/* offset in file		*/
	size_t			count)	/* size of data to zero		*/
{
	struct page		*page;
	struct address_space	*mapping;
	int			status;

	mapping = VFS_I(ip)->i_mapping;
	do {
		unsigned offset, bytes;
		void *fsdata;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		status = pagecache_write_begin(NULL, mapping, pos, bytes,
					AOP_FLAG_UNINTERRUPTIBLE,
					&page, &fsdata);
		if (status)
			break;

		zero_user(page, offset, bytes);

		status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
					page, fsdata);
		WARN_ON(status <= 0); /* can't return less than zero! */
		pos += bytes;
		count -= bytes;
		status = 0;
	} while (count);

	return (-status);
}

/*
 * Fsync operations on directories are much simpler than on regular files,
 * as there is no file data to flush, and thus also no need for explicit
 * cache flush operations, and there are no non-transaction metadata updates
 * on directories either.
 */
STATIC int
xfs_dir_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_lsn_t		lsn = 0;

	trace_xfs_dir_fsync(ip);

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip))
		lsn = ip->i_itemp->ili_last_lsn;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!lsn)
		return 0;
	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}

STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	if (mp->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If we have an RT and/or log subvolume we need to make sure
		 * to flush the write cache of the device used for file data
		 * first.  This is to ensure newly written file data make
		 * it to disk before logging the new inode size in case of
		 * an extending write.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
		else if (mp->m_logdev_targp != mp->m_ddev_targp)
			xfs_blkdev_issue_flush(mp->m_ddev_targp);
	}

	/*
	 * All metadata updates are logged, which means that we just have
	 * to flush the log up to the latest LSN that touched the inode.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = ip->i_itemp->ili_last_lsn;
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (lsn)
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
	    mp->m_logdev_targp == mp->m_ddev_targp &&
	    !XFS_IS_REALTIME_INODE(ip) &&
	    !log_flushed)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return -error;
}

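/*
 * Read from a regular file through the generic iov_iter path, holding the
 * iolock shared.  Direct IO reads must be aligned to the device logical
 * sector size; if cached pages cover the range they are written back and
 * invalidated first so the direct read sees current data.
 */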
STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = iov_iter_count(to);
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;
	loff_t			pos = iocb->ki_pos;

	XFS_STATS_INC(xs_read_calls);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		/* DIO must be aligned to device logical sector size */
		if ((pos | size) & target->bt_logical_sectormask) {
			if (pos == i_size_read(inode))
				return 0;
			return -XFS_ERROR(EINVAL);
		}
	}

	n = mp->m_super->s_maxbytes - pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock
	 * for direct IO, we effectively serialise all new concurrent
	 * read IO to this file and block it behind IO that is currently in
	 * progress because IO in progress holds the IO lock shared. We only
	 * need to hold the lock exclusive to blow away the page cache, so
	 * only take lock exclusively if the page cache needs invalidation.
	 * This allows the normal direct IO case of no page cache pages to
	 * proceed concurrently without serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		if (inode->i_mapping->nrpages) {
			ret = -filemap_write_and_wait_range(
					VFS_I(ip)->i_mapping,
					pos, -1);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}
			truncate_pagecache_range(VFS_I(ip), pos, -1);
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	trace_xfs_file_read(ip, size, pos, ioflags);

	ret = generic_file_read_iter(iocb, to);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}

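/*
 * Splice file data into a pipe via the generic splice path, holding the
 * iolock shared for the duration of the operation.
 */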
STATIC ssize_t
xfs_file_splice_read(
	struct file		*infilp,
	loff_t			*ppos,
	struct pipe_inode_info	*pipe,
	size_t			count,
	unsigned int		flags)
{
	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
	int			ioflags = 0;
	ssize_t			ret;

	XFS_STATS_INC(xs_read_calls);

	if (infilp->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);

	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);

	ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}

/*
 * This routine is called to handle zeroing any space in the last block of the
 * file that is beyond the EOF.  We do this since the size is being increased
 * without writing anything to that block and we don't want to read the
 * garbage on the disk.
 */
STATIC int				/* error (positive) */
xfs_zero_last_block(
	struct xfs_inode	*ip,
	xfs_fsize_t		offset,
	xfs_fsize_t		isize)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
	int			zero_len;
	int			nimaps = 1;
	int			error = 0;
	struct xfs_bmbt_irec	imap;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (error)
		return error;

	ASSERT(nimaps > 0);

	/*
	 * If the block underlying isize is just a hole, then there
	 * is nothing to zero.
	 */
	if (imap.br_startblock == HOLESTARTBLOCK)
		return 0;

	zero_len = mp->m_sb.sb_blocksize - zero_offset;
	if (isize + zero_len > offset)
		zero_len = offset - isize;
	return xfs_iozero(ip, isize, zero_len);
}

/*
 * Zero any on disk space between the current EOF and the new, larger EOF.
 *
 * This handles the normal case of zeroing the remainder of the last block in
 * the file and the unusual case of zeroing blocks out beyond the size of the
 * file.  This second case only happens with fixed size extents and when the
 * system crashes before the inode size was updated but after blocks were
 * allocated.
 *
 * Expects the iolock to be held exclusive, and will take the ilock internally.
 */
int					/* error (positive) */
xfs_zero_eof(
	struct xfs_inode	*ip,
	xfs_off_t		offset,		/* starting I/O offset */
	xfs_fsize_t		isize)		/* current inode size */
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		start_zero_fsb;
	xfs_fileoff_t		end_zero_fsb;
	xfs_fileoff_t		zero_count_fsb;
	xfs_fileoff_t		last_fsb;
	xfs_fileoff_t		zero_off;
	xfs_fsize_t		zero_len;
	int			nimaps;
	int			error = 0;
	struct xfs_bmbt_irec	imap;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(offset > isize);

	/*
	 * First handle zeroing the block on which isize resides.
	 *
	 * We only zero a part of that block so it is handled specially.
	 */
	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
		error = xfs_zero_last_block(ip, offset, isize);
		if (error)
			return error;
	}

	/*
	 * Calculate the range between the new size and the old where blocks
	 * needing to be zeroed may exist.
	 *
	 * To get the block where the last byte in the file currently resides,
	 * we need to subtract one from the size and truncate back to a block
	 * boundary.  We subtract 1 in case the size is exactly on a block
	 * boundary.
	 */
	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
	if (last_fsb == end_zero_fsb) {
		/*
		 * The size was only incremented on its last block.
		 * We took care of that above, so just return.
		 */
		return 0;
	}

	ASSERT(start_zero_fsb <= end_zero_fsb);
	while (start_zero_fsb <= end_zero_fsb) {
		nimaps = 1;
		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
					  &imap, &nimaps, 0);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		ASSERT(nimaps > 0);

		if (imap.br_state == XFS_EXT_UNWRITTEN ||
		    imap.br_startblock == HOLESTARTBLOCK) {
			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
			continue;
		}

		/*
		 * There are blocks we need to zero.
		 */
		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);

		if ((zero_off + zero_len) > offset)
			zero_len = offset - zero_off;

		error = xfs_iozero(ip, zero_off, zero_len);
		if (error)
			return error;

		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
	}

	return 0;
}

/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct file		*file,
	loff_t			*pos,
	size_t			*count,
	int			*iolock)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			error = 0;

restart:
	error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
	if (error)
		return error;

	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which implies
	 * having to redo all checks before.
	 */
	if (*pos > i_size_read(inode)) {
		if (*iolock == XFS_IOLOCK_SHARED) {
			xfs_rw_iunlock(ip, *iolock);
			*iolock = XFS_IOLOCK_EXCL;
			xfs_rw_ilock(ip, *iolock);
			goto restart;
		}
		error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
		if (error)
			return error;
	}

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above.  Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
		error = file_update_time(file);
		if (error)
			return error;
	}

	/*
	 * If we're writing the file then make sure to clear the setuid and
	 * setgid bits if the process is not being run by root.  This keeps
	 * people from modifying setuid and setgid binaries.
	 */
	return file_remove_suid(file);
}

/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	loff_t			pos = iocb->ki_pos;
	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((pos | count) & target->bt_logical_sectormask)
		return -XFS_ERROR(EINVAL);

	/* "unaligned" here means not aligned to a filesystem block */
	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
		unaligned_io = 1;

	/*
	 * We don't need to take an exclusive lock unless the page cache needs
	 * to be invalidated or unaligned IO is being executed. We don't need to
	 * consider the EOF extension case here because
	 * xfs_file_aio_write_checks() will relock the inode as necessary for
	 * EOF zeroing cases and fill out the new inode size as appropriate.
	 */
	if (unaligned_io || mapping->nrpages)
		iolock = XFS_IOLOCK_EXCL;
	else
		iolock = XFS_IOLOCK_SHARED;
	xfs_rw_ilock(ip, iolock);

	/*
	 * Recheck if there are cached pages that need invalidate after we got
	 * the iolock to protect against other threads adding new pages while
	 * we were waiting for the iolock.
	 */
	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
		xfs_rw_iunlock(ip, iolock);
		iolock = XFS_IOLOCK_EXCL;
		xfs_rw_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
	if (ret)
		goto out;
	iov_iter_truncate(from, count);

	if (mapping->nrpages) {
		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
						    pos, -1);
		if (ret)
			goto out;
		truncate_pagecache_range(VFS_I(ip), pos, -1);
	}

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to flush cached pages
	 */
	if (unaligned_io)
		inode_dio_wait(inode);
	else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
	ret = generic_file_direct_write(iocb, from, pos);

out:
	xfs_rw_iunlock(ip, iolock);

	/* No fallback to buffered IO on errors for XFS. */
	ASSERT(ret < 0 || ret == count);
	return ret;
}

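/*
 * Buffered writes go through the generic write path with the iolock held
 * exclusive.  On ENOSPC we flush dirty inodes once to convert delalloc
 * reservations into free space and then retry the write.
 */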
STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	int			iolock = XFS_IOLOCK_EXCL;
	loff_t			pos = iocb->ki_pos;
	size_t			count = iov_iter_count(from);

	xfs_rw_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
	if (ret)
		goto out;

	iov_iter_truncate(from, count);
	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

write_retry:
	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
	ret = generic_perform_write(file, from, pos);
	if (likely(ret >= 0))
		iocb->ki_pos = pos + ret;
	/*
	 * If we just got an ENOSPC, try to write back all dirty inodes to
	 * convert delalloc space to free up some of the excess reserved
	 * metadata space.
	 */
	if (ret == -ENOSPC && !enospc) {
		enospc = 1;
		xfs_flush_inodes(ip->i_mount);
		goto write_retry;
	}

	current->backing_dev_info = NULL;
out:
	xfs_rw_iunlock(ip, iolock);
	return ret;
}

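/*
 * Top level write entry point: dispatch to the direct or buffered write
 * path, then handle O_SYNC/O_DSYNC semantics for a successful write via
 * generic_write_sync().
 */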
STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (unlikely(file->f_flags & O_DIRECT))
		ret = xfs_file_dio_aio_write(iocb, from);
	else
		ret = xfs_file_buffered_aio_write(iocb, from);

	if (ret > 0) {
		ssize_t		err;

		XFS_STATS_ADD(xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}
	return ret;
}

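/*
 * Preallocate, punch, zero or collapse a range of the file.  The timestamp
 * update (and the clearing of the setuid/setgid bits) is logged in its own
 * transaction, and any required file size change is applied through
 * xfs_setattr_size() afterwards.
 */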
STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_trans	*tp;
	long			error;
	loff_t			new_size = 0;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;

		if (offset & blksize_mask || len & blksize_mask) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * The collapse range must not reach or overlap EOF; that
		 * case is effectively a truncate operation.
		 */
		if (offset + len >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}

		new_size = i_size_read(inode) - len;

		error = xfs_collapse_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else {
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = -inode_newsize_ok(inode, new_size);
			if (error)
				goto out_unlock;
		}

		if (mode & FALLOC_FL_ZERO_RANGE)
			error = xfs_zero_file_space(ip, offset, len);
		else
			error = xfs_alloc_file_space(ip, offset, len,
						     XFS_BMAPI_PREALLOC);
		if (error)
			goto out_unlock;
	}

	tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
	error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		goto out_unlock;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	ip->i_d.di_mode &= ~S_ISUID;
	if (ip->i_d.di_mode & S_IXGRP)
		ip->i_d.di_mode &= ~S_ISGID;

	if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;

	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	if (file->f_flags & O_DSYNC)
		xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_setattr_size(ip, &iattr);
	}

out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return -error;
}

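/*
 * Reject opens of large files without O_LARGEFILE, and any open on a
 * filesystem that has been shut down.
 */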
STATIC int
xfs_file_open(
	struct inode	*inode,
	struct file	*file)
{
	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
		return -EIO;
	return 0;
}

STATIC int
xfs_dir_open(
	struct inode	*inode,
	struct file	*file)
{
	struct xfs_inode *ip = XFS_I(inode);
	int		mode;
	int		error;

	error = xfs_file_open(inode, file);
	if (error)
		return error;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.
	 */
	mode = xfs_ilock_data_map_shared(ip);
	if (ip->i_d.di_nextents > 0)
		xfs_dir3_data_readahead(NULL, ip, 0, -1);
	xfs_iunlock(ip, mode);
	return 0;
}

STATIC int
xfs_file_release(
	struct inode	*inode,
	struct file	*filp)
{
	return -xfs_release(XFS_I(inode));
}

STATIC int
xfs_file_readdir(
	struct file	*file,
	struct dir_context *ctx)
{
	struct inode	*inode = file_inode(file);
	xfs_inode_t	*ip = XFS_I(inode);
	int		error;
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass the total size of the buffer
	 * we read into down to the filesystem.  With the filldir concept
	 * it's not needed for correct information, but the XFS dir2 leaf
	 * code wants an estimate of the buffer size to calculate its
	 * readahead window and size the buffers used for mapping to
	 * physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some
	 * point we can change the ->readdir prototype to include the
	 * buffer size.  For now we use the current glibc buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);

	error = xfs_readdir(ip, ctx, bufsize);
	if (error)
		return -error;
	return 0;
}

STATIC int
xfs_file_mmap(
	struct file	*filp,
	struct vm_area_struct *vma)
{
	vma->vm_ops = &xfs_file_vm_ops;

	file_accessed(filp);
	return 0;
}

/*
 * mmap()d file has taken write protection fault and is being made
 * writable. We can set the page state up correctly for a writable
 * page, which means we can do correct delalloc accounting (ENOSPC
 * checking!) and unwritten extent mapping.
 */
STATIC int
xfs_vm_page_mkwrite(
	struct vm_area_struct	*vma,
	struct vm_fault		*vmf)
{
	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
}

/*
 * This type is designed to indicate the type of offset we would like
 * to search from page cache for either xfs_seek_data() or xfs_seek_hole().
 */
enum {
	HOLE_OFF = 0,
	DATA_OFF,
};

/*
 * Lookup the desired type of offset from the given page.
 *
 * On success, return true and the offset argument will point to the
 * start of the region that was found. Otherwise this function will
 * return false and keep the offset argument unchanged.
 */
STATIC bool
xfs_lookup_buffer_offset(
	struct page		*page,
	loff_t			*offset,
	unsigned int		type)
{
	loff_t			lastoff = page_offset(page);
	bool			found = false;
	struct buffer_head	*bh, *head;

	bh = head = page_buffers(page);
	do {
		/*
		 * Unwritten extents that have data in the page
		 * cache covering them can be identified by the
		 * BH_Unwritten state flag.  Pages with multiple
		 * buffers might have a mix of holes, data and
		 * unwritten extents - any buffer with valid
		 * data in it should have BH_Uptodate flag set
		 * on it.
		 */
		if (buffer_unwritten(bh) ||
		    buffer_uptodate(bh)) {
			if (type == DATA_OFF)
				found = true;
		} else {
			if (type == HOLE_OFF)
				found = true;
		}

		if (found) {
			*offset = lastoff;
			break;
		}
		lastoff += bh->b_size;
	} while ((bh = bh->b_this_page) != head);

	return found;
}

/*
 * This routine is called to find out and return a data or hole offset
 * from the page cache for unwritten extents according to the desired
 * type for xfs_seek_data() or xfs_seek_hole().
 *
 * The argument offset is used to tell where we start to search from the
 * page cache.  Map is used to figure out the end points of the range to
 * lookup pages.
 *
 * Return true if the desired type of offset was found, and the argument
 * offset is filled with that address.  Otherwise, return false and keep
 * offset unchanged.
 */
STATIC bool
xfs_find_get_desired_pgoff(
	struct inode		*inode,
	struct xfs_bmbt_irec	*map,
	unsigned int		type,
	loff_t			*offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct pagevec		pvec;
	pgoff_t			index;
	pgoff_t			end;
	loff_t			endoff;
	loff_t			startoff = *offset;
	loff_t			lastoff = startoff;
	bool			found = false;

	pagevec_init(&pvec, 0);

	index = startoff >> PAGE_CACHE_SHIFT;
	endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
	end = endoff >> PAGE_CACHE_SHIFT;
	do {
		int		want;
		unsigned	nr_pages;
		unsigned int	i;

		want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  want);
		/*
		 * No page mapped into the given range.  If we are searching
		 * for holes and this is the first pass through the loop, the
		 * given offset landed in a hole, so return it.
		 *
		 * If we have already stepped through some block buffers to
		 * find holes but they all contained data, the last offset
		 * has already been updated to point at the end of the last
		 * mapped page; if it does not reach the endpoint of the
		 * search, there should be a hole between them.
		 */
		if (nr_pages == 0) {
			/* Data search found nothing */
			if (type == DATA_OFF)
				break;

			ASSERT(type == HOLE_OFF);
			if (lastoff == startoff || lastoff < endoff) {
				found = true;
				*offset = lastoff;
			}
			break;
		}

		/*
		 * At least we found one page.  If this is the first time we
		 * step into the loop, and if the first page index offset is
		 * greater than the given search offset, a hole was found.
		 */
		if (type == HOLE_OFF && lastoff == startoff &&
		    lastoff < page_offset(pvec.pages[0])) {
			found = true;
			break;
		}

		for (i = 0; i < nr_pages; i++) {
			struct page	*page = pvec.pages[i];
			loff_t		b_offset;

			/*
			 * At this point, the page may be truncated or
			 * invalidated (changing page->mapping to NULL),
			 * or even swizzled back from swapper_space to tmpfs
			 * file mapping.  However, page->index will not change
However, page->index will not change
1093d126d43fSJeff Liu * because we have a reference on the page.
1094d126d43fSJeff Liu *
1095d126d43fSJeff Liu * The search is done if the page index is out of range.
1096d126d43fSJeff Liu * If the current offset has not reached the end of
1097d126d43fSJeff Liu * the specified search range, there should be a hole
1098d126d43fSJeff Liu * between them.
1099d126d43fSJeff Liu */
1100d126d43fSJeff Liu if (page->index > end) {
1101d126d43fSJeff Liu if (type == HOLE_OFF && lastoff < endoff) {
1102d126d43fSJeff Liu *offset = lastoff;
1103d126d43fSJeff Liu found = true;
1104d126d43fSJeff Liu }
1105d126d43fSJeff Liu goto out;
1106d126d43fSJeff Liu }
1107d126d43fSJeff Liu
1108d126d43fSJeff Liu lock_page(page);
1109d126d43fSJeff Liu /*
1110d126d43fSJeff Liu * Page truncated or invalidated (page->mapping == NULL).
1111d126d43fSJeff Liu * We can freely skip it and proceed to check the next
1112d126d43fSJeff Liu * page.
1113d126d43fSJeff Liu */
1114d126d43fSJeff Liu if (unlikely(page->mapping != inode->i_mapping)) {
1115d126d43fSJeff Liu unlock_page(page);
1116d126d43fSJeff Liu continue;
1117d126d43fSJeff Liu }
1118d126d43fSJeff Liu
1119d126d43fSJeff Liu if (!page_has_buffers(page)) {
1120d126d43fSJeff Liu unlock_page(page);
1121d126d43fSJeff Liu continue;
1122d126d43fSJeff Liu }
1123d126d43fSJeff Liu
1124d126d43fSJeff Liu found = xfs_lookup_buffer_offset(page, &b_offset, type);
1125d126d43fSJeff Liu if (found) {
1126d126d43fSJeff Liu /*
1127d126d43fSJeff Liu * The found offset may be less than the start
1128d126d43fSJeff Liu * of the search range if this is the first
1129d126d43fSJeff Liu * time through here.
1130d126d43fSJeff Liu */
1131d126d43fSJeff Liu *offset = max_t(loff_t, startoff, b_offset);
1132d126d43fSJeff Liu unlock_page(page);
1133d126d43fSJeff Liu goto out;
1134d126d43fSJeff Liu }
1135d126d43fSJeff Liu
1136d126d43fSJeff Liu /*
1137d126d43fSJeff Liu * We were either searching for data and found nothing, or
1138d126d43fSJeff Liu * searching for a hole and found a data buffer. In either
1139d126d43fSJeff Liu * case the next page probably contains what we are looking
1140d126d43fSJeff Liu * for, so update the last offset accordingly.
1141d126d43fSJeff Liu */
1142d126d43fSJeff Liu lastoff = page_offset(page) + PAGE_SIZE;
1143d126d43fSJeff Liu unlock_page(page);
1144d126d43fSJeff Liu }
1145d126d43fSJeff Liu
1146d126d43fSJeff Liu /*
1147d126d43fSJeff Liu * Fewer pages were returned than we asked for, so the search
1148d126d43fSJeff Liu * is done. In this case a data search found nothing, but a
1149d126d43fSJeff Liu * hole search found a hole behind the last offset.
1150d126d43fSJeff Liu */ 1151d126d43fSJeff Liu if (nr_pages < want) { 1152d126d43fSJeff Liu if (type == HOLE_OFF) { 1153d126d43fSJeff Liu *offset = lastoff; 1154d126d43fSJeff Liu found = true; 1155d126d43fSJeff Liu } 1156d126d43fSJeff Liu break; 1157d126d43fSJeff Liu } 1158d126d43fSJeff Liu 1159d126d43fSJeff Liu index = pvec.pages[i - 1]->index + 1; 1160d126d43fSJeff Liu pagevec_release(&pvec); 1161d126d43fSJeff Liu } while (index <= end); 1162d126d43fSJeff Liu 1163d126d43fSJeff Liu out: 1164d126d43fSJeff Liu pagevec_release(&pvec); 1165d126d43fSJeff Liu return found; 1166d126d43fSJeff Liu } 1167d126d43fSJeff Liu 11683fe3e6b1SJeff Liu STATIC loff_t 11693fe3e6b1SJeff Liu xfs_seek_data( 11703fe3e6b1SJeff Liu struct file *file, 1171834ab122SJeff Liu loff_t start) 11723fe3e6b1SJeff Liu { 11733fe3e6b1SJeff Liu struct inode *inode = file->f_mapping->host; 11743fe3e6b1SJeff Liu struct xfs_inode *ip = XFS_I(inode); 11753fe3e6b1SJeff Liu struct xfs_mount *mp = ip->i_mount; 11763fe3e6b1SJeff Liu loff_t uninitialized_var(offset); 11773fe3e6b1SJeff Liu xfs_fsize_t isize; 11783fe3e6b1SJeff Liu xfs_fileoff_t fsbno; 11793fe3e6b1SJeff Liu xfs_filblks_t end; 11803fe3e6b1SJeff Liu uint lock; 11813fe3e6b1SJeff Liu int error; 11823fe3e6b1SJeff Liu 1183309ecac8SChristoph Hellwig lock = xfs_ilock_data_map_shared(ip); 11843fe3e6b1SJeff Liu 11853fe3e6b1SJeff Liu isize = i_size_read(inode); 11863fe3e6b1SJeff Liu if (start >= isize) { 11873fe3e6b1SJeff Liu error = ENXIO; 11883fe3e6b1SJeff Liu goto out_unlock; 11893fe3e6b1SJeff Liu } 11903fe3e6b1SJeff Liu 11913fe3e6b1SJeff Liu /* 11923fe3e6b1SJeff Liu * Try to read extents from the first block indicated 11933fe3e6b1SJeff Liu * by fsbno to the end block of the file. 11943fe3e6b1SJeff Liu */ 119552f1acc8SJeff Liu fsbno = XFS_B_TO_FSBT(mp, start); 11963fe3e6b1SJeff Liu end = XFS_B_TO_FSB(mp, isize); 119752f1acc8SJeff Liu for (;;) { 119852f1acc8SJeff Liu struct xfs_bmbt_irec map[2]; 119952f1acc8SJeff Liu int nmap = 2; 120052f1acc8SJeff Liu unsigned int i; 12013fe3e6b1SJeff Liu 12023fe3e6b1SJeff Liu error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, 12033fe3e6b1SJeff Liu XFS_BMAPI_ENTIRE); 12043fe3e6b1SJeff Liu if (error) 12053fe3e6b1SJeff Liu goto out_unlock; 12063fe3e6b1SJeff Liu 120752f1acc8SJeff Liu /* No extents at given offset, must be beyond EOF */ 120852f1acc8SJeff Liu if (nmap == 0) { 120952f1acc8SJeff Liu error = ENXIO; 121052f1acc8SJeff Liu goto out_unlock; 121152f1acc8SJeff Liu } 121252f1acc8SJeff Liu 121352f1acc8SJeff Liu for (i = 0; i < nmap; i++) { 12143fe3e6b1SJeff Liu offset = max_t(loff_t, start, 121552f1acc8SJeff Liu XFS_FSB_TO_B(mp, map[i].br_startoff)); 121652f1acc8SJeff Liu 121752f1acc8SJeff Liu /* Landed in a data extent */ 121852f1acc8SJeff Liu if (map[i].br_startblock == DELAYSTARTBLOCK || 121952f1acc8SJeff Liu (map[i].br_state == XFS_EXT_NORM && 122052f1acc8SJeff Liu !isnullstartblock(map[i].br_startblock))) 122152f1acc8SJeff Liu goto out; 122252f1acc8SJeff Liu 122352f1acc8SJeff Liu /* 122452f1acc8SJeff Liu * Landed in an unwritten extent, try to search data 122552f1acc8SJeff Liu * from page cache. 
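 * An unwritten extent reads back as zeroes from disk, so it only
 * counts as data here when uptodate pages covering it are present
 * in the page cache.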
122652f1acc8SJeff Liu */ 122752f1acc8SJeff Liu if (map[i].br_state == XFS_EXT_UNWRITTEN) { 122852f1acc8SJeff Liu if (xfs_find_get_desired_pgoff(inode, &map[i], 122952f1acc8SJeff Liu DATA_OFF, &offset)) 123052f1acc8SJeff Liu goto out; 123152f1acc8SJeff Liu } 123252f1acc8SJeff Liu } 123352f1acc8SJeff Liu 123452f1acc8SJeff Liu /* 123552f1acc8SJeff Liu * map[0] is hole or its an unwritten extent but 123652f1acc8SJeff Liu * without data in page cache. Probably means that 123752f1acc8SJeff Liu * we are reading after EOF if nothing in map[1]. 123852f1acc8SJeff Liu */ 12393fe3e6b1SJeff Liu if (nmap == 1) { 12403fe3e6b1SJeff Liu error = ENXIO; 12413fe3e6b1SJeff Liu goto out_unlock; 12423fe3e6b1SJeff Liu } 12433fe3e6b1SJeff Liu 124452f1acc8SJeff Liu ASSERT(i > 1); 124552f1acc8SJeff Liu 124652f1acc8SJeff Liu /* 124752f1acc8SJeff Liu * Nothing was found, proceed to the next round of search 124852f1acc8SJeff Liu * if reading offset not beyond or hit EOF. 124952f1acc8SJeff Liu */ 125052f1acc8SJeff Liu fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; 125152f1acc8SJeff Liu start = XFS_FSB_TO_B(mp, fsbno); 125252f1acc8SJeff Liu if (start >= isize) { 125352f1acc8SJeff Liu error = ENXIO; 125452f1acc8SJeff Liu goto out_unlock; 125552f1acc8SJeff Liu } 12563fe3e6b1SJeff Liu } 12573fe3e6b1SJeff Liu 125852f1acc8SJeff Liu out: 125946a1c2c7SJie Liu offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 12603fe3e6b1SJeff Liu 12613fe3e6b1SJeff Liu out_unlock: 126201f4f327SChristoph Hellwig xfs_iunlock(ip, lock); 12633fe3e6b1SJeff Liu 12643fe3e6b1SJeff Liu if (error) 12653fe3e6b1SJeff Liu return -error; 12663fe3e6b1SJeff Liu return offset; 12673fe3e6b1SJeff Liu } 12683fe3e6b1SJeff Liu 12693fe3e6b1SJeff Liu STATIC loff_t 12703fe3e6b1SJeff Liu xfs_seek_hole( 12713fe3e6b1SJeff Liu struct file *file, 1272834ab122SJeff Liu loff_t start) 12733fe3e6b1SJeff Liu { 12743fe3e6b1SJeff Liu struct inode *inode = file->f_mapping->host; 12753fe3e6b1SJeff Liu struct xfs_inode *ip = XFS_I(inode); 12763fe3e6b1SJeff Liu struct xfs_mount *mp = ip->i_mount; 12773fe3e6b1SJeff Liu loff_t uninitialized_var(offset); 12783fe3e6b1SJeff Liu xfs_fsize_t isize; 12793fe3e6b1SJeff Liu xfs_fileoff_t fsbno; 1280b686d1f7SJeff Liu xfs_filblks_t end; 12813fe3e6b1SJeff Liu uint lock; 12823fe3e6b1SJeff Liu int error; 12833fe3e6b1SJeff Liu 12843fe3e6b1SJeff Liu if (XFS_FORCED_SHUTDOWN(mp)) 12853fe3e6b1SJeff Liu return -XFS_ERROR(EIO); 12863fe3e6b1SJeff Liu 1287309ecac8SChristoph Hellwig lock = xfs_ilock_data_map_shared(ip); 12883fe3e6b1SJeff Liu 12893fe3e6b1SJeff Liu isize = i_size_read(inode); 12903fe3e6b1SJeff Liu if (start >= isize) { 12913fe3e6b1SJeff Liu error = ENXIO; 12923fe3e6b1SJeff Liu goto out_unlock; 12933fe3e6b1SJeff Liu } 12943fe3e6b1SJeff Liu 12953fe3e6b1SJeff Liu fsbno = XFS_B_TO_FSBT(mp, start); 1296b686d1f7SJeff Liu end = XFS_B_TO_FSB(mp, isize); 1297b686d1f7SJeff Liu 1298b686d1f7SJeff Liu for (;;) { 1299b686d1f7SJeff Liu struct xfs_bmbt_irec map[2]; 1300b686d1f7SJeff Liu int nmap = 2; 1301b686d1f7SJeff Liu unsigned int i; 1302b686d1f7SJeff Liu 1303b686d1f7SJeff Liu error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, 1304b686d1f7SJeff Liu XFS_BMAPI_ENTIRE); 13053fe3e6b1SJeff Liu if (error) 13063fe3e6b1SJeff Liu goto out_unlock; 13073fe3e6b1SJeff Liu 1308b686d1f7SJeff Liu /* No extents at given offset, must be beyond EOF */ 1309b686d1f7SJeff Liu if (nmap == 0) { 1310b686d1f7SJeff Liu error = ENXIO; 1311b686d1f7SJeff Liu goto out_unlock; 13123fe3e6b1SJeff Liu } 13133fe3e6b1SJeff Liu 1314b686d1f7SJeff Liu for (i = 0; i < 
nmap; i++) {
1315b686d1f7SJeff Liu offset = max_t(loff_t, start,
1316b686d1f7SJeff Liu XFS_FSB_TO_B(mp, map[i].br_startoff));
1317b686d1f7SJeff Liu
1318b686d1f7SJeff Liu /* Landed in a hole */
1319b686d1f7SJeff Liu if (map[i].br_startblock == HOLESTARTBLOCK)
1320b686d1f7SJeff Liu goto out;
1321b686d1f7SJeff Liu
1322b686d1f7SJeff Liu /*
1323b686d1f7SJeff Liu * Landed in an unwritten extent, try to search hole
1324b686d1f7SJeff Liu * from page cache.
1325b686d1f7SJeff Liu */
1326b686d1f7SJeff Liu if (map[i].br_state == XFS_EXT_UNWRITTEN) {
1327b686d1f7SJeff Liu if (xfs_find_get_desired_pgoff(inode, &map[i],
1328b686d1f7SJeff Liu HOLE_OFF, &offset))
1329b686d1f7SJeff Liu goto out;
1330b686d1f7SJeff Liu }
1331b686d1f7SJeff Liu }
1332b686d1f7SJeff Liu
1333b686d1f7SJeff Liu /*
1334b686d1f7SJeff Liu * map[0] contains data, or it is an unwritten extent with
1335b686d1f7SJeff Liu * data in the page cache, which probably means that we are
1336b686d1f7SJeff Liu * reading after EOF. Fix the offset to point to the end of
1337b686d1f7SJeff Liu * the file (i.e., there is an implicit hole at the end of
1338b686d1f7SJeff Liu * any file).
1339b686d1f7SJeff Liu */
1340b686d1f7SJeff Liu if (nmap == 1) {
1341b686d1f7SJeff Liu offset = isize;
1342b686d1f7SJeff Liu break;
1343b686d1f7SJeff Liu }
1344b686d1f7SJeff Liu
1345b686d1f7SJeff Liu ASSERT(i > 1);
1346b686d1f7SJeff Liu
1347b686d1f7SJeff Liu /*
1348b686d1f7SJeff Liu * Both mappings contain data; proceed to the next round of
1349b686d1f7SJeff Liu * search if the current offset has not reached or passed EOF.
1350b686d1f7SJeff Liu */
1351b686d1f7SJeff Liu fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
1352b686d1f7SJeff Liu start = XFS_FSB_TO_B(mp, fsbno);
1353b686d1f7SJeff Liu if (start >= isize) {
1354b686d1f7SJeff Liu offset = isize;
1355b686d1f7SJeff Liu break;
1356b686d1f7SJeff Liu }
1357b686d1f7SJeff Liu }
1358b686d1f7SJeff Liu
1359b686d1f7SJeff Liu out:
1360b686d1f7SJeff Liu /*
1361b686d1f7SJeff Liu * At this point we must have found a hole. However, the returned
1362b686d1f7SJeff Liu * offset may be bigger than the file size, as it may be aligned to
1363b686d1f7SJeff Liu * a page boundary for unwritten extents; we need to handle this
1364b686d1f7SJeff Liu * case.
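 * Clamp the result to the inode size so that SEEK_HOLE never returns
 * an offset beyond EOF.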
1365b686d1f7SJeff Liu */ 1366b686d1f7SJeff Liu offset = min_t(loff_t, offset, isize); 136746a1c2c7SJie Liu offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 13683fe3e6b1SJeff Liu 13693fe3e6b1SJeff Liu out_unlock: 137001f4f327SChristoph Hellwig xfs_iunlock(ip, lock); 13713fe3e6b1SJeff Liu 13723fe3e6b1SJeff Liu if (error) 13733fe3e6b1SJeff Liu return -error; 13743fe3e6b1SJeff Liu return offset; 13753fe3e6b1SJeff Liu } 13763fe3e6b1SJeff Liu 13773fe3e6b1SJeff Liu STATIC loff_t 13783fe3e6b1SJeff Liu xfs_file_llseek( 13793fe3e6b1SJeff Liu struct file *file, 13803fe3e6b1SJeff Liu loff_t offset, 13813fe3e6b1SJeff Liu int origin) 13823fe3e6b1SJeff Liu { 13833fe3e6b1SJeff Liu switch (origin) { 13843fe3e6b1SJeff Liu case SEEK_END: 13853fe3e6b1SJeff Liu case SEEK_CUR: 13863fe3e6b1SJeff Liu case SEEK_SET: 13873fe3e6b1SJeff Liu return generic_file_llseek(file, offset, origin); 13883fe3e6b1SJeff Liu case SEEK_DATA: 1389834ab122SJeff Liu return xfs_seek_data(file, offset); 13903fe3e6b1SJeff Liu case SEEK_HOLE: 1391834ab122SJeff Liu return xfs_seek_hole(file, offset); 13923fe3e6b1SJeff Liu default: 13933fe3e6b1SJeff Liu return -EINVAL; 13943fe3e6b1SJeff Liu } 13953fe3e6b1SJeff Liu } 13963fe3e6b1SJeff Liu 1397c59d87c4SChristoph Hellwig const struct file_operations xfs_file_operations = { 13983fe3e6b1SJeff Liu .llseek = xfs_file_llseek, 1399b4f5d2c6SAl Viro .read = new_sync_read, 1400bf97f3bcSAl Viro .write = new_sync_write, 1401b4f5d2c6SAl Viro .read_iter = xfs_file_read_iter, 1402bf97f3bcSAl Viro .write_iter = xfs_file_write_iter, 1403c59d87c4SChristoph Hellwig .splice_read = xfs_file_splice_read, 1404*8d020765SAl Viro .splice_write = iter_file_splice_write, 1405c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl, 1406c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT 1407c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl, 1408c59d87c4SChristoph Hellwig #endif 1409c59d87c4SChristoph Hellwig .mmap = xfs_file_mmap, 1410c59d87c4SChristoph Hellwig .open = xfs_file_open, 1411c59d87c4SChristoph Hellwig .release = xfs_file_release, 1412c59d87c4SChristoph Hellwig .fsync = xfs_file_fsync, 1413c59d87c4SChristoph Hellwig .fallocate = xfs_file_fallocate, 1414c59d87c4SChristoph Hellwig }; 1415c59d87c4SChristoph Hellwig 1416c59d87c4SChristoph Hellwig const struct file_operations xfs_dir_file_operations = { 1417c59d87c4SChristoph Hellwig .open = xfs_dir_open, 1418c59d87c4SChristoph Hellwig .read = generic_read_dir, 1419b8227554SAl Viro .iterate = xfs_file_readdir, 1420c59d87c4SChristoph Hellwig .llseek = generic_file_llseek, 1421c59d87c4SChristoph Hellwig .unlocked_ioctl = xfs_file_ioctl, 1422c59d87c4SChristoph Hellwig #ifdef CONFIG_COMPAT 1423c59d87c4SChristoph Hellwig .compat_ioctl = xfs_file_compat_ioctl, 1424c59d87c4SChristoph Hellwig #endif 14251da2f2dbSChristoph Hellwig .fsync = xfs_dir_fsync, 1426c59d87c4SChristoph Hellwig }; 1427c59d87c4SChristoph Hellwig 1428c59d87c4SChristoph Hellwig static const struct vm_operations_struct xfs_file_vm_ops = { 1429c59d87c4SChristoph Hellwig .fault = filemap_fault, 1430f1820361SKirill A. Shutemov .map_pages = filemap_map_pages, 1431c59d87c4SChristoph Hellwig .page_mkwrite = xfs_vm_page_mkwrite, 14320b173bc4SKonstantin Khlebnikov .remap_pages = generic_file_remap_pages, 1433c59d87c4SChristoph Hellwig }; 1434
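/*
 * Standalone userspace sketch, separate from the kernel code above: it walks
 * the data segments of a file using the lseek(2) SEEK_DATA/SEEK_HOLE
 * semantics that xfs_file_llseek() implements for XFS. On XFS, an unwritten
 * extent whose pages sit uptodate in the page cache is reported as data,
 * which is exactly the case xfs_find_get_desired_pgoff() handles. Only
 * standard interfaces are used; defining _GNU_SOURCE exposes SEEK_DATA and
 * SEEK_HOLE with glibc. Error handling is kept minimal.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	off_t data, hole = 0;	/* 'hole' doubles as the search cursor */
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	for (;;) {
		/* Next byte of data at or after the current position. */
		data = lseek(fd, hole, SEEK_DATA);
		if (data < 0)
			break;	/* ENXIO: nothing but holes up to EOF */

		/* The hole (or implicit EOF hole) ending this data segment. */
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			break;

		printf("data: [%lld, %lld)\n", (long long)data, (long long)hole);
	}

	close(fd);
	return 0;
}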