// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_icache.h"
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"

#include <linux/falloc.h>
#include <linux/backing-dev.h>
#include <linux/mman.h>
#include <linux/fadvise.h>
#include <linux/mount.h>

static const struct vm_operations_struct xfs_file_vm_ops;

/*
 * Decide if the given file range is aligned to the size of the fundamental
 * allocation unit for the file.
 */
static bool
xfs_is_falloc_aligned(
	struct xfs_inode	*ip,
	loff_t			pos,
	long long int		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint64_t		mask;

	if (XFS_IS_REALTIME_INODE(ip)) {
		if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
			u64	rextbytes;
			u32	mod;

			rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
			div_u64_rem(pos, rextbytes, &mod);
			if (mod)
				return false;
			div_u64_rem(len, rextbytes, &mod);
			return mod == 0;
		}
		mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
	} else {
		mask = mp->m_sb.sb_blocksize - 1;
	}

	return !((pos | len) & mask);
}

int
xfs_update_prealloc_flags(
	struct xfs_inode	*ip,
	enum xfs_prealloc_flags	flags)
{
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
			0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	if (!(flags & XFS_PREALLOC_INVISIBLE)) {
		VFS_I(ip)->i_mode &= ~S_ISUID;
		if (VFS_I(ip)->i_mode & S_IXGRP)
			VFS_I(ip)->i_mode &= ~S_ISGID;
		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	if (flags & XFS_PREALLOC_SET)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	if (flags & XFS_PREALLOC_CLEAR)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	if (flags & XFS_PREALLOC_SYNC)
		xfs_trans_set_sync(tp);
	return xfs_trans_commit(tp);
}

/*
 * Fsync operations on directories are much simpler than on regular files,
 * as there is no file data to flush, and thus also no need for explicit
 * cache flush operations, and there are no non-transaction metadata updates
 * on directories either.
 */
STATIC int
xfs_dir_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);

	trace_xfs_dir_fsync(ip);
	return xfs_log_force_inode(ip);
}

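/*
 * Regular file fsync: flush dirty data pages, issue a cache flush to the
 * realtime or data device if needed, and then force the log up to the last
 * LSN that modified the inode so all logged metadata changes are on disk.
 */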
STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_inode_log_item *iip = ip->i_itemp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = file_write_and_wait_range(file, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * If we have an RT and/or log subvolume we need to make sure to flush
	 * the write cache of the device used for file data first. This is to
	 * ensure newly written file data makes it to disk before logging the
	 * new inode size in case of an extending write.
	 */
	if (XFS_IS_REALTIME_INODE(ip))
		xfs_blkdev_issue_flush(mp->m_rtdev_targp);
	else if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	/*
	 * All metadata updates are logged, which means that we just have to
	 * flush the log up to the latest LSN that touched the inode. If we have
	 * concurrent fsync/fdatasync() calls, we need them to all block on the
	 * log force before we clear the ili_fsync_fields field. This ensures
	 * that we don't get a racing sync operation that does not wait for the
	 * metadata to hit the journal before returning. If we race with
	 * clearing the ili_fsync_fields, then all that will happen is the log
	 * force will do nothing as the lsn will already be on disk. We can't
	 * race with setting ili_fsync_fields because that is done under
	 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
	 * until after the ili_fsync_fields is cleared.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = iip->ili_last_lsn;
	}

	if (lsn) {
		error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
		spin_lock(&iip->ili_lock);
		iip->ili_fsync_fields = 0;
		spin_unlock(&iip->ili_lock);
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
	    mp->m_logdev_targp == mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return error;
}

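/*
 * Direct I/O reads run under the shared iolock, which is taken without
 * blocking for IOCB_NOWAIT callers, and are handed straight to the iomap
 * direct I/O code.
 */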
STATIC ssize_t
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	size_t			count = iov_iter_count(to);
	ssize_t			ret;

	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	file_accessed(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL,
			is_sync_kiocb(iocb));
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

static noinline ssize_t
xfs_file_dax_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
	size_t			count = iov_iter_count(to);
	ssize_t			ret = 0;

	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}

	ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(iocb->ki_filp);
	return ret;
}

STATIC ssize_t
xfs_file_buffered_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	ssize_t			ret;

	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = generic_file_read_iter(iocb, to);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
	ssize_t			ret = 0;

	XFS_STATS_INC(mp, xs_read_calls);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (IS_DAX(inode))
		ret = xfs_file_dax_read(iocb, to);
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_read(iocb, to);
	else
		ret = xfs_file_buffered_aio_read(iocb, to);

	if (ret > 0)
		XFS_STATS_ADD(mp, xs_read_bytes, ret);
	return ret;
}

/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held. Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct kiocb		*iocb,
	struct iov_iter		*from,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			error = 0;
	size_t			count = iov_iter_count(from);
	bool			drained_dio = false;
	loff_t			isize;

restart:
	error = generic_write_checks(iocb, from);
	if (error <= 0)
		return error;

	error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
	if (error)
		return error;

	/*
	 * For changing security info in file_remove_privs() we need i_rwsem
	 * exclusively.
	 */
	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
		xfs_iunlock(ip, *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		xfs_ilock(ip, *iolock);
		goto restart;
	}
	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write. If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which implies
	 * having to redo all checks before.
	 *
	 * We need to serialise against EOF updates that occur in IO
	 * completions here. We want to make sure that nobody is changing the
	 * size while we do this check until we have placed an IO barrier (i.e.
	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
	 * The spinlock effectively forms a memory barrier once we have the
	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
	 * and hence be able to correctly determine if we need to run zeroing.
	 */
	spin_lock(&ip->i_flags_lock);
	isize = i_size_read(inode);
	if (iocb->ki_pos > isize) {
		spin_unlock(&ip->i_flags_lock);
		if (!drained_dio) {
			if (*iolock == XFS_IOLOCK_SHARED) {
				xfs_iunlock(ip, *iolock);
				*iolock = XFS_IOLOCK_EXCL;
				xfs_ilock(ip, *iolock);
				iov_iter_reexpand(from, count);
			}
			/*
			 * We now have an IO submission barrier in place, but
			 * AIO can do EOF updates during IO completion and hence
			 * we now need to wait for all of them to drain. Non-AIO
			 * DIO will have drained before we are given the
			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
			 * no-op.
			 */
			inode_dio_wait(inode);
			drained_dio = true;
			goto restart;
		}

		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
		error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
				NULL, &xfs_buffered_write_iomap_ops);
		if (error)
			return error;
	} else
		spin_unlock(&ip->i_flags_lock);

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above. Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	return file_modified(file);
}

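/*
 * Direct I/O write completion: account the bytes written, finish COW
 * remapping or unwritten extent conversion as required, and update the
 * in-core and on-disk file size for extending writes.
 */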
static int
xfs_dio_write_end_io(
	struct kiocb		*iocb,
	ssize_t			size,
	int			error,
	unsigned		flags)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			offset = iocb->ki_pos;
	unsigned int		nofs_flag;

	trace_xfs_end_io_direct_write(ip, offset, size);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (error)
		return error;
	if (!size)
		return 0;

	/*
	 * Capture amount written on completion as we can't reliably account
	 * for it on submission.
	 */
	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim. To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	if (flags & IOMAP_DIO_COW) {
		error = xfs_reflink_end_cow(ip, offset, size);
		if (error)
			goto out;
	}

	/*
	 * Unwritten conversion updates the in-core isize after extent
	 * conversion but before updating the on-disk size. Updating isize any
	 * earlier allows a racing dio read to find unwritten extents before
	 * they are converted.
	 */
	if (flags & IOMAP_DIO_UNWRITTEN) {
		error = xfs_iomap_write_unwritten(ip, offset, size, true);
		goto out;
	}

	/*
	 * We need to update the in-core inode size here so that we don't end up
	 * with the on-disk inode size being outside the in-core inode size. We
	 * have no other method of updating EOF for AIO, so always do it here
	 * if necessary.
	 *
	 * We need to lock the test/set EOF update as we can be racing with
	 * other IO completions here to update the EOF. Failing to serialise
	 * here can result in EOF moving backwards and Bad Things Happen when
	 * that occurs.
	 */
	spin_lock(&ip->i_flags_lock);
	if (offset + size > i_size_read(inode)) {
		i_size_write(inode, offset + size);
		spin_unlock(&ip->i_flags_lock);
		error = xfs_setfilesize(ip, offset, size);
	} else {
		spin_unlock(&ip->i_flags_lock);
	}

out:
	memalloc_nofs_restore(nofs_flag);
	return error;
}

static const struct iomap_dio_ops xfs_dio_write_ops = {
	.end_io		= xfs_dio_write_end_io,
};

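/*
 * Example of the sub-block alignment rule used below, assuming a 4096 byte
 * filesystem block size: a direct write of 2048 bytes at offset 4096 passes
 * the device sector alignment check, but it does not cover whole filesystem
 * blocks, so it is treated as unaligned and must run with the iolock held
 * exclusively and all other direct I/O drained.
 */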

/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer. To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/*
	 * Don't take the exclusive iolock here unless the I/O is unaligned to
	 * the file system block size. We don't need to consider the EOF
	 * extension case here because xfs_file_aio_write_checks() will relock
	 * the inode as necessary for EOF zeroing cases and fill out the new
	 * inode size as appropriate.
	 */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;

		/*
		 * We can't properly handle unaligned direct I/O to reflink
		 * files yet, as we can't unshare a partial block.
		 */
		if (xfs_is_cow_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
			return -ENOTBLK;
		}
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* unaligned dio always waits, bail */
		if (unaligned_io)
			return -EAGAIN;
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);

	/*
	 * If we are doing unaligned IO, we can't allow any other overlapping IO
	 * in-flight at the same time or we risk data corruption. Wait for all
	 * other IO to drain before we submit. If the IO is aligned, demote the
	 * iolock if we had to take the exclusive lock in
	 * xfs_file_aio_write_checks() for other reasons.
	 */
	if (unaligned_io) {
		inode_dio_wait(inode);
	} else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
	/*
	 * If unaligned, this is the only IO in-flight. Wait on it before we
	 * release the iolock to prevent subsequent overlapping IO.
	 */
	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
			   &xfs_dio_write_ops,
			   is_sync_kiocb(iocb) || unaligned_io);
out:
	xfs_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO after short writes for XFS; direct I/O
	 * will either complete fully or return an error.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}

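/*
 * DAX writes are performed synchronously through dax_iomap_rw() under the
 * exclusive iolock; for extending writes the in-core and on-disk file sizes
 * are updated afterwards.
 */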
static noinline ssize_t
xfs_file_dax_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			iolock = XFS_IOLOCK_EXCL;
	ssize_t			ret, error = 0;
	size_t			count;
	loff_t			pos;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	pos = iocb->ki_pos;
	count = iov_iter_count(from);

	trace_xfs_file_dax_write(ip, count, pos);
	ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		error = xfs_setfilesize(ip, pos, ret);
	}
out:
	xfs_iunlock(ip, iolock);
	if (error)
		return error;

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	int			iolock;

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

write_retry:
	iolock = XFS_IOLOCK_EXCL;
	xfs_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
	ret = iomap_file_buffered_write(iocb, from,
			&xfs_buffered_write_iomap_ops);
	if (likely(ret >= 0))
		iocb->ki_pos += ret;

	/*
	 * If we hit a space limit, try to free up some lingering preallocated
	 * space before returning an error. In the case of ENOSPC, first try to
	 * write back all dirty inodes to free up some of the excess reserved
	 * metadata space. This reduces the chances that the eofblocks scan
	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
	 * also behaves as a filter to prevent too many eofblocks scans from
	 * running at the same time.
	 */
	if (ret == -EDQUOT && !enospc) {
		xfs_iunlock(ip, iolock);
		enospc = xfs_inode_free_quota_eofblocks(ip);
		if (enospc)
			goto write_retry;
		enospc = xfs_inode_free_quota_cowblocks(ip);
		if (enospc)
			goto write_retry;
		iolock = 0;
	} else if (ret == -ENOSPC && !enospc) {
		struct xfs_eofblocks eofb = {0};

		enospc = 1;
		xfs_flush_inodes(ip->i_mount);

		xfs_iunlock(ip, iolock);
		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
		xfs_icache_free_cowblocks(ip->i_mount, &eofb);
		goto write_retry;
	}

	current->backing_dev_info = NULL;
out:
	if (iolock)
		xfs_iunlock(ip, iolock);

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

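/*
 * Top level write iterator: dispatch to the DAX, direct or buffered write
 * path. Direct writes only fall back to buffered I/O for the reflink CoW
 * case signalled by -ENOTBLK.
 */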
STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(ip->i_mount, xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (IS_DAX(inode))
		return xfs_file_dax_write(iocb, from);

	if (iocb->ki_flags & IOCB_DIRECT) {
		/*
		 * Allow a directio write to fall back to a buffered
		 * write *only* in the case that we're doing a reflink
		 * CoW. In all other directio scenarios we do not
		 * allow an operation to fall back to buffered mode.
		 */
		ret = xfs_file_dio_aio_write(iocb, from);
		if (ret != -ENOTBLK)
			return ret;
	}

	return xfs_file_buffered_aio_write(iocb, from);
}

static void
xfs_wait_dax_page(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	schedule();
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
}

static int
xfs_break_dax_layouts(
	struct inode		*inode,
	bool			*retry)
{
	struct page		*page;

	ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));

	page = dax_layout_busy_page(inode->i_mapping);
	if (!page)
		return 0;

	*retry = true;
	return ___wait_var_event(&page->_refcount,
			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
			0, 0, xfs_wait_dax_page(inode));
}

int
xfs_break_layouts(
	struct inode		*inode,
	uint			*iolock,
	enum layout_break_reason reason)
{
	bool			retry;
	int			error;

	ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));

	do {
		retry = false;
		switch (reason) {
		case BREAK_UNMAP:
			error = xfs_break_dax_layouts(inode, &retry);
			if (error || retry)
				break;
			/* fall through */
		case BREAK_WRITE:
			error = xfs_break_leased_layouts(inode, iolock, &retry);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EINVAL;
		}
	} while (error == 0 && retry);

	return error;
}

#define	XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
		 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)

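/*
 * fallocate() implementation: hole punch, zero range, collapse, insert,
 * unshare and preallocation are all handled here with both the iolock and
 * the mmap lock held exclusively for the duration of the operation.
 */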
STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	long			error;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	loff_t			new_size = 0;
	bool			do_file_insert = false;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
	if (error)
		goto out_unlock;

	/*
	 * Must wait for all AIO to complete before we continue as AIO can
	 * change the file size on completion without holding any locks we
	 * currently hold. We must do this first because AIO can update both
	 * the on disk and in memory inode sizes, and the operations that follow
	 * require the in-memory size to be fully up-to-date.
	 */
	inode_dio_wait(inode);

	/*
	 * Now that AIO and DIO have drained we flush and (if necessary)
	 * invalidate the cached range over the first operation we are about
	 * to run.
	 *
	 * We care about zero and collapse here because they both run a hole
	 * punch over the range first. Because that can zero data, and the range
	 * of invalidation for the shift operations is much larger, we still do
	 * the required flush for collapse in xfs_prepare_shift().
	 *
	 * Insert has the same range requirements as collapse, and we extend the
	 * file first which can zero data. Hence insert has the same
	 * flush/invalidate requirements as collapse and so they are both
	 * handled at the right time by xfs_prepare_shift().
	 */
	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
		    FALLOC_FL_COLLAPSE_RANGE)) {
		error = xfs_flush_unmap_range(ip, offset, len);
		if (error)
			goto out_unlock;
	}

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * There is no need to overlap collapse range with EOF,
		 * in which case it is effectively a truncate operation
		 */
		if (offset + len >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}

		new_size = i_size_read(inode) - len;

		error = xfs_collapse_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		loff_t		isize = i_size_read(inode);

		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * New inode size must not exceed ->s_maxbytes, accounting for
		 * possible signed overflow.
		 */
		if (inode->i_sb->s_maxbytes - isize < len) {
			error = -EFBIG;
			goto out_unlock;
		}
		new_size = isize + len;

		/* Offset should be less than i_size */
		if (offset >= isize) {
			error = -EINVAL;
			goto out_unlock;
		}
		do_file_insert = true;
	} else {
		flags |= XFS_PREALLOC_SET;

		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = inode_newsize_ok(inode, new_size);
			if (error)
				goto out_unlock;
		}

		if (mode & FALLOC_FL_ZERO_RANGE) {
			/*
			 * Punch a hole and prealloc the range. We use a hole
			 * punch rather than unwritten extent conversion for two
			 * reasons:
			 *
			 * 1.) Hole punch handles partial block zeroing for us.
			 * 2.) If prealloc returns ENOSPC, the file range is
			 * still zero-valued by virtue of the hole punch.
			 */
			unsigned int blksize = i_blocksize(inode);

			trace_xfs_zero_file_space(ip);

			error = xfs_free_file_space(ip, offset, len);
			if (error)
				goto out_unlock;

			len = round_up(offset + len, blksize) -
			      round_down(offset, blksize);
			offset = round_down(offset, blksize);
		} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
			error = xfs_reflink_unshare(ip, offset, len);
			if (error)
				goto out_unlock;
		} else {
			/*
			 * In always_cow mode we can't use preallocations and
			 * thus should not create them.
			 */
			if (xfs_is_always_cow_inode(ip)) {
				error = -EOPNOTSUPP;
				goto out_unlock;
			}
		}

		if (!xfs_is_always_cow_inode(ip)) {
			error = xfs_alloc_file_space(ip, offset, len,
						     XFS_BMAPI_PREALLOC);
			if (error)
				goto out_unlock;
		}
	}

	if (file->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;

	error = xfs_update_prealloc_flags(ip, flags);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_vn_setattr_size(file_mnt_user_ns(file),
					    file_dentry(file), &iattr);
		if (error)
			goto out_unlock;
	}

	/*
	 * Perform hole insertion now that the file size has been
	 * updated so that if we crash during the operation we don't
	 * leave shifted extents past EOF and hence lose access to
	 * the data that is contained within them.
	 */
	if (do_file_insert)
		error = xfs_insert_file_space(ip, offset, len);

out_unlock:
	xfs_iunlock(ip, iolock);
	return error;
}

STATIC int
xfs_file_fadvise(
	struct file	*file,
	loff_t		start,
	loff_t		end,
	int		advice)
{
	struct xfs_inode *ip = XFS_I(file_inode(file));
	int ret;
	int lockflags = 0;

	/*
	 * Operations creating pages in page cache need protection from hole
	 * punching and similar ops
	 */
	if (advice == POSIX_FADV_WILLNEED) {
		lockflags = XFS_IOLOCK_SHARED;
		xfs_ilock(ip, lockflags);
	}
	ret = generic_fadvise(file, start, end, advice);
	if (lockflags)
		xfs_iunlock(ip, lockflags);
	return ret;
}

/* Does this file, inode, or mount want synchronous writes? */
static inline bool xfs_file_sync_writes(struct file *filp)
{
	struct xfs_inode	*ip = XFS_I(file_inode(filp));

	if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
		return true;
	if (filp->f_flags & (__O_SYNC | O_DSYNC))
		return true;
	if (IS_SYNC(file_inode(filp)))
		return true;

	return false;
}

STATIC loff_t
xfs_file_remap_range(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	loff_t			len,
	unsigned int		remap_flags)
{
	struct inode		*inode_in = file_inode(file_in);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*dest = XFS_I(inode_out);
	struct xfs_mount	*mp = src->i_mount;
	loff_t			remapped = 0;
	xfs_extlen_t		cowextsize;
	int			ret;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return -EOPNOTSUPP;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* Prepare and then clone file data. */
	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
			&len, remap_flags);
	if (ret || len == 0)
		return ret;

	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
			&remapped);
	if (ret)
		goto out_unlock;

	/*
	 * Carry the cowextsize hint from src to dest if we're sharing the
	 * entire source file to the entire destination file, the source file
	 * has a cowextsize hint, and the destination file does not.
	 */
	cowextsize = 0;
	if (pos_in == 0 && len == i_size_read(inode_in) &&
	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
	    pos_out == 0 && len >= i_size_read(inode_out) &&
	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
		cowextsize = src->i_d.di_cowextsize;

	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
			remap_flags);
	if (ret)
		goto out_unlock;

	if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
		xfs_log_force_inode(dest);
out_unlock:
	xfs_iunlock2_io_mmap(src, dest);
	if (ret)
		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
	return remapped > 0 ? remapped : ret;
}

STATIC int
xfs_file_open(
	struct inode	*inode,
	struct file	*file)
{
	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
		return -EIO;
	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
	return 0;
}

STATIC int
xfs_dir_open(
	struct inode	*inode,
	struct file	*file)
{
	struct xfs_inode *ip = XFS_I(inode);
	int		mode;
	int		error;

	error = xfs_file_open(inode, file);
	if (error)
		return error;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.
	 */
	mode = xfs_ilock_data_map_shared(ip);
	if (ip->i_df.if_nextents > 0)
		error = xfs_dir3_data_readahead(ip, 0, 0);
	xfs_iunlock(ip, mode);
	return error;
}

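/*
 * ->release is called when the last reference to the struct file goes away;
 * give xfs_release() the chance to trim speculative preallocation beyond EOF.
 */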
STATIC int
xfs_file_release(
	struct inode	*inode,
	struct file	*filp)
{
	return xfs_release(XFS_I(inode));
}

STATIC int
xfs_file_readdir(
	struct file	*file,
	struct dir_context *ctx)
{
	struct inode	*inode = file_inode(file);
	xfs_inode_t	*ip = XFS_I(inode);
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass the total size of the buffer we read
	 * into down to the filesystem. With the filldir concept it's not
	 * needed for correct information, but the XFS dir2 leaf code wants
	 * an estimate of the buffer size to calculate its readahead window
	 * and size the buffers used for mapping to physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some
	 * point we can change the ->readdir prototype to include the
	 * buffer size. For now we use the current glibc buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);

	return xfs_readdir(NULL, ip, ctx, bufsize);
}

STATIC loff_t
xfs_file_llseek(
	struct file	*file,
	loff_t		offset,
	int		whence)
{
	struct inode		*inode = file->f_mapping->host;

	if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
		return -EIO;

	switch (whence) {
	default:
		return generic_file_llseek(file, offset, whence);
	case SEEK_HOLE:
		offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
		break;
	case SEEK_DATA:
		offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
		break;
	}

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}

/*
 * Locking for serialisation of IO during page faults. This results in a lock
 * ordering of:
 *
 * mmap_lock (MM)
 *   sb_start_pagefault(vfs, freeze)
 *     i_mmaplock (XFS - truncate serialisation)
 *       page_lock (MM)
 *         i_lock (XFS - extent map serialisation)
 */
static vm_fault_t
__xfs_filemap_fault(
	struct vm_fault		*vmf,
	enum page_entry_size	pe_size,
	bool			write_fault)
{
	struct inode		*inode = file_inode(vmf->vma->vm_file);
	struct xfs_inode	*ip = XFS_I(inode);
	vm_fault_t		ret;

	trace_xfs_filemap_fault(ip, pe_size, write_fault);

	if (write_fault) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vmf->vma->vm_file);
	}

	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	if (IS_DAX(inode)) {
		pfn_t pfn;

		ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
				(write_fault && !vmf->cow_page) ?
				 &xfs_direct_write_iomap_ops :
				 &xfs_read_iomap_ops);
		if (ret & VM_FAULT_NEEDDSYNC)
			ret = dax_finish_sync_fault(vmf, pe_size, pfn);
	} else {
		if (write_fault)
			ret = iomap_page_mkwrite(vmf,
					&xfs_buffered_write_iomap_ops);
		else
			ret = filemap_fault(vmf);
	}
	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);

	if (write_fault)
		sb_end_pagefault(inode->i_sb);
	return ret;
}

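/*
 * Only shared, writable mappings are treated as write faults here; write
 * faults on private mappings operate on a CoW copy of the page rather than
 * on the file itself.
 */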
static inline bool
xfs_is_write_fault(
	struct vm_fault		*vmf)
{
	return (vmf->flags & FAULT_FLAG_WRITE) &&
	       (vmf->vma->vm_flags & VM_SHARED);
}

static vm_fault_t
xfs_filemap_fault(
	struct vm_fault		*vmf)
{
	/* DAX can shortcut the normal fault path on write faults! */
	return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
			IS_DAX(file_inode(vmf->vma->vm_file)) &&
			xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_huge_fault(
	struct vm_fault		*vmf,
	enum page_entry_size	pe_size)
{
	if (!IS_DAX(file_inode(vmf->vma->vm_file)))
		return VM_FAULT_FALLBACK;

	/* DAX can shortcut the normal fault path on write faults! */
	return __xfs_filemap_fault(vmf, pe_size,
			xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_page_mkwrite(
	struct vm_fault		*vmf)
{
	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}

/*
 * pfn_mkwrite was originally intended to ensure we capture time stamp updates
 * on write faults. In reality, it needs to serialise against truncate and
 * prepare memory for writing so handle it as a standard write fault.
 */
static vm_fault_t
xfs_filemap_pfn_mkwrite(
	struct vm_fault		*vmf)
{

	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}

static void
xfs_filemap_map_pages(
	struct vm_fault		*vmf,
	pgoff_t			start_pgoff,
	pgoff_t			end_pgoff)
{
	struct inode		*inode = file_inode(vmf->vma->vm_file);

	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	filemap_map_pages(vmf, start_pgoff, end_pgoff);
	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
}

static const struct vm_operations_struct xfs_file_vm_ops = {
	.fault		= xfs_filemap_fault,
	.huge_fault	= xfs_filemap_huge_fault,
	.map_pages	= xfs_filemap_map_pages,
	.page_mkwrite	= xfs_filemap_page_mkwrite,
	.pfn_mkwrite	= xfs_filemap_pfn_mkwrite,
};

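/*
 * mmap() entry point: reject MAP_SYNC mappings that the backing device
 * cannot support and mark DAX mappings as huge page capable.
 */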
STATIC int
xfs_file_mmap(
	struct file		*file,
	struct vm_area_struct	*vma)
{
	struct inode		*inode = file_inode(file);
	struct xfs_buftarg	*target = xfs_inode_buftarg(XFS_I(inode));

	/*
	 * We don't support synchronous mappings for non-DAX files and
	 * for DAX files if the underlying dax_device is not synchronous.
	 */
	if (!daxdev_mapping_supported(vma, target->bt_daxdev))
		return -EOPNOTSUPP;

	file_accessed(file);
	vma->vm_ops = &xfs_file_vm_ops;
	if (IS_DAX(inode))
		vma->vm_flags |= VM_HUGEPAGE;
	return 0;
}

const struct file_operations xfs_file_operations = {
	.llseek		= xfs_file_llseek,
	.read_iter	= xfs_file_read_iter,
	.write_iter	= xfs_file_write_iter,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.iopoll		= iomap_dio_iopoll,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.mmap		= xfs_file_mmap,
	.mmap_supported_flags = MAP_SYNC,
	.open		= xfs_file_open,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
	.get_unmapped_area = thp_get_unmapped_area,
	.fallocate	= xfs_file_fallocate,
	.fadvise	= xfs_file_fadvise,
	.remap_file_range = xfs_file_remap_range,
};

const struct file_operations xfs_dir_file_operations = {
	.open		= xfs_dir_open,
	.read		= generic_read_dir,
	.iterate_shared	= xfs_file_readdir,
	.llseek		= generic_file_llseek,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.fsync		= xfs_dir_fsync,
};