// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_icache.h"
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"

#include <linux/falloc.h>
#include <linux/backing-dev.h>
#include <linux/mman.h>
#include <linux/fadvise.h>

static const struct vm_operations_struct xfs_file_vm_ops;

/*
 * Decide if the given file range is aligned to the size of the fundamental
 * allocation unit for the file.
 */
static bool
xfs_is_falloc_aligned(
	struct xfs_inode	*ip,
	loff_t			pos,
	long long int		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint64_t		mask;

	if (XFS_IS_REALTIME_INODE(ip)) {
		if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
			u64	rextbytes;
			u32	mod;

			rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
			div_u64_rem(pos, rextbytes, &mod);
			if (mod)
				return false;
			div_u64_rem(len, rextbytes, &mod);
			return mod == 0;
		}
		mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
	} else {
		mask = mp->m_sb.sb_blocksize - 1;
	}

	return !((pos | len) & mask);
}

int
xfs_update_prealloc_flags(
	struct xfs_inode	*ip,
	enum xfs_prealloc_flags	flags)
{
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
			0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	if (!(flags & XFS_PREALLOC_INVISIBLE)) {
		VFS_I(ip)->i_mode &= ~S_ISUID;
		if (VFS_I(ip)->i_mode & S_IXGRP)
			VFS_I(ip)->i_mode &= ~S_ISGID;
		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	if (flags & XFS_PREALLOC_SET)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	if (flags & XFS_PREALLOC_CLEAR)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	if (flags & XFS_PREALLOC_SYNC)
		xfs_trans_set_sync(tp);
	return xfs_trans_commit(tp);
}

/*
 * Fsync operations on directories are much simpler than on regular files,
 * as there is no file data to flush, and thus also no need for explicit
 * cache flush operations, and there are no non-transaction metadata updates
 * on directories either.
 */
STATIC int
xfs_dir_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);

	trace_xfs_dir_fsync(ip);
	return xfs_log_force_inode(ip);
}

STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_inode_log_item *iip = ip->i_itemp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = file_write_and_wait_range(file, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * If we have an RT and/or log subvolume we need to make sure to flush
	 * the write cache of the device used for file data first.  This is to
	 * ensure newly written file data make it to disk before logging the new
	 * inode size in case of an extending write.
	 */
	if (XFS_IS_REALTIME_INODE(ip))
		xfs_blkdev_issue_flush(mp->m_rtdev_targp);
	else if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	/*
	 * All metadata updates are logged, which means that we just have to
	 * flush the log up to the latest LSN that touched the inode. If we have
	 * concurrent fsync/fdatasync() calls, we need them to all block on the
	 * log force before we clear the ili_fsync_fields field. This ensures
	 * that we don't get a racing sync operation that does not wait for the
	 * metadata to hit the journal before returning.  If we race with
	 * clearing the ili_fsync_fields, then all that will happen is the log
	 * force will do nothing as the lsn will already be on disk.  We can't
	 * race with setting ili_fsync_fields because that is done under
	 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
	 * until after the ili_fsync_fields is cleared.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = iip->ili_last_lsn;
	}

	if (lsn) {
		error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
		spin_lock(&iip->ili_lock);
		iip->ili_fsync_fields = 0;
		spin_unlock(&iip->ili_lock);
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
	    mp->m_logdev_targp == mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return error;
}

STATIC ssize_t
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	size_t			count = iov_iter_count(to);
	ssize_t			ret;

	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	file_accessed(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

static noinline ssize_t
xfs_file_dax_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
	size_t			count = iov_iter_count(to);
	ssize_t			ret = 0;

	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}

	ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(iocb->ki_filp);
	return ret;
}

STATIC ssize_t
xfs_file_buffered_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	ssize_t			ret;

	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = generic_file_read_iter(iocb, to);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
	ssize_t			ret = 0;

	XFS_STATS_INC(mp, xs_read_calls);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (IS_DAX(inode))
		ret = xfs_file_dax_read(iocb, to);
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_read(iocb, to);
	else
		ret = xfs_file_buffered_aio_read(iocb, to);

	if (ret > 0)
		XFS_STATS_ADD(mp, xs_read_bytes, ret);
	return ret;
}

/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct kiocb		*iocb,
	struct iov_iter		*from,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			error = 0;
	size_t			count = iov_iter_count(from);
	bool			drained_dio = false;
	loff_t			isize;

restart:
	error = generic_write_checks(iocb, from);
	if (error <= 0)
		return error;

	error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
	if (error)
		return error;

	/*
	 * For changing security info in file_remove_privs() we need i_rwsem
	 * exclusively.
	 */
	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
		xfs_iunlock(ip, *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		xfs_ilock(ip, *iolock);
		goto restart;
	}
	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write.  If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which implies
	 * having to redo all checks before.
	 *
	 * We need to serialise against EOF updates that occur in IO
	 * completions here. We want to make sure that nobody is changing the
	 * size while we do this check until we have placed an IO barrier (i.e.
	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
	 * The spinlock effectively forms a memory barrier once we have the
	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
	 * and hence be able to correctly determine if we need to run zeroing.
	 */
	spin_lock(&ip->i_flags_lock);
	isize = i_size_read(inode);
	if (iocb->ki_pos > isize) {
		spin_unlock(&ip->i_flags_lock);
		if (!drained_dio) {
			if (*iolock == XFS_IOLOCK_SHARED) {
				xfs_iunlock(ip, *iolock);
				*iolock = XFS_IOLOCK_EXCL;
				xfs_ilock(ip, *iolock);
				iov_iter_reexpand(from, count);
			}
			/*
			 * We now have an IO submission barrier in place, but
			 * AIO can do EOF updates during IO completion and hence
			 * we now need to wait for all of them to drain.  Non-AIO
			 * DIO will have drained before we are given the
			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
			 * no-op.
			 */
			inode_dio_wait(inode);
			drained_dio = true;
			goto restart;
		}

		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
		error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
				NULL, &xfs_buffered_write_iomap_ops);
		if (error)
			return error;
	} else
		spin_unlock(&ip->i_flags_lock);

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above.  Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	return file_modified(file);
}

static int
xfs_dio_write_end_io(
	struct kiocb		*iocb,
	ssize_t			size,
	int			error,
	unsigned		flags)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			offset = iocb->ki_pos;
	unsigned int		nofs_flag;

	trace_xfs_end_io_direct_write(ip, offset, size);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (error)
		return error;
	if (!size)
		return 0;

	/*
	 * Capture amount written on completion as we can't reliably account
	 * for it on submission.
	 */
	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim.  To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	if (flags & IOMAP_DIO_COW) {
		error = xfs_reflink_end_cow(ip, offset, size);
		if (error)
			goto out;
	}

	/*
	 * Unwritten conversion updates the in-core isize after extent
	 * conversion but before updating the on-disk size. Updating isize any
	 * earlier allows a racing dio read to find unwritten extents before
	 * they are converted.
	 */
	if (flags & IOMAP_DIO_UNWRITTEN) {
		error = xfs_iomap_write_unwritten(ip, offset, size, true);
		goto out;
	}

	/*
	 * We need to update the in-core inode size here so that we don't end up
	 * with the on-disk inode size being outside the in-core inode size. We
	 * have no other method of updating EOF for AIO, so always do it here
	 * if necessary.
	 *
	 * We need to lock the test/set EOF update as we can be racing with
	 * other IO completions here to update the EOF. Failing to serialise
	 * here can result in EOF moving backwards and Bad Things Happen when
	 * that occurs.
	 */
	spin_lock(&ip->i_flags_lock);
	if (offset + size > i_size_read(inode)) {
		i_size_write(inode, offset + size);
		spin_unlock(&ip->i_flags_lock);
		error = xfs_setfilesize(ip, offset, size);
	} else {
		spin_unlock(&ip->i_flags_lock);
	}

out:
	memalloc_nofs_restore(nofs_flag);
	return error;
}

static const struct iomap_dio_ops xfs_dio_write_ops = {
	.end_io		= xfs_dio_write_end_io,
};

/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/*
	 * Don't take the exclusive iolock here unless the I/O is unaligned to
	 * the file system block size.  We don't need to consider the EOF
	 * extension case here because xfs_file_aio_write_checks() will relock
	 * the inode as necessary for EOF zeroing cases and fill out the new
	 * inode size as appropriate.
	 */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;

		/*
		 * We can't properly handle unaligned direct I/O to reflink
		 * files yet, as we can't unshare a partial block.
		 */
		if (xfs_is_cow_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
			return -ENOTBLK;
		}
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* unaligned dio always waits, bail */
		if (unaligned_io)
			return -EAGAIN;
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);

	/*
	 * If we are doing unaligned IO, we can't allow any other overlapping IO
	 * in-flight at the same time or we risk data corruption. Wait for all
	 * other IO to drain before we submit. If the IO is aligned, demote the
	 * iolock if we had to take the exclusive lock in
	 * xfs_file_aio_write_checks() for other reasons.
	 */
	if (unaligned_io) {
		inode_dio_wait(inode);
	} else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
	/*
	 * If unaligned, this is the only IO in-flight. Wait on it before we
	 * release the iolock to prevent subsequent overlapping IO.
	 */
	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
			   &xfs_dio_write_ops,
			   unaligned_io ? IOMAP_DIO_FORCE_WAIT : 0);
out:
	xfs_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO after short writes for XFS, direct I/O
	 * will either complete fully or return an error.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}

static noinline ssize_t
xfs_file_dax_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			iolock = XFS_IOLOCK_EXCL;
	ssize_t			ret, error = 0;
	size_t			count;
	loff_t			pos;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	pos = iocb->ki_pos;
	count = iov_iter_count(from);

	trace_xfs_file_dax_write(ip, count, pos);
	ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		error = xfs_setfilesize(ip, pos, ret);
	}
out:
	xfs_iunlock(ip, iolock);
	if (error)
		return error;

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	int			iolock;

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

write_retry:
	iolock = XFS_IOLOCK_EXCL;
	xfs_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
	ret = iomap_file_buffered_write(iocb, from,
			&xfs_buffered_write_iomap_ops);
	if (likely(ret >= 0))
		iocb->ki_pos += ret;

	/*
	 * If we hit a space limit, try to free up some lingering preallocated
	 * space before returning an error. In the case of ENOSPC, first try to
	 * write back all dirty inodes to free up some of the excess reserved
	 * metadata space. This reduces the chances that the eofblocks scan
	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
	 * also behaves as a filter to prevent too many eofblocks scans from
	 * running at the same time.
	 */
	if (ret == -EDQUOT && !enospc) {
		xfs_iunlock(ip, iolock);
		enospc = xfs_inode_free_quota_eofblocks(ip);
		if (enospc)
			goto write_retry;
		enospc = xfs_inode_free_quota_cowblocks(ip);
		if (enospc)
			goto write_retry;
		iolock = 0;
	} else if (ret == -ENOSPC && !enospc) {
		struct xfs_eofblocks eofb = {0};

		enospc = 1;
		xfs_flush_inodes(ip->i_mount);

		xfs_iunlock(ip, iolock);
		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
		xfs_icache_free_cowblocks(ip->i_mount, &eofb);
		goto write_retry;
	}

	current->backing_dev_info = NULL;
out:
	if (iolock)
		xfs_iunlock(ip, iolock);

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(ip->i_mount, xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (IS_DAX(inode))
		return xfs_file_dax_write(iocb, from);

	if (iocb->ki_flags & IOCB_DIRECT) {
		/*
		 * Allow a directio write to fall back to a buffered
		 * write *only* in the case that we're doing a reflink
		 * CoW.  In all other directio scenarios we do not
		 * allow an operation to fall back to buffered mode.
		 */
		ret = xfs_file_dio_aio_write(iocb, from);
		if (ret != -ENOTBLK)
			return ret;
	}

	return xfs_file_buffered_aio_write(iocb, from);
}

static void
xfs_wait_dax_page(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	schedule();
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
}

static int
xfs_break_dax_layouts(
	struct inode		*inode,
	bool			*retry)
{
	struct page		*page;

	ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));

	page = dax_layout_busy_page(inode->i_mapping);
	if (!page)
		return 0;

	*retry = true;
	return ___wait_var_event(&page->_refcount,
			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
			0, 0, xfs_wait_dax_page(inode));
}

int
xfs_break_layouts(
	struct inode		*inode,
	uint			*iolock,
	enum layout_break_reason reason)
{
	bool			retry;
	int			error;

	ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));

	do {
		retry = false;
		switch (reason) {
		case BREAK_UNMAP:
			error = xfs_break_dax_layouts(inode, &retry);
			if (error || retry)
				break;
			/* fall through */
		case BREAK_WRITE:
			error = xfs_break_leased_layouts(inode, iolock, &retry);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EINVAL;
		}
	} while (error == 0 && retry);

	return error;
}

#define XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
		 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)

STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	long			error;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	loff_t			new_size = 0;
	bool			do_file_insert = false;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
	if (error)
		goto out_unlock;

	/*
	 * Must wait for all AIO to complete before we continue as AIO can
	 * change the file size on completion without holding any locks we
	 * currently hold. We must do this first because AIO can update both
	 * the on disk and in memory inode sizes, and the operations that follow
	 * require the in-memory size to be fully up-to-date.
	 */
	inode_dio_wait(inode);

	/*
	 * Now that AIO and DIO have drained, we flush and (if necessary)
	 * invalidate the cached range over the first operation we are about
	 * to run.
	 *
	 * We care about zero and collapse here because they both run a hole
	 * punch over the range first. Because that can zero data, and the range
	 * of invalidation for the shift operations is much larger, we still do
	 * the required flush for collapse in xfs_prepare_shift().
	 *
	 * Insert has the same range requirements as collapse, and we extend the
	 * file first which can zero data. Hence insert has the same
	 * flush/invalidate requirements as collapse and so they are both
	 * handled at the right time by xfs_prepare_shift().
	 */
	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
		    FALLOC_FL_COLLAPSE_RANGE)) {
		error = xfs_flush_unmap_range(ip, offset, len);
		if (error)
			goto out_unlock;
	}

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * There is no need to overlap collapse range with EOF,
		 * in which case it is effectively a truncate operation
		 */
		if (offset + len >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}

		new_size = i_size_read(inode) - len;

		error = xfs_collapse_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		loff_t		isize = i_size_read(inode);

		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * New inode size must not exceed ->s_maxbytes, accounting for
		 * possible signed overflow.
		 */
		if (inode->i_sb->s_maxbytes - isize < len) {
			error = -EFBIG;
			goto out_unlock;
		}
		new_size = isize + len;

		/* Offset should be less than i_size */
		if (offset >= isize) {
			error = -EINVAL;
			goto out_unlock;
		}
		do_file_insert = true;
	} else {
		flags |= XFS_PREALLOC_SET;

		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = inode_newsize_ok(inode, new_size);
			if (error)
				goto out_unlock;
		}

		if (mode & FALLOC_FL_ZERO_RANGE) {
			/*
			 * Punch a hole and prealloc the range.  We use a hole
			 * punch rather than unwritten extent conversion for two
			 * reasons:
			 *
			 *   1.) Hole punch handles partial block zeroing for us.
			 *   2.) If prealloc returns ENOSPC, the file range is
			 *       still zero-valued by virtue of the hole punch.
			 */
			unsigned int blksize = i_blocksize(inode);

			trace_xfs_zero_file_space(ip);

			error = xfs_free_file_space(ip, offset, len);
			if (error)
				goto out_unlock;

			len = round_up(offset + len, blksize) -
			      round_down(offset, blksize);
			offset = round_down(offset, blksize);
		} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
			error = xfs_reflink_unshare(ip, offset, len);
			if (error)
				goto out_unlock;
		} else {
			/*
			 * If always_cow mode we can't use preallocations and
			 * thus should not create them.
			 */
			if (xfs_is_always_cow_inode(ip)) {
				error = -EOPNOTSUPP;
				goto out_unlock;
			}
		}

		if (!xfs_is_always_cow_inode(ip)) {
			error = xfs_alloc_file_space(ip, offset, len,
						     XFS_BMAPI_PREALLOC);
			if (error)
				goto out_unlock;
		}
	}

	if (file->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;

	error = xfs_update_prealloc_flags(ip, flags);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_vn_setattr_size(file_dentry(file), &iattr);
		if (error)
			goto out_unlock;
	}

	/*
	 * Perform hole insertion now that the file size has been
	 * updated so that if we crash during the operation we don't
	 * leave shifted extents past EOF and hence lose access to
	 * the data that is contained within them.
	 */
	if (do_file_insert)
		error = xfs_insert_file_space(ip, offset, len);

out_unlock:
	xfs_iunlock(ip, iolock);
	return error;
}

STATIC int
xfs_file_fadvise(
	struct file	*file,
	loff_t		start,
	loff_t		end,
	int		advice)
{
	struct xfs_inode *ip = XFS_I(file_inode(file));
	int ret;
	int lockflags = 0;

	/*
	 * Operations creating pages in page cache need protection from hole
	 * punching and similar ops
	 */
	if (advice == POSIX_FADV_WILLNEED) {
		lockflags = XFS_IOLOCK_SHARED;
		xfs_ilock(ip, lockflags);
	}
	ret = generic_fadvise(file, start, end, advice);
	if (lockflags)
		xfs_iunlock(ip, lockflags);
	return ret;
}

/* Does this file, inode, or mount want synchronous writes? */
static inline bool xfs_file_sync_writes(struct file *filp)
{
	struct xfs_inode	*ip = XFS_I(file_inode(filp));

	if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
		return true;
	if (filp->f_flags & (__O_SYNC | O_DSYNC))
		return true;
	if (IS_SYNC(file_inode(filp)))
		return true;

	return false;
}

STATIC loff_t
xfs_file_remap_range(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	loff_t			len,
	unsigned int		remap_flags)
{
	struct inode		*inode_in = file_inode(file_in);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*dest = XFS_I(inode_out);
	struct xfs_mount	*mp = src->i_mount;
	loff_t			remapped = 0;
	xfs_extlen_t		cowextsize;
	int			ret;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return -EOPNOTSUPP;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* Prepare and then clone file data. */
	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
			&len, remap_flags);
	if (ret || len == 0)
		return ret;

	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
			&remapped);
	if (ret)
		goto out_unlock;

	/*
	 * Carry the cowextsize hint from src to dest if we're sharing the
	 * entire source file to the entire destination file, the source file
	 * has a cowextsize hint, and the destination file does not.
	 */
	cowextsize = 0;
	if (pos_in == 0 && len == i_size_read(inode_in) &&
	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
	    pos_out == 0 && len >= i_size_read(inode_out) &&
	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
		cowextsize = src->i_d.di_cowextsize;

	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
			remap_flags);
	if (ret)
		goto out_unlock;

	if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
		xfs_log_force_inode(dest);
out_unlock:
	xfs_iunlock2_io_mmap(src, dest);
	if (ret)
		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
	return remapped > 0 ? remapped : ret;
}

STATIC int
xfs_file_open(
	struct inode	*inode,
	struct file	*file)
{
	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
		return -EIO;
	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
	return 0;
}

STATIC int
xfs_dir_open(
	struct inode	*inode,
	struct file	*file)
{
	struct xfs_inode *ip = XFS_I(inode);
	int		mode;
	int		error;

	error = xfs_file_open(inode, file);
	if (error)
		return error;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.
	 */
	mode = xfs_ilock_data_map_shared(ip);
	if (ip->i_df.if_nextents > 0)
		error = xfs_dir3_data_readahead(ip, 0, 0);
	xfs_iunlock(ip, mode);
	return error;
}

STATIC int
xfs_file_release(
	struct inode	*inode,
	struct file	*filp)
{
	return xfs_release(XFS_I(inode));
}

STATIC int
xfs_file_readdir(
	struct file	*file,
	struct dir_context *ctx)
{
	struct inode	*inode = file_inode(file);
	xfs_inode_t	*ip = XFS_I(inode);
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass the total size of the buffer we read
	 * into down to the filesystem.  With the filldir concept it's not
	 * needed for correct information, but the XFS dir2 leaf code wants
	 * an estimate of the buffer size to calculate its readahead window
	 * and size the buffers used for mapping to physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some
	 * point we can change the ->readdir prototype to include the
	 * buffer size.  For now we use the current glibc buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);

	return xfs_readdir(NULL, ip, ctx, bufsize);
}

STATIC loff_t
xfs_file_llseek(
	struct file	*file,
	loff_t		offset,
	int		whence)
{
	struct inode		*inode = file->f_mapping->host;

	if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
		return -EIO;

	switch (whence) {
	default:
		return generic_file_llseek(file, offset, whence);
	case SEEK_HOLE:
		offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
		break;
	case SEEK_DATA:
		offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
		break;
	}

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}

/*
 * Locking for serialisation of IO during page faults. This results in a lock
 * ordering of:
 *
 * mmap_lock (MM)
 *   sb_start_pagefault(vfs, freeze)
 *     i_mmaplock (XFS - truncate serialisation)
 *       page_lock (MM)
 *         i_lock (XFS - extent map serialisation)
 */
static vm_fault_t
__xfs_filemap_fault(
	struct vm_fault		*vmf,
	enum page_entry_size	pe_size,
	bool			write_fault)
{
	struct inode		*inode = file_inode(vmf->vma->vm_file);
	struct xfs_inode	*ip = XFS_I(inode);
	vm_fault_t		ret;

	trace_xfs_filemap_fault(ip, pe_size, write_fault);

	if (write_fault) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vmf->vma->vm_file);
	}

	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	if (IS_DAX(inode)) {
		pfn_t pfn;

		ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
				(write_fault && !vmf->cow_page) ?
				 &xfs_direct_write_iomap_ops :
				 &xfs_read_iomap_ops);
		if (ret & VM_FAULT_NEEDDSYNC)
			ret = dax_finish_sync_fault(vmf, pe_size, pfn);
	} else {
		if (write_fault)
			ret = iomap_page_mkwrite(vmf,
					&xfs_buffered_write_iomap_ops);
		else
			ret = filemap_fault(vmf);
	}
	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);

	if (write_fault)
		sb_end_pagefault(inode->i_sb);
	return ret;
}

static inline bool
xfs_is_write_fault(
	struct vm_fault		*vmf)
{
	return (vmf->flags & FAULT_FLAG_WRITE) &&
	       (vmf->vma->vm_flags & VM_SHARED);
}

static vm_fault_t
xfs_filemap_fault(
	struct vm_fault		*vmf)
{
	/* DAX can shortcut the normal fault path on write faults! */
	return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
			IS_DAX(file_inode(vmf->vma->vm_file)) &&
			xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_huge_fault(
	struct vm_fault		*vmf,
	enum page_entry_size	pe_size)
{
	if (!IS_DAX(file_inode(vmf->vma->vm_file)))
		return VM_FAULT_FALLBACK;

	/* DAX can shortcut the normal fault path on write faults! */
	return __xfs_filemap_fault(vmf, pe_size,
			xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_page_mkwrite(
	struct vm_fault		*vmf)
{
	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}

/*
 * pfn_mkwrite was originally intended to ensure we capture time stamp updates
 * on write faults. In reality, it needs to serialise against truncate and
 * prepare memory for writing, so handle it as a standard write fault.
 */
static vm_fault_t
xfs_filemap_pfn_mkwrite(
	struct vm_fault		*vmf)
{

	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}

static void
xfs_filemap_map_pages(
	struct vm_fault		*vmf,
	pgoff_t			start_pgoff,
	pgoff_t			end_pgoff)
{
	struct inode		*inode = file_inode(vmf->vma->vm_file);

	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	filemap_map_pages(vmf, start_pgoff, end_pgoff);
	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
}

static const struct vm_operations_struct xfs_file_vm_ops = {
	.fault		= xfs_filemap_fault,
	.huge_fault	= xfs_filemap_huge_fault,
	.map_pages	= xfs_filemap_map_pages,
	.page_mkwrite	= xfs_filemap_page_mkwrite,
	.pfn_mkwrite	= xfs_filemap_pfn_mkwrite,
};

STATIC int
xfs_file_mmap(
	struct file		*file,
	struct vm_area_struct	*vma)
{
	struct inode		*inode = file_inode(file);
	struct xfs_buftarg	*target = xfs_inode_buftarg(XFS_I(inode));

	/*
	 * We don't support synchronous mappings for non-DAX files and
	 * for DAX files if underneath dax_device is not synchronous.
	 */
	if (!daxdev_mapping_supported(vma, target->bt_daxdev))
		return -EOPNOTSUPP;

	file_accessed(file);
	vma->vm_ops = &xfs_file_vm_ops;
	if (IS_DAX(inode))
		vma->vm_flags |= VM_HUGEPAGE;
	return 0;
}

const struct file_operations xfs_file_operations = {
	.llseek		= xfs_file_llseek,
	.read_iter	= xfs_file_read_iter,
	.write_iter	= xfs_file_write_iter,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.iopoll		= iomap_dio_iopoll,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.mmap		= xfs_file_mmap,
	.mmap_supported_flags = MAP_SYNC,
	.open		= xfs_file_open,
	.release	= xfs_file_release,
	.fsync		= xfs_file_fsync,
	.get_unmapped_area = thp_get_unmapped_area,
	.fallocate	= xfs_file_fallocate,
	.fadvise	= xfs_file_fadvise,
	.remap_file_range = xfs_file_remap_range,
};

const struct file_operations xfs_dir_file_operations = {
	.open		= xfs_dir_open,
	.read		= generic_read_dir,
	.iterate_shared	= xfs_file_readdir,
	.llseek		= generic_file_llseek,
	.unlocked_ioctl	= xfs_file_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= xfs_file_compat_ioctl,
#endif
	.fsync		= xfs_dir_fsync,
};