// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"


#define XFS_ALLOC_ALIGN(mp, off) \
	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)

static int
xfs_alert_fsblock_zero(
	xfs_inode_t	*ip,
	xfs_bmbt_irec_t	*imap)
{
	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
			"Access to block zero in inode %llu "
			"start_block: %llx start_off: %llx "
			"blkcnt: %llx extent-state: %x",
		(unsigned long long)ip->i_ino,
		(unsigned long long)imap->br_startblock,
		(unsigned long long)imap->br_startoff,
		(unsigned long long)imap->br_blockcount,
		imap->br_state);
	return -EFSCORRUPTED;
}

int
xfs_bmbt_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	struct xfs_bmbt_irec	*imap,
	u16			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		return xfs_alert_fsblock_zero(ip, imap);

	if (imap->br_startblock == HOLESTARTBLOCK) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else if (imap->br_startblock == DELAYSTARTBLOCK ||
		   isnullstartblock(imap->br_startblock)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_DELALLOC;
	} else {
		iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
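		/*
		 * For example, assuming 4096-byte filesystem blocks:
		 * xfs_fsb_to_db() converts the startblock to a 512-byte
		 * daddr (startblock * 8 here), and BBTOB() scales that
		 * daddr to the byte address the iomap layer expects.
		 */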
		if (imap->br_state == XFS_EXT_UNWRITTEN)
			iomap->type = IOMAP_UNWRITTEN;
		else
			iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
	iomap->bdev = target->bt_bdev;
	iomap->dax_dev = target->bt_daxdev;
	iomap->flags = flags;

	if (xfs_ipincount(ip) &&
	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
		iomap->flags |= IOMAP_F_DIRTY;
	return 0;
}

static void
xfs_hole_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
	iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
	iomap->bdev = target->bt_bdev;
	iomap->dax_dev = target->bt_daxdev;
}

static inline xfs_fileoff_t
xfs_iomap_end_fsb(
	struct xfs_mount	*mp,
	loff_t			offset,
	loff_t			count)
{
	ASSERT(offset <= mp->m_super->s_maxbytes);
	return min(XFS_B_TO_FSB(mp, offset + count),
		   XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
}

static xfs_extlen_t
xfs_eof_alignment(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_extlen_t		align = 0;

	if (!XFS_IS_REALTIME_INODE(ip)) {
		/*
		 * Round up the allocation request to a stripe unit
		 * (m_dalign) boundary if the file size is >= stripe unit
		 * size, and we are allocating past the allocation eof.
		 *
		 * If mounted with the "-o swalloc" option the alignment is
		 * increased from the stripe unit size to the stripe width.
		 */
		if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
			align = mp->m_swidth;
		else if (mp->m_dalign)
			align = mp->m_dalign;

		if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align))
			align = 0;
	}

	return align;
}

/*
 * Check if last_fsb is outside the last extent, and if so grow it to the next
 * stripe unit boundary.
 */
xfs_fileoff_t
xfs_iomap_eof_align_last_fsb(
	struct xfs_inode	*ip,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	xfs_extlen_t		extsz = xfs_get_extsz_hint(ip);
	xfs_extlen_t		align = xfs_eof_alignment(ip);
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;

	ASSERT(ifp->if_flags & XFS_IFEXTENTS);

	/*
	 * Always round up the allocation request to the extent hint boundary.
	 */
	if (extsz) {
		if (align)
			align = roundup_64(align, extsz);
		else
			align = extsz;
	}

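	/*
	 * For example, with a stripe unit alignment of 16 blocks and an
	 * extent size hint of 12 blocks, align becomes roundup_64(16, 12),
	 * i.e. 24, so an EOF allocation ending at block 100 is rounded up
	 * to block 120 below, provided that lands at or beyond the end of
	 * the current last extent.
	 */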
	if (align) {
		xfs_fileoff_t	aligned_end_fsb = roundup_64(end_fsb, align);

		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &irec) ||
		    aligned_end_fsb >= irec.br_startoff + irec.br_blockcount)
			return aligned_end_fsb;
	}

	return end_fsb;
}

int
xfs_iomap_write_direct(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		count_fsb,
	struct xfs_bmbt_irec	*imap)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	xfs_filblks_t		resaligned;
	int			nimaps;
	unsigned int		dblocks, rblocks;
	bool			force = false;
	int			error;
	int			bmapi_flags = XFS_BMAPI_PREALLOC;

	ASSERT(count_fsb > 0);

	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
					   xfs_get_extsz_hint(ip));
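	/*
	 * For example, with an extent size hint of 8 blocks, a 3-block
	 * request starting one block into a hint-sized chunk is widened
	 * to the full 8 aligned blocks, and the space reservation below
	 * is sized for that aligned count.
	 */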
	if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
		rblocks = resaligned;
	} else {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
		rblocks = 0;
	}

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/*
	 * For DAX, we do not allocate unwritten extents, but instead we zero
	 * the block before we commit the transaction. Ideally we'd like to do
	 * this outside the transaction context, but if we commit and then
	 * crash we may not have zeroed the blocks and this will be exposed on
	 * recovery of the allocation. Hence we must zero before commit.
	 *
	 * Further, if we are mapping unwritten extents here, we need to zero
	 * and convert them to written so that we don't need an unwritten
	 * extent callback for DAX. This also means that we need to be able to
	 * dip into the reserve block pool for bmbt block allocation if there
	 * is no space left but we need to do unwritten extent conversion.
	 */
	if (IS_DAX(VFS_I(ip))) {
		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
		if (imap->br_state == XFS_EXT_UNWRITTEN) {
			force = true;
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
		}
	}

	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
			rblocks, force, &tp);
	if (error)
		return error;

	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
			XFS_IEXT_ADD_NOSPLIT_CNT);
	if (error)
		goto out_trans_cancel;

	/*
	 * From this point onwards we overwrite the imap pointer that the
	 * caller gave to us.
	 */
	nimaps = 1;
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0,
				imap, &nimaps);
	if (error)
		goto out_trans_cancel;

	/*
	 * Complete the transaction
	 */
	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	/*
	 * Copy any maps to caller's array and return any error.
	 */
	if (nimaps == 0) {
		error = -ENOSPC;
		goto out_unlock;
	}

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

STATIC bool
xfs_quota_need_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		alloc_blocks)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);

	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
		return false;

	/* no hi watermark, no throttle */
	if (!dq->q_prealloc_hi_wmark)
		return false;

	/* under the lo watermark, no throttle */
	if (dq->q_blk.reserved + alloc_blocks < dq->q_prealloc_lo_wmark)
		return false;

	return true;
}

STATIC void
xfs_quota_calc_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		*qblocks,
	int			*qshift,
	int64_t			*qfreesp)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);
	int64_t			freesp;
	int			shift = 0;

	/* no dq, or over hi wmark, squash the prealloc completely */
	if (!dq || dq->q_blk.reserved >= dq->q_prealloc_hi_wmark) {
		*qblocks = 0;
		*qfreesp = 0;
		return;
	}

	freesp = dq->q_prealloc_hi_wmark - dq->q_blk.reserved;
	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
			shift += 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
			shift += 2;
	}

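	/*
	 * For example, if the space left below the quota's hard prealloc
	 * limit has fallen under the 3% threshold but is still above the
	 * 1% one, shift ends up as 4 and the caller cuts the prealloc to
	 * 1/16 of its size.
	 */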
	if (freesp < *qfreesp)
		*qfreesp = freesp;

	/* only overwrite the throttle values if we are more aggressive */
	if ((freesp >> shift) < (*qblocks >> *qshift)) {
		*qblocks = freesp;
		*qshift = shift;
	}
}

/*
 * If we don't have a user specified preallocation size, dynamically increase
 * the preallocation size as the size of the file grows.  Cap the maximum size
 * at a single extent or less if the filesystem is near full. The closer the
 * filesystem is to being full, the smaller the maximum preallocation.
 */
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
	struct xfs_inode	*ip,
	int			whichfork,
	loff_t			offset,
	loff_t			count,
	struct xfs_iext_cursor	*icur)
{
	struct xfs_iext_cursor	ncur = *icur;
	struct xfs_bmbt_irec	prev, got;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	int64_t			freesp;
	xfs_fsblock_t		qblocks;
	xfs_fsblock_t		alloc_blocks = 0;
	xfs_extlen_t		plen;
	int			shift = 0;
	int			qshift = 0;

	/*
	 * As an exception we don't do any preallocation at all if the file is
	 * smaller than the minimum preallocation and we are using the default
	 * dynamic preallocation scheme, as it is likely this is the only write
	 * to the file that is going to be done.
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))
		return 0;

	/*
	 * Use the minimum preallocation size for small files or if we are
	 * writing right after a hole.
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
	    !xfs_iext_prev_extent(ifp, &ncur, &prev) ||
	    prev.br_startoff + prev.br_blockcount < offset_fsb)
		return mp->m_allocsize_blocks;

	/*
	 * Take the size of the preceding data extents as the basis for the
	 * preallocation size. Note that we don't care if the previous extents
	 * are written or not.
	 */
	plen = prev.br_blockcount;
	while (xfs_iext_prev_extent(ifp, &ncur, &got)) {
		if (plen > MAXEXTLEN / 2 ||
		    isnullstartblock(got.br_startblock) ||
		    got.br_startoff + got.br_blockcount != prev.br_startoff ||
		    got.br_startblock + got.br_blockcount != prev.br_startblock)
			break;
		plen += got.br_blockcount;
		prev = got;
	}

	/*
	 * If the size of the extents is greater than half the maximum extent
	 * length, then use the current offset as the basis. This ensures that
	 * for large files the preallocation size always extends to MAXEXTLEN
	 * rather than falling short due to things like stripe unit/width
	 * alignment of real extents.
	 */
	alloc_blocks = plen * 2;
	if (alloc_blocks > MAXEXTLEN)
		alloc_blocks = XFS_B_TO_FSB(mp, offset);
	qblocks = alloc_blocks;

	/*
	 * MAXEXTLEN is not a power of two value but we round the prealloc down
	 * to the nearest power of two value after throttling. To prevent the
	 * round down from unconditionally reducing the maximum supported
	 * prealloc size, we round up first, apply appropriate throttling,
	 * round down and cap the value to MAXEXTLEN.
	 */
	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
				       alloc_blocks);

	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			shift++;
	}

	/*
	 * Check each quota to cap the prealloc size, provide a shift value to
	 * throttle with and adjust amount of available space.
	 */
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_USER, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_USER, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_GROUP, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_GROUP, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_PROJ, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_PROJ, &qblocks, &qshift,
					&freesp);

	/*
	 * The final prealloc size is set to the minimum of free space
	 * available in each of the quotas and the overall filesystem.
	 *
	 * The shift throttle value is set to the maximum value as determined
	 * by the global low free space values and per-quota low free space
	 * values.
	 */
	alloc_blocks = min(alloc_blocks, qblocks);
	shift = max(shift, qshift);

	if (shift)
		alloc_blocks >>= shift;
	/*
	 * rounddown_pow_of_two() returns an undefined result if we pass in
	 * alloc_blocks = 0.
	 */
	if (alloc_blocks)
		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
	if (alloc_blocks > MAXEXTLEN)
		alloc_blocks = MAXEXTLEN;

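	/*
	 * For example, a 2048-block prealloc throttled by a total shift of
	 * 3 becomes 256 blocks; rounddown_pow_of_two() leaves a power of
	 * two unchanged, and the MAXEXTLEN cap only matters for very large
	 * requests.
	 */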
	/*
	 * If we are still trying to allocate more space than is
	 * available, squash the prealloc hard. This can happen if we
	 * have a large file on a small filesystem and the above
	 * lowspace thresholds are smaller than MAXEXTLEN.
	 */
	while (alloc_blocks && alloc_blocks >= freesp)
		alloc_blocks >>= 4;
	if (alloc_blocks < mp->m_allocsize_blocks)
		alloc_blocks = mp->m_allocsize_blocks;
	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
				      mp->m_allocsize_blocks);
	return alloc_blocks;
}

int
xfs_iomap_write_unwritten(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	xfs_off_t	count,
	bool		update_isize)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_filblks_t	count_fsb;
	xfs_filblks_t	numblks_fsb;
	int		nimaps;
	xfs_trans_t	*tp;
	xfs_bmbt_irec_t	imap;
	struct inode	*inode = VFS_I(ip);
	xfs_fsize_t	i_size;
	uint		resblks;
	int		error;

	trace_xfs_unwritten_convert(ip, offset, count);

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

	/*
	 * Reserve enough blocks in this transaction for two complete extent
	 * btree splits.  We may be converting the middle part of an unwritten
	 * extent and in this case we will insert two new extents in the btree
	 * each of which could cause a full split.
	 *
	 * This reservation amount will be used in the first call to
	 * xfs_bmbt_split() to select an AG with enough space to satisfy the
	 * rest of the operation.
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

	/* Attach dquots so that bmbt splits are accounted correctly. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	do {
		/*
		 * Set up a transaction to convert the range of extents
		 * from unwritten to real. Do allocations in a loop until
		 * we have covered the range passed in.
		 *
		 * Note that we can't risk recursing back into the filesystem
		 * here as we might be asked to write out the same inode that
		 * we complete here and might deadlock on the iolock.
		 */
		error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks,
				0, true, &tp);
		if (error)
			return error;

		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
				XFS_IEXT_WRITE_UNWRITTEN_CNT);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Modify the unwritten extent state of the buffer.
		 */
		nimaps = 1;
		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
					XFS_BMAPI_CONVERT, resblks, &imap,
					&nimaps);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Log the updated inode size as we go.  We have to be careful
		 * to only log it up to the actual write offset if it is
		 * halfway into a block.
		 */
		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
		if (i_size > offset + count)
			i_size = offset + count;
		if (update_isize && i_size > i_size_read(inode))
			i_size_write(inode, i_size);
		i_size = xfs_new_eof(ip, i_size);
		if (i_size) {
			ip->i_d.di_size = i_size;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock)))
			return xfs_alert_fsblock_zero(ip, &imap);

		if ((numblks_fsb = imap.br_blockcount) == 0) {
			/*
			 * The numblks_fsb value should always get
			 * smaller, otherwise the loop is stuck.
			 */
			ASSERT(imap.br_blockcount);
			break;
		}
		offset_fsb += numblks_fsb;
		count_fsb -= numblks_fsb;
	} while (count_fsb > 0);

	return 0;

error_on_bmapi_transaction:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static inline bool
imap_needs_alloc(
	struct inode		*inode,
	unsigned		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	/* don't allocate blocks when just zeroing */
	if (flags & IOMAP_ZERO)
		return false;
	if (!nimaps ||
	    imap->br_startblock == HOLESTARTBLOCK ||
	    imap->br_startblock == DELAYSTARTBLOCK)
		return true;
	/* we convert unwritten extents before copying the data for DAX */
	if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
		return true;
	return false;
}

static inline bool
imap_needs_cow(
	struct xfs_inode	*ip,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	if (!xfs_is_cow_inode(ip))
		return false;

	/* when zeroing we don't have to COW holes or unwritten extents */
	if (flags & IOMAP_ZERO) {
		if (!nimaps ||
		    imap->br_startblock == HOLESTARTBLOCK ||
		    imap->br_state == XFS_EXT_UNWRITTEN)
			return false;
	}

	return true;
}

static int
xfs_ilock_for_iomap(
	struct xfs_inode	*ip,
	unsigned		flags,
	unsigned		*lockmode)
{
	unsigned		mode = XFS_ILOCK_SHARED;
	bool			is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);

	/*
	 * COW writes may allocate delalloc space or convert unwritten COW
	 * extents, so we need to make sure to take the lock exclusively here.
	 */
	if (xfs_is_cow_inode(ip) && is_write)
		mode = XFS_ILOCK_EXCL;

	/*
	 * Extents not yet cached require exclusive access, don't block.  This
	 * is an opencoded xfs_ilock_data_map_shared() call but with
	 * non-blocking behaviour.
	 */
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
		if (flags & IOMAP_NOWAIT)
			return -EAGAIN;
		mode = XFS_ILOCK_EXCL;
	}

relock:
	if (flags & IOMAP_NOWAIT) {
		if (!xfs_ilock_nowait(ip, mode))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, mode);
	}

	/*
	 * The reflink iflag could have changed since the earlier unlocked
	 * check, so if we got ILOCK_SHARED for a write but we're now a
	 * reflink inode we have to switch to ILOCK_EXCL and relock.
	 */
	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
		xfs_iunlock(ip, mode);
		mode = XFS_ILOCK_EXCL;
		goto relock;
	}

	*lockmode = mode;
	return 0;
}

/*
 * Check that the imap we are going to return to the caller spans the entire
 * range that the caller requested for the IO.
 */
static bool
imap_spans_range(
	struct xfs_bmbt_irec	*imap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	if (imap->br_startoff > offset_fsb)
		return false;
	if (imap->br_startoff + imap->br_blockcount < end_fsb)
		return false;
	return true;
}

static int
xfs_direct_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap, cmap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	u16			iomap_flags = 0;
	unsigned		lockmode;

	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Writes that span EOF might trigger an IO size update on completion,
	 * so consider them to be dirty for the purposes of O_DSYNC even if
	 * there are no other metadata changes pending or being made here.
	 */
	if (offset + length > i_size_read(inode))
		iomap_flags |= IOMAP_F_DIRTY;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (error)
		goto out_unlock;

	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
		error = -EAGAIN;
		if (flags & IOMAP_NOWAIT)
			goto out_unlock;

		/* may drop and re-acquire the ilock */
		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
				&lockmode, flags & IOMAP_DIRECT);
		if (error)
			goto out_unlock;
		if (shared)
			goto out_found_cow;
		end_fsb = imap.br_startoff + imap.br_blockcount;
		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
	}

	if (imap_needs_alloc(inode, flags, &imap, nimaps))
		goto allocate_blocks;

	/*
	 * NOWAIT and OVERWRITE I/O need to span the entire requested I/O with
	 * a single map so that we avoid partial IO failures due to the rest of
	 * the I/O range not covered by this map triggering an EAGAIN condition
	 * when it is subsequently mapped and aborting the I/O.
	 */
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) {
		error = -EAGAIN;
		if (!imap_spans_range(&imap, offset_fsb, end_fsb))
			goto out_unlock;
	}

	/*
	 * For overwrite only I/O, we cannot convert unwritten extents without
	 * requiring sub-block zeroing.  This can only be done under an
	 * exclusive IOLOCK, hence return -EAGAIN if this is not a written
	 * extent to tell the caller to try again.
	 */
	if (flags & IOMAP_OVERWRITE_ONLY) {
		error = -EAGAIN;
		if (imap.br_state != XFS_EXT_NORM &&
		    ((offset | length) & mp->m_blockmask))
			goto out_unlock;
	}

	xfs_iunlock(ip, lockmode);
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);

allocate_blocks:
	error = -EAGAIN;
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY))
		goto out_unlock;

	/*
	 * We cap the maximum length we map to a sane size to keep the chunks
	 * of work done here somewhat symmetric with the work writeback does.
	 * This is a completely arbitrary number pulled out of thin air as a
	 * best guess for initial testing.
	 *
	 * Note that the value needs to be less than 32 bits wide until the
	 * lower level functions are updated.
	 */
	length = min_t(loff_t, length, 1024 * PAGE_SIZE);
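	/*
	 * With 4 KiB pages this caps a single mapping at 4 MiB; larger
	 * requests simply come back here for another mapping.
	 */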
	end_fsb = xfs_iomap_end_fsb(mp, offset, length);

	if (offset + length > XFS_ISIZE(ip))
		end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
	else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
		end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount);
	xfs_iunlock(ip, lockmode);

	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
			&imap);
	if (error)
		return error;

	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);

out_found_cow:
	xfs_iunlock(ip, lockmode);
	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
	if (imap.br_startblock != HOLESTARTBLOCK) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
		if (error)
			return error;
	}
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);

out_unlock:
	if (lockmode)
		xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_direct_write_iomap_ops = {
	.iomap_begin		= xfs_direct_write_iomap_begin,
};

static int
xfs_buffered_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			count,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, count);
	struct xfs_bmbt_irec	imap, cmap;
	struct xfs_iext_cursor	icur, ccur;
	xfs_fsblock_t		prealloc_blocks = 0;
	bool			eof = false, cow_eof = false, shared = false;
	int			allocfork = XFS_DATA_FORK;
	int			error = 0;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* we can't use delayed allocations when using extent size hints */
	if (xfs_get_extsz_hint(ip))
		return xfs_direct_write_iomap_begin(inode, offset, count,
				flags, iomap, srcmap);

	ASSERT(!XFS_IS_REALTIME_INODE(ip));

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		error = -EFSCORRUPTED;
		goto out_unlock;
	}

	XFS_STATS_INC(mp, xs_blk_mapw);

	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			goto out_unlock;
	}

	/*
	 * Search the data fork first to look up our source mapping.  We
	 * always need the data fork map, as we have to return it to the
	 * iomap code so that the higher level write code can read data in to
	 * perform read-modify-write cycles for unaligned writes.
	 */
	eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
	if (eof)
		imap.br_startoff = end_fsb; /* fake hole until the end */

	/* We never need to allocate blocks for zeroing a hole. */
	if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
		xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
		goto out_unlock;
	}

	/*
	 * Search the COW fork extent list even if we did not find a data fork
	 * extent.  This serves two purposes: first this implements the
	 * speculative preallocation using cowextsize, so that we also unshare
	 * blocks adjacent to shared blocks instead of just the shared blocks
	 * themselves.  Second the lookup in the extent list is generally faster
	 * than going out to the shared extent tree.
	 */
	if (xfs_is_cow_inode(ip)) {
		if (!ip->i_cowfp) {
			ASSERT(!xfs_is_reflink_inode(ip));
			xfs_ifork_init_cow(ip);
		}
		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
				&ccur, &cmap);
		if (!cow_eof && cmap.br_startoff <= offset_fsb) {
			trace_xfs_reflink_cow_found(ip, &cmap);
			goto found_cow;
		}
	}

	if (imap.br_startoff <= offset_fsb) {
		/*
		 * For reflink files we may need a delalloc reservation when
		 * overwriting shared extents.  This includes zeroing of
		 * existing extents that contain data.
		 */
		if (!xfs_is_cow_inode(ip) ||
		    ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);

		/* Trim the mapping to the nearest shared extent boundary. */
		error = xfs_bmap_trim_cow(ip, &imap, &shared);
		if (error)
			goto out_unlock;

		/* Not shared?  Just report the (potentially capped) extent. */
		if (!shared) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		/*
		 * Fork all the shared blocks from our write offset until the
		 * end of the extent.
		 */
		allocfork = XFS_COW_FORK;
		end_fsb = imap.br_startoff + imap.br_blockcount;
	} else {
		/*
		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
		 * pages to keep the chunks of work done somewhat symmetric
		 * with the work writeback does.  This is a completely
		 * arbitrary number pulled out of thin air.
		 *
		 * Note that the value needs to be less than 32 bits wide until
		 * the lower level functions are updated.
		 */
		count = min_t(loff_t, count, 1024 * PAGE_SIZE);
		end_fsb = xfs_iomap_end_fsb(mp, offset, count);

		if (xfs_is_always_cow_inode(ip))
			allocfork = XFS_COW_FORK;
	}

	error = xfs_qm_dqattach_locked(ip, false);
	if (error)
		goto out_unlock;

	if (eof && offset + count > XFS_ISIZE(ip)) {
		/*
		 * Determine the initial size of the preallocation.
		 * We clean up any extra preallocation when the file is closed.
		 */
		if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
			prealloc_blocks = mp->m_allocsize_blocks;
		else
			prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
						offset, count, &icur);
		if (prealloc_blocks) {
			xfs_extlen_t	align;
			xfs_off_t	end_offset;
			xfs_fileoff_t	p_end_fsb;

			end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1);
			p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
					prealloc_blocks;

			align = xfs_eof_alignment(ip);
			if (align)
				p_end_fsb = roundup_64(p_end_fsb, align);

			p_end_fsb = min(p_end_fsb,
				XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
			ASSERT(p_end_fsb > offset_fsb);
			prealloc_blocks = p_end_fsb - end_fsb;
		}
	}

retry:
	error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
			end_fsb - offset_fsb, prealloc_blocks,
			allocfork == XFS_DATA_FORK ? &imap : &cmap,
			allocfork == XFS_DATA_FORK ? &icur : &ccur,
			allocfork == XFS_DATA_FORK ? eof : cow_eof);
	switch (error) {
	case 0:
		break;
	case -ENOSPC:
	case -EDQUOT:
		/* retry without any preallocation */
		trace_xfs_delalloc_enospc(ip, offset, count);
		if (prealloc_blocks) {
			prealloc_blocks = 0;
			goto retry;
		}
		/*FALLTHRU*/
	default:
		goto out_unlock;
	}

	if (allocfork == XFS_COW_FORK) {
		trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
		goto found_cow;
	}

	/*
	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
	 * them out if the write happens to fail.
	 */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);

found_imap:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);

found_cow:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	if (imap.br_startoff <= offset_fsb) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
		if (error)
			return error;
	} else {
		xfs_trim_extent(&cmap, offset_fsb,
				imap.br_startoff - offset_fsb);
	}
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_buffered_write_iomap_end(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	ssize_t			written,
	unsigned		flags,
	struct iomap		*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		start_fsb;
	xfs_fileoff_t		end_fsb;
	int			error = 0;

	if (iomap->type != IOMAP_DELALLOC)
		return 0;

	/*
	 * Behave as if the write failed if drop writes is enabled. Set the NEW
	 * flag to force delalloc cleanup.
	 */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
		iomap->flags |= IOMAP_F_NEW;
		written = 0;
	}

	/*
	 * start_fsb refers to the first unused block after a short write.  If
	 * nothing was written, round offset down to point at the first block
	 * in the range.
	 */
	if (unlikely(!written))
		start_fsb = XFS_B_TO_FSBT(mp, offset);
	else
		start_fsb = XFS_B_TO_FSB(mp, offset + written);
	end_fsb = XFS_B_TO_FSB(mp, offset + length);

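	/*
	 * For example, with 4096-byte blocks, a 16384-byte delalloc mapping
	 * at offset 0 of which only 4096 bytes were copied gives
	 * start_fsb = 1 and end_fsb = 4, so blocks 1-3 are punched out
	 * below if this write created them (IOMAP_F_NEW).
	 */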
	/*
	 * Trim delalloc blocks if they were allocated by this write and we
	 * didn't manage to write the whole range.
	 *
	 * We don't need to care about racing delalloc as we hold i_mutex
	 * across the reserve/allocate/unreserve calls. If there are delalloc
	 * blocks in the range, they are ours.
	 */
	if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
		truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
					 XFS_FSB_TO_B(mp, end_fsb) - 1);

		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
					       end_fsb - start_fsb);
		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
			xfs_alert(mp, "%s: unable to clean up ino %lld",
				__func__, ip->i_ino);
			return error;
		}
	}

	return 0;
}

const struct iomap_ops xfs_buffered_write_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
	.iomap_end		= xfs_buffered_write_iomap_end,
};

static int
xfs_read_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	unsigned		lockmode;

	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (!error && (flags & IOMAP_REPORT))
		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
}

const struct iomap_ops xfs_read_iomap_ops = {
	.iomap_begin		= xfs_read_iomap_begin,
};

static int
xfs_seek_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	xfs_fileoff_t		cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	imap, cmap;
	int			error = 0;
	unsigned		lockmode;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	lockmode = xfs_ilock_data_map_shared(ip);
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
		if (error)
			goto out_unlock;
	}

	if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
		/*
		 * If we found a data extent we are done.
		 */
		if (imap.br_startoff <= offset_fsb)
			goto done;
		data_fsb = imap.br_startoff;
	} else {
		/*
		 * Fake a hole until the end of the file.
		 */
		data_fsb = xfs_iomap_end_fsb(mp, offset, length);
	}

	/*
	 * If a COW fork extent covers the hole, report it - capped to the next
	 * data fork extent:
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
		cow_fsb = cmap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		if (data_fsb < cow_fsb + cmap.br_blockcount)
			end_fsb = min(end_fsb, data_fsb);
		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
		/*
		 * This is a COW extent, so we must probe the page cache
		 * because there could be dirty page cache being backed
		 * by this extent.
		 */
		iomap->type = IOMAP_UNWRITTEN;
		goto out_unlock;
	}

	/*
	 * Else report a hole, capped to the next found data or COW extent.
	 */
	if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
		imap.br_blockcount = cow_fsb - offset_fsb;
	else
		imap.br_blockcount = data_fsb - offset_fsb;
	imap.br_startoff = offset_fsb;
	imap.br_startblock = HOLESTARTBLOCK;
	imap.br_state = XFS_EXT_NORM;
done:
	xfs_trim_extent(&imap, offset_fsb, end_fsb);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_seek_iomap_ops = {
	.iomap_begin		= xfs_seek_iomap_begin,
};

static int
xfs_xattr_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1, error = 0;
	unsigned		lockmode;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	lockmode = xfs_ilock_attr_map_shared(ip);

	/* if there is no attribute fork or no extents, return ENOENT */
	if (!XFS_IFORK_Q(ip) || !ip->i_afp->if_nextents) {
		error = -ENOENT;
		goto out_unlock;
	}

	ASSERT(ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	ASSERT(nimaps);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
}

const struct iomap_ops xfs_xattr_iomap_ops = {
	.iomap_begin		= xfs_xattr_iomap_begin,
};