// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"

#define XFS_ALLOC_ALIGN(mp, off) \
	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)

static int
xfs_alert_fsblock_zero(
	xfs_inode_t	*ip,
	xfs_bmbt_irec_t	*imap)
{
	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
			"Access to block zero in inode %llu "
			"start_block: %llx start_off: %llx "
			"blkcnt: %llx extent-state: %x",
		(unsigned long long)ip->i_ino,
		(unsigned long long)imap->br_startblock,
		(unsigned long long)imap->br_startoff,
		(unsigned long long)imap->br_blockcount,
		imap->br_state);
	return -EFSCORRUPTED;
}

/*
 * Sample the current sequence numbers of the inode forks covered by this
 * mapping so that xfs_iomap_valid() can later detect changes to the
 * underlying extent maps.
 */
u64
xfs_iomap_inode_sequence(
	struct xfs_inode	*ip,
	u16			iomap_flags)
{
	u64			cookie = 0;

	if (iomap_flags & IOMAP_F_XATTR)
		return READ_ONCE(ip->i_af.if_seq);
	if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp)
		cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32;
	return cookie | READ_ONCE(ip->i_df.if_seq);
}

/*
 * Check that the iomap passed to us is still valid for the given offset and
 * length.
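 *
 * A mapping is only valid while its validity cookie still matches the fork
 * sequence numbers returned by xfs_iomap_inode_sequence(); any change to the
 * underlying extent maps since the cookie was sampled invalidates it.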
 */
static bool
xfs_iomap_valid(
	struct inode		*inode,
	const struct iomap	*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);

	if (iomap->validity_cookie !=
			xfs_iomap_inode_sequence(ip, iomap->flags)) {
		trace_xfs_iomap_invalid(ip, iomap);
		return false;
	}

	XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
	return true;
}

static const struct iomap_folio_ops xfs_iomap_folio_ops = {
	.iomap_valid		= xfs_iomap_valid,
};

int
xfs_bmbt_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	struct xfs_bmbt_irec	*imap,
	unsigned int		mapping_flags,
	u16			iomap_flags,
	u64			sequence_cookie)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		return xfs_alert_fsblock_zero(ip, imap);

	if (imap->br_startblock == HOLESTARTBLOCK) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else if (imap->br_startblock == DELAYSTARTBLOCK ||
		   isnullstartblock(imap->br_startblock)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_DELALLOC;
	} else {
		iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
		if (mapping_flags & IOMAP_DAX)
			iomap->addr += target->bt_dax_part_off;

		if (imap->br_state == XFS_EXT_UNWRITTEN)
			iomap->type = IOMAP_UNWRITTEN;
		else
			iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
	if (mapping_flags & IOMAP_DAX)
		iomap->dax_dev = target->bt_daxdev;
	else
		iomap->bdev = target->bt_bdev;
	iomap->flags = iomap_flags;

	if (xfs_ipincount(ip) &&
	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
		iomap->flags |= IOMAP_F_DIRTY;

	iomap->validity_cookie = sequence_cookie;
	iomap->folio_ops = &xfs_iomap_folio_ops;
	return 0;
}

static void
xfs_hole_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
	iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
	iomap->bdev = target->bt_bdev;
	iomap->dax_dev = target->bt_daxdev;
}

static inline xfs_fileoff_t
xfs_iomap_end_fsb(
	struct xfs_mount	*mp,
	loff_t			offset,
	loff_t			count)
{
	ASSERT(offset <= mp->m_super->s_maxbytes);
	return min(XFS_B_TO_FSB(mp, offset + count),
		   XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
}

static xfs_extlen_t
xfs_eof_alignment(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_extlen_t		align = 0;

	if (!XFS_IS_REALTIME_INODE(ip)) {
		/*
		 * Round up the allocation request to a stripe unit
		 * (m_dalign) boundary if the file size is >= stripe unit
		 * size, and we are allocating past the allocation eof.
		 *
		 * If mounted with the "-o swalloc" option the alignment is
		 * increased from the stripe unit size to the stripe width.
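		 *
		 * The alignment is only applied once the file size has grown
		 * past the chosen alignment size; smaller files are left
		 * alone.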
		 */
		if (mp->m_swidth && xfs_has_swalloc(mp))
			align = mp->m_swidth;
		else if (mp->m_dalign)
			align = mp->m_dalign;

		if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align))
			align = 0;
	}

	return align;
}

/*
 * Check if last_fsb is outside the last extent, and if so grow it to the next
 * stripe unit boundary.
 */
xfs_fileoff_t
xfs_iomap_eof_align_last_fsb(
	struct xfs_inode	*ip,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	xfs_extlen_t		extsz = xfs_get_extsz_hint(ip);
	xfs_extlen_t		align = xfs_eof_alignment(ip);
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;

	ASSERT(!xfs_need_iread_extents(ifp));

	/*
	 * Always round up the allocation request to the extent hint boundary.
	 */
	if (extsz) {
		if (align)
			align = roundup_64(align, extsz);
		else
			align = extsz;
	}

	if (align) {
		xfs_fileoff_t	aligned_end_fsb = roundup_64(end_fsb, align);

		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &irec) ||
		    aligned_end_fsb >= irec.br_startoff + irec.br_blockcount)
			return aligned_end_fsb;
	}

	return end_fsb;
}

int
xfs_iomap_write_direct(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		count_fsb,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	u64			*seq)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	xfs_filblks_t		resaligned;
	int			nimaps;
	unsigned int		dblocks, rblocks;
	bool			force = false;
	int			error;
	int			bmapi_flags = XFS_BMAPI_PREALLOC;
	int			nr_exts = XFS_IEXT_ADD_NOSPLIT_CNT;

	ASSERT(count_fsb > 0);

	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
					   xfs_get_extsz_hint(ip));
	if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
		rblocks = resaligned;
	} else {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
		rblocks = 0;
	}

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/*
	 * For DAX, we do not allocate unwritten extents, but instead we zero
	 * the block before we commit the transaction.  Ideally we'd like to do
	 * this outside the transaction context, but if we commit and then crash
	 * we may not have zeroed the blocks and this will be exposed on
	 * recovery of the allocation.  Hence we must zero before commit.
	 *
	 * Further, if we are mapping unwritten extents here, we need to zero
	 * and convert them to written so that we don't need an unwritten extent
	 * callback for DAX.  This also means that we need to be able to dip into
	 * the reserve block pool for bmbt block allocation if there is no space
	 * left but we need to do unwritten extent conversion.
	 */
	if (flags & IOMAP_DAX) {
		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
		if (imap->br_state == XFS_EXT_UNWRITTEN) {
			force = true;
			nr_exts = XFS_IEXT_WRITE_UNWRITTEN_CNT;
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
		}
	}

	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
			rblocks, force, &tp);
	if (error)
		return error;

	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, nr_exts);
	if (error == -EFBIG)
		error = xfs_iext_count_upgrade(tp, ip, nr_exts);
	if (error)
		goto out_trans_cancel;

	/*
	 * From this point onwards we overwrite the imap pointer that the
	 * caller gave to us.
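	 *
	 * The incoming mapping was only consulted for the DAX unwritten
	 * extent check above, so it is safe to reuse it for the result.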
	 */
	nimaps = 1;
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0,
				imap, &nimaps);
	if (error)
		goto out_trans_cancel;

	/*
	 * Complete the transaction
	 */
	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	/*
	 * Copy any maps to caller's array and return any error.
	 */
	if (nimaps == 0) {
		error = -ENOSPC;
		goto out_unlock;
	}

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	*seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

STATIC bool
xfs_quota_need_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		alloc_blocks)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);

	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
		return false;

	/* no hi watermark, no throttle */
	if (!dq->q_prealloc_hi_wmark)
		return false;

	/* under the lo watermark, no throttle */
	if (dq->q_blk.reserved + alloc_blocks < dq->q_prealloc_lo_wmark)
		return false;

	return true;
}

STATIC void
xfs_quota_calc_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		*qblocks,
	int			*qshift,
	int64_t			*qfreesp)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);
	int64_t			freesp;
	int			shift = 0;

	/* no dq, or over hi wmark, squash the prealloc completely */
	if (!dq || dq->q_blk.reserved >= dq->q_prealloc_hi_wmark) {
		*qblocks = 0;
		*qfreesp = 0;
		return;
	}

	freesp = dq->q_prealloc_hi_wmark - dq->q_blk.reserved;
	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
			shift += 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
			shift += 2;
	}

	if (freesp < *qfreesp)
		*qfreesp = freesp;

	/* only overwrite the throttle values if we are more aggressive */
	if ((freesp >> shift) < (*qblocks >> *qshift)) {
		*qblocks = freesp;
		*qshift = shift;
	}
}

/*
 * If we don't have a user specified preallocation size, dynamically increase
 * the preallocation size as the size of the file grows.  Cap the maximum size
 * at a single extent or less if the filesystem is near full.  The closer the
 * filesystem is to being full, the smaller the maximum preallocation.
 */
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
	struct xfs_inode	*ip,
	int			whichfork,
	loff_t			offset,
	loff_t			count,
	struct xfs_iext_cursor	*icur)
{
	struct xfs_iext_cursor	ncur = *icur;
	struct xfs_bmbt_irec	prev, got;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	int64_t			freesp;
	xfs_fsblock_t		qblocks;
	xfs_fsblock_t		alloc_blocks = 0;
	xfs_extlen_t		plen;
	int			shift = 0;
	int			qshift = 0;

	/*
	 * As an exception we don't do any preallocation at all if the file is
	 * smaller than the minimum preallocation and we are using the default
	 * dynamic preallocation scheme, as it is likely this is the only write
	 * to the file that is going to be done.
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))
		return 0;

	/*
	 * Use the minimum preallocation size for small files or if we are
	 * writing right after a hole.
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
	    !xfs_iext_prev_extent(ifp, &ncur, &prev) ||
	    prev.br_startoff + prev.br_blockcount < offset_fsb)
		return mp->m_allocsize_blocks;

	/*
	 * Take the size of the preceding data extents as the basis for the
	 * preallocation size.  Note that we don't care if the previous extents
	 * are written or not.
	 */
	plen = prev.br_blockcount;
	while (xfs_iext_prev_extent(ifp, &ncur, &got)) {
		if (plen > XFS_MAX_BMBT_EXTLEN / 2 ||
		    isnullstartblock(got.br_startblock) ||
		    got.br_startoff + got.br_blockcount != prev.br_startoff ||
		    got.br_startblock + got.br_blockcount != prev.br_startblock)
			break;
		plen += got.br_blockcount;
		prev = got;
	}

	/*
	 * If the size of the extents is greater than half the maximum extent
	 * length, then use the current offset as the basis.  This ensures that
	 * for large files the preallocation size always extends to
	 * XFS_MAX_BMBT_EXTLEN rather than falling short due to things like
	 * stripe unit/width alignment of real extents.
	 */
	alloc_blocks = plen * 2;
	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
		alloc_blocks = XFS_B_TO_FSB(mp, offset);
	qblocks = alloc_blocks;

	/*
	 * XFS_MAX_BMBT_EXTLEN is not a power of two value but we round the
	 * prealloc down to the nearest power of two value after throttling.
	 * To prevent the round down from unconditionally reducing the maximum
	 * supported prealloc size, we round up first, apply appropriate
	 * throttling, round down and cap the value to XFS_MAX_BMBT_EXTLEN.
	 */
	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(XFS_MAX_BMBT_EXTLEN),
				       alloc_blocks);

	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			shift++;
	}

	/*
	 * Check each quota to cap the prealloc size, provide a shift value to
	 * throttle with and adjust amount of available space.
	 */
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_USER, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_USER, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_GROUP, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_GROUP, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_PROJ, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_PROJ, &qblocks, &qshift,
					&freesp);

	/*
	 * The final prealloc size is set to the minimum of free space available
	 * in each of the quotas and the overall filesystem.
	 *
	 * The shift throttle value is set to the maximum value as determined by
	 * the global low free space values and per-quota low free space values.
	 */
	alloc_blocks = min(alloc_blocks, qblocks);
	shift = max(shift, qshift);

	if (shift)
		alloc_blocks >>= shift;
	/*
	 * rounddown_pow_of_two() returns an undefined result if we pass in
	 * alloc_blocks = 0.
	 */
	if (alloc_blocks)
		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
		alloc_blocks = XFS_MAX_BMBT_EXTLEN;

	/*
	 * If we are still trying to allocate more space than is
	 * available, squash the prealloc hard.  This can happen if we
	 * have a large file on a small filesystem and the above
	 * lowspace thresholds are smaller than XFS_MAX_BMBT_EXTLEN.
	 */
	while (alloc_blocks && alloc_blocks >= freesp)
		alloc_blocks >>= 4;
	if (alloc_blocks < mp->m_allocsize_blocks)
		alloc_blocks = mp->m_allocsize_blocks;
	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
				      mp->m_allocsize_blocks);
	return alloc_blocks;
}

int
xfs_iomap_write_unwritten(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	xfs_off_t	count,
	bool		update_isize)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_filblks_t	count_fsb;
	xfs_filblks_t	numblks_fsb;
	int		nimaps;
	xfs_trans_t	*tp;
	xfs_bmbt_irec_t	imap;
	struct inode	*inode = VFS_I(ip);
	xfs_fsize_t	i_size;
	uint		resblks;
	int		error;

	trace_xfs_unwritten_convert(ip, offset, count);

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

	/*
	 * Reserve enough blocks in this transaction for two complete extent
	 * btree splits.  We may be converting the middle part of an unwritten
	 * extent and in this case we will insert two new extents in the btree
	 * each of which could cause a full split.
	 *
	 * This reservation amount will be used in the first call to
	 * xfs_bmbt_split() to select an AG with enough space to satisfy the
	 * rest of the operation.
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

	/* Attach dquots so that bmbt splits are accounted correctly. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	do {
		/*
		 * Set up a transaction to convert the range of extents
		 * from unwritten to real.  Do allocations in a loop until
		 * we have covered the range passed in.
		 *
		 * Note that we can't risk recursing back into the filesystem
		 * here as we might be asked to write out the same inode that we
		 * complete here and might deadlock on the iolock.
		 */
		error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks,
				0, true, &tp);
		if (error)
			return error;

		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
				XFS_IEXT_WRITE_UNWRITTEN_CNT);
		if (error == -EFBIG)
			error = xfs_iext_count_upgrade(tp, ip,
					XFS_IEXT_WRITE_UNWRITTEN_CNT);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Modify the unwritten extent state of the buffer.
		 */
		nimaps = 1;
		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
					XFS_BMAPI_CONVERT, resblks, &imap,
					&nimaps);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Log the updated inode size as we go.  We have to be careful
		 * to only log it up to the actual write offset if it is
		 * halfway into a block.
		 */
		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
		if (i_size > offset + count)
			i_size = offset + count;
		if (update_isize && i_size > i_size_read(inode))
			i_size_write(inode, i_size);
		i_size = xfs_new_eof(ip, i_size);
		if (i_size) {
			ip->i_disk_size = i_size;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock)))
			return xfs_alert_fsblock_zero(ip, &imap);

		if ((numblks_fsb = imap.br_blockcount) == 0) {
			/*
			 * The numblks_fsb value should always get
			 * smaller, otherwise the loop is stuck.
			 */
			ASSERT(imap.br_blockcount);
			break;
		}
		offset_fsb += numblks_fsb;
		count_fsb -= numblks_fsb;
	} while (count_fsb > 0);

	return 0;

error_on_bmapi_transaction:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static inline bool
imap_needs_alloc(
	struct inode		*inode,
	unsigned		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	/* don't allocate blocks when just zeroing */
	if (flags & IOMAP_ZERO)
		return false;
	if (!nimaps ||
	    imap->br_startblock == HOLESTARTBLOCK ||
	    imap->br_startblock == DELAYSTARTBLOCK)
		return true;
	/* we convert unwritten extents before copying the data for DAX */
	if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
		return true;
	return false;
}

static inline bool
imap_needs_cow(
	struct xfs_inode	*ip,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	if (!xfs_is_cow_inode(ip))
		return false;

	/* when zeroing we don't have to COW holes or unwritten extents */
	if (flags & IOMAP_ZERO) {
		if (!nimaps ||
		    imap->br_startblock == HOLESTARTBLOCK ||
		    imap->br_state == XFS_EXT_UNWRITTEN)
			return false;
	}

	return true;
}

static int
xfs_ilock_for_iomap(
	struct xfs_inode	*ip,
	unsigned		flags,
	unsigned		*lockmode)
{
	unsigned int		mode = *lockmode;
	bool			is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);

	/*
	 * COW writes may allocate delalloc space or convert unwritten COW
	 * extents, so we need to make sure to take the lock exclusively here.
	 */
	if (xfs_is_cow_inode(ip) && is_write)
		mode = XFS_ILOCK_EXCL;

	/*
	 * Extents not yet cached require exclusive access, don't block.  This
	 * is an opencoded xfs_ilock_data_map_shared() call but with
	 * non-blocking behaviour.
	 */
	if (xfs_need_iread_extents(&ip->i_df)) {
		if (flags & IOMAP_NOWAIT)
			return -EAGAIN;
		mode = XFS_ILOCK_EXCL;
	}

relock:
	if (flags & IOMAP_NOWAIT) {
		if (!xfs_ilock_nowait(ip, mode))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, mode);
	}

	/*
	 * The reflink iflag could have changed since the earlier unlocked
	 * check, so if we got ILOCK_SHARED for a write but we're now a
	 * reflink inode we have to switch to ILOCK_EXCL and relock.
	 */
	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
		xfs_iunlock(ip, mode);
		mode = XFS_ILOCK_EXCL;
		goto relock;
	}

	*lockmode = mode;
	return 0;
}

/*
 * Check that the imap we are going to return to the caller spans the entire
 * range that the caller requested for the IO.
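 *
 * Callers that cannot tolerate a short mapping (NOWAIT and overwrite-only
 * direct I/O) use this to bail out with -EAGAIN instead of failing partway
 * through the I/O.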
 */
static bool
imap_spans_range(
	struct xfs_bmbt_irec	*imap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	if (imap->br_startoff > offset_fsb)
		return false;
	if (imap->br_startoff + imap->br_blockcount < end_fsb)
		return false;
	return true;
}

static int
xfs_direct_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap, cmap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	u16			iomap_flags = 0;
	unsigned int		lockmode = XFS_ILOCK_SHARED;
	u64			seq;

	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));

	if (xfs_is_shutdown(mp))
		return -EIO;

	/*
	 * Writes that span EOF might trigger an IO size update on completion,
	 * so consider them to be dirty for the purposes of O_DSYNC even if
	 * there are no other metadata changes pending or made here.
	 */
	if (offset + length > i_size_read(inode))
		iomap_flags |= IOMAP_F_DIRTY;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (error)
		goto out_unlock;

	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
		error = -EAGAIN;
		if (flags & IOMAP_NOWAIT)
			goto out_unlock;

		/* may drop and re-acquire the ilock */
		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
				&lockmode,
				(flags & IOMAP_DIRECT) || IS_DAX(inode));
		if (error)
			goto out_unlock;
		if (shared)
			goto out_found_cow;
		end_fsb = imap.br_startoff + imap.br_blockcount;
		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
	}

	if (imap_needs_alloc(inode, flags, &imap, nimaps))
		goto allocate_blocks;

	/*
	 * NOWAIT and OVERWRITE I/O needs to span the entire requested I/O with
	 * a single map so that we avoid partial IO failures due to the rest of
	 * the I/O range not covered by this map triggering an EAGAIN condition
	 * when it is subsequently mapped and aborting the I/O.
	 */
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) {
		error = -EAGAIN;
		if (!imap_spans_range(&imap, offset_fsb, end_fsb))
			goto out_unlock;
	}

	/*
	 * For overwrite only I/O, we cannot convert unwritten extents without
	 * requiring sub-block zeroing.  This can only be done under an
	 * exclusive IOLOCK, hence return -EAGAIN if this is not a written
	 * extent to tell the caller to try again.
	 */
	if (flags & IOMAP_OVERWRITE_ONLY) {
		error = -EAGAIN;
		if (imap.br_state != XFS_EXT_NORM &&
		    ((offset | length) & mp->m_blockmask))
			goto out_unlock;
	}

	seq = xfs_iomap_inode_sequence(ip, iomap_flags);
	xfs_iunlock(ip, lockmode);
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);

allocate_blocks:
	error = -EAGAIN;
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY))
		goto out_unlock;

	/*
	 * We cap the maximum length we map to a sane size to keep the chunks
	 * of work done somewhat symmetric with the work writeback does.
	 * This is a completely arbitrary number pulled out of thin air as a
	 * best guess for initial testing.
	 *
	 * Note that the value needs to be less than 32 bits wide until the
	 * lower level functions are updated.
	 */
	length = min_t(loff_t, length, 1024 * PAGE_SIZE);
	end_fsb = xfs_iomap_end_fsb(mp, offset, length);

	if (offset + length > XFS_ISIZE(ip))
		end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
	else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
		end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount);
	xfs_iunlock(ip, lockmode);

	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
			flags, &imap, &seq);
	if (error)
		return error;

	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
				 iomap_flags | IOMAP_F_NEW, seq);

out_found_cow:
	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
	if (imap.br_startblock != HOLESTARTBLOCK) {
		seq = xfs_iomap_inode_sequence(ip, 0);
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
		if (error)
			goto out_unlock;
	}
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
	xfs_iunlock(ip, lockmode);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);

out_unlock:
	if (lockmode)
		xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_direct_write_iomap_ops = {
	.iomap_begin		= xfs_direct_write_iomap_begin,
};

static int
xfs_dax_write_iomap_end(
	struct inode		*inode,
	loff_t			pos,
	loff_t			length,
	ssize_t			written,
	unsigned		flags,
	struct iomap		*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);

	if (!xfs_is_cow_inode(ip))
		return 0;

	if (!written) {
		xfs_reflink_cancel_cow_range(ip, pos, length, true);
		return 0;
	}

	return xfs_reflink_end_cow(ip, pos, written);
}

const struct iomap_ops xfs_dax_write_iomap_ops = {
	.iomap_begin	= xfs_direct_write_iomap_begin,
	.iomap_end	= xfs_dax_write_iomap_end,
};

static int
xfs_buffered_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			count,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, count);
	struct xfs_bmbt_irec	imap, cmap;
	struct xfs_iext_cursor	icur, ccur;
	xfs_fsblock_t		prealloc_blocks = 0;
	bool			eof = false, cow_eof = false, shared = false;
	int			allocfork = XFS_DATA_FORK;
	int			error = 0;
	unsigned int		lockmode = XFS_ILOCK_EXCL;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	/* we can't use delayed allocations when using extent size hints */
	if (xfs_get_extsz_hint(ip))
		return xfs_direct_write_iomap_begin(inode, offset, count,
				flags, iomap, srcmap);

	ASSERT(!XFS_IS_REALTIME_INODE(ip));

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		error = -EFSCORRUPTED;
		goto out_unlock;
	}

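	/*
	 * This is a write mapping request, so bump the write mapping stat and
	 * make sure the data fork extent list is in memory before the fork
	 * lookups below.
	 */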
	XFS_STATS_INC(mp, xs_blk_mapw);

	error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
	if (error)
		goto out_unlock;

	/*
	 * Search the data fork first to look up our source mapping.  We
	 * always need the data fork map, as we have to return it to the
	 * iomap code so that the higher level write code can read data in to
	 * perform read-modify-write cycles for unaligned writes.
	 */
	eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
	if (eof)
		imap.br_startoff = end_fsb; /* fake hole until the end */

	/* We never need to allocate blocks for zeroing a hole. */
	if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
		xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
		goto out_unlock;
	}

	/*
	 * Search the COW fork extent list even if we did not find a data fork
	 * extent.  This serves two purposes: first this implements the
	 * speculative preallocation using cowextsize, so that we also unshare
	 * blocks adjacent to shared blocks instead of just the shared blocks
	 * themselves.  Second, the lookup in the extent list is generally
	 * faster than going out to the shared extent tree.
	 */
	if (xfs_is_cow_inode(ip)) {
		if (!ip->i_cowfp) {
			ASSERT(!xfs_is_reflink_inode(ip));
			xfs_ifork_init_cow(ip);
		}
		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
				&ccur, &cmap);
		if (!cow_eof && cmap.br_startoff <= offset_fsb) {
			trace_xfs_reflink_cow_found(ip, &cmap);
			goto found_cow;
		}
	}

	if (imap.br_startoff <= offset_fsb) {
		/*
		 * For reflink files we may need a delalloc reservation when
		 * overwriting shared extents.  This includes zeroing of
		 * existing extents that contain data.
		 */
		if (!xfs_is_cow_inode(ip) ||
		    ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);

		/* Trim the mapping to the nearest shared extent boundary. */
		error = xfs_bmap_trim_cow(ip, &imap, &shared);
		if (error)
			goto out_unlock;

		/* Not shared?  Just report the (potentially capped) extent. */
		if (!shared) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		/*
		 * Fork all the shared blocks from our write offset until the
		 * end of the extent.
		 */
		allocfork = XFS_COW_FORK;
		end_fsb = imap.br_startoff + imap.br_blockcount;
	} else {
		/*
		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
		 * pages to keep the chunks of work done somewhat symmetric
		 * with the work writeback does.  This is a completely
		 * arbitrary number pulled out of thin air.
		 *
		 * Note that the value needs to be less than 32 bits wide until
		 * the lower level functions are updated.
		 */
		count = min_t(loff_t, count, 1024 * PAGE_SIZE);
		end_fsb = xfs_iomap_end_fsb(mp, offset, count);

		if (xfs_is_always_cow_inode(ip))
			allocfork = XFS_COW_FORK;
	}

	if (eof && offset + count > XFS_ISIZE(ip)) {
		/*
		 * Determine the initial size of the preallocation.
		 * We clean up any extra preallocation when the file is closed.
		 */
		if (xfs_has_allocsize(mp))
			prealloc_blocks = mp->m_allocsize_blocks;
		else if (allocfork == XFS_DATA_FORK)
			prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
						offset, count, &icur);
		else
			prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
						offset, count, &ccur);
		if (prealloc_blocks) {
			xfs_extlen_t	align;
			xfs_off_t	end_offset;
			xfs_fileoff_t	p_end_fsb;

			end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1);
			p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
					prealloc_blocks;

			align = xfs_eof_alignment(ip);
			if (align)
				p_end_fsb = roundup_64(p_end_fsb, align);

			p_end_fsb = min(p_end_fsb,
				XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
			ASSERT(p_end_fsb > offset_fsb);
			prealloc_blocks = p_end_fsb - end_fsb;
		}
	}

retry:
	error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
			end_fsb - offset_fsb, prealloc_blocks,
			allocfork == XFS_DATA_FORK ? &imap : &cmap,
			allocfork == XFS_DATA_FORK ? &icur : &ccur,
			allocfork == XFS_DATA_FORK ? eof : cow_eof);
	switch (error) {
	case 0:
		break;
	case -ENOSPC:
	case -EDQUOT:
		/* retry without any preallocation */
		trace_xfs_delalloc_enospc(ip, offset, count);
		if (prealloc_blocks) {
			prealloc_blocks = 0;
			goto retry;
		}
		fallthrough;
	default:
		goto out_unlock;
	}

	if (allocfork == XFS_COW_FORK) {
		trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
		goto found_cow;
	}

	/*
	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
	 * them out if the write happens to fail.
	 */
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);

found_imap:
	seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);

found_cow:
	seq = xfs_iomap_inode_sequence(ip, 0);
	if (imap.br_startoff <= offset_fsb) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
		if (error)
			goto out_unlock;
		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
					 IOMAP_F_SHARED, seq);
	}

	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_buffered_write_delalloc_punch(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length)
{
	return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset,
			offset + length);
}

static int
xfs_buffered_write_iomap_end(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	ssize_t			written,
	unsigned		flags,
	struct iomap		*iomap)
{
	struct xfs_mount	*mp = XFS_M(inode->i_sb);
	int			error;

	error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
			length, written, &xfs_buffered_write_delalloc_punch);
	if (error && !xfs_is_shutdown(mp)) {
		xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
			__func__, XFS_I(inode)->i_ino);
		return error;
	}
	return 0;
}

const struct iomap_ops xfs_buffered_write_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
	.iomap_end		= xfs_buffered_write_iomap_end,
};

/*
 * iomap_page_mkwrite() will never fail in a way that requires delalloc extents
 * that it allocated to be revoked.  Hence we do not need an .iomap_end method
 * for this operation.
 */
const struct iomap_ops xfs_page_mkwrite_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
};

static int
xfs_read_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	unsigned int		lockmode = XFS_ILOCK_SHARED;
	u64			seq;

	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));

	if (xfs_is_shutdown(mp))
		return -EIO;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode)))
		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
	seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
				 shared ? IOMAP_F_SHARED : 0, seq);
}

const struct iomap_ops xfs_read_iomap_ops = {
	.iomap_begin		= xfs_read_iomap_begin,
};

static int
xfs_seek_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	xfs_fileoff_t		cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	imap, cmap;
	int			error = 0;
	unsigned		lockmode;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	lockmode = xfs_ilock_data_map_shared(ip);
	error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
	if (error)
		goto out_unlock;

	if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
		/*
		 * If we found a data extent we are done.
		 */
		if (imap.br_startoff <= offset_fsb)
			goto done;
		data_fsb = imap.br_startoff;
	} else {
		/*
		 * Fake a hole until the end of the file.
		 */
		data_fsb = xfs_iomap_end_fsb(mp, offset, length);
	}

	/*
	 * If a COW fork extent covers the hole, report it - capped to the next
	 * data fork extent:
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
		cow_fsb = cmap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		if (data_fsb < cow_fsb + cmap.br_blockcount)
			end_fsb = min(end_fsb, data_fsb);
		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
				IOMAP_F_SHARED, seq);
		/*
		 * This is a COW extent, so we must probe the page cache
		 * because there could be dirty page cache being backed
		 * by this extent.
		 */
		iomap->type = IOMAP_UNWRITTEN;
		goto out_unlock;
	}

	/*
	 * Else report a hole, capped to the next found data or COW extent.
	 */
	if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
		imap.br_blockcount = cow_fsb - offset_fsb;
	else
		imap.br_blockcount = data_fsb - offset_fsb;
	imap.br_startoff = offset_fsb;
	imap.br_startblock = HOLESTARTBLOCK;
	imap.br_state = XFS_EXT_NORM;
done:
	seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_trim_extent(&imap, offset_fsb, end_fsb);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_seek_iomap_ops = {
	.iomap_begin		= xfs_seek_iomap_begin,
};

static int
xfs_xattr_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1, error = 0;
	unsigned		lockmode;
	int			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	lockmode = xfs_ilock_attr_map_shared(ip);

	/* if there is no attribute fork or no extents, return ENOENT */
	if (!xfs_inode_has_attr_fork(ip) || !ip->i_af.if_nextents) {
		error = -ENOENT;
		goto out_unlock;
	}

	ASSERT(ip->i_af.if_format != XFS_DINODE_FMT_LOCAL);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	ASSERT(nimaps);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq);
}

const struct iomap_ops xfs_xattr_iomap_ops = {
	.iomap_begin		= xfs_xattr_iomap_begin,
};

int
xfs_zero_range(
	struct xfs_inode	*ip,
	loff_t			pos,
	loff_t			len,
	bool			*did_zero)
{
	struct inode		*inode = VFS_I(ip);

	if (IS_DAX(inode))
		return dax_zero_range(inode, pos, len, did_zero,
				      &xfs_dax_write_iomap_ops);
	return iomap_zero_range(inode, pos, len, did_zero,
				&xfs_buffered_write_iomap_ops);
}

int
xfs_truncate_page(
	struct xfs_inode	*ip,
	loff_t			pos,
	bool			*did_zero)
{
	struct inode		*inode = VFS_I(ip);

	if (IS_DAX(inode))
		return dax_truncate_page(inode, pos, did_zero,
				&xfs_dax_write_iomap_ops);
	return iomap_truncate_page(inode, pos, did_zero,
			&xfs_buffered_write_iomap_ops);
}