1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_sb.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_da_format.h" 17 #include "xfs_da_btree.h" 18 #include "xfs_dir2.h" 19 #include "xfs_inode.h" 20 #include "xfs_btree.h" 21 #include "xfs_trans.h" 22 #include "xfs_inode_item.h" 23 #include "xfs_extfree_item.h" 24 #include "xfs_alloc.h" 25 #include "xfs_bmap.h" 26 #include "xfs_bmap_util.h" 27 #include "xfs_bmap_btree.h" 28 #include "xfs_rtalloc.h" 29 #include "xfs_errortag.h" 30 #include "xfs_error.h" 31 #include "xfs_quota.h" 32 #include "xfs_trans_space.h" 33 #include "xfs_buf_item.h" 34 #include "xfs_trace.h" 35 #include "xfs_symlink.h" 36 #include "xfs_attr_leaf.h" 37 #include "xfs_filestream.h" 38 #include "xfs_rmap.h" 39 #include "xfs_ag_resv.h" 40 #include "xfs_refcount.h" 41 #include "xfs_icache.h" 42 43 44 kmem_zone_t *xfs_bmap_free_item_zone; 45 46 /* 47 * Miscellaneous helper functions 48 */ 49 50 /* 51 * Compute and fill in the value of the maximum depth of a bmap btree 52 * in this filesystem. Done once, during mount. 
53 */ 54 void 55 xfs_bmap_compute_maxlevels( 56 xfs_mount_t *mp, /* file system mount structure */ 57 int whichfork) /* data or attr fork */ 58 { 59 int level; /* btree level */ 60 uint maxblocks; /* max blocks at this level */ 61 uint maxleafents; /* max leaf entries possible */ 62 int maxrootrecs; /* max records in root block */ 63 int minleafrecs; /* min records in leaf block */ 64 int minnoderecs; /* min records in node block */ 65 int sz; /* root block size */ 66 67 /* 68 * The maximum number of extents in a file, hence the maximum 69 * number of leaf entries, is controlled by the type of di_nextents 70 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents 71 * (a signed 16-bit number, xfs_aextnum_t). 72 * 73 * Note that we can no longer assume that if we are in ATTR1 that 74 * the fork offset of all the inodes will be 75 * (xfs_default_attroffset(ip) >> 3) because we could have mounted 76 * with ATTR2 and then mounted back with ATTR1, keeping the 77 * di_forkoff's fixed but probably at various positions. Therefore, 78 * for both ATTR1 and ATTR2 we have to assume the worst case scenario 79 * of a minimum size available. 
80 */ 81 if (whichfork == XFS_DATA_FORK) { 82 maxleafents = MAXEXTNUM; 83 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS); 84 } else { 85 maxleafents = MAXAEXTNUM; 86 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS); 87 } 88 maxrootrecs = xfs_bmdr_maxrecs(sz, 0); 89 minleafrecs = mp->m_bmap_dmnr[0]; 90 minnoderecs = mp->m_bmap_dmnr[1]; 91 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 92 for (level = 1; maxblocks > 1; level++) { 93 if (maxblocks <= maxrootrecs) 94 maxblocks = 1; 95 else 96 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 97 } 98 mp->m_bm_maxlevels[whichfork] = level; 99 } 100 101 STATIC int /* error */ 102 xfs_bmbt_lookup_eq( 103 struct xfs_btree_cur *cur, 104 struct xfs_bmbt_irec *irec, 105 int *stat) /* success/failure */ 106 { 107 cur->bc_rec.b = *irec; 108 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 109 } 110 111 STATIC int /* error */ 112 xfs_bmbt_lookup_first( 113 struct xfs_btree_cur *cur, 114 int *stat) /* success/failure */ 115 { 116 cur->bc_rec.b.br_startoff = 0; 117 cur->bc_rec.b.br_startblock = 0; 118 cur->bc_rec.b.br_blockcount = 0; 119 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 120 } 121 122 /* 123 * Check if the inode needs to be converted to btree format. 124 */ 125 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork) 126 { 127 return whichfork != XFS_COW_FORK && 128 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && 129 XFS_IFORK_NEXTENTS(ip, whichfork) > 130 XFS_IFORK_MAXEXT(ip, whichfork); 131 } 132 133 /* 134 * Check if the inode should be converted to extent format. 
135 */ 136 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork) 137 { 138 return whichfork != XFS_COW_FORK && 139 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE && 140 XFS_IFORK_NEXTENTS(ip, whichfork) <= 141 XFS_IFORK_MAXEXT(ip, whichfork); 142 } 143 144 /* 145 * Update the record referred to by cur to the value given by irec 146 * This either works (return 0) or gets an EFSCORRUPTED error. 147 */ 148 STATIC int 149 xfs_bmbt_update( 150 struct xfs_btree_cur *cur, 151 struct xfs_bmbt_irec *irec) 152 { 153 union xfs_btree_rec rec; 154 155 xfs_bmbt_disk_set_all(&rec.bmbt, irec); 156 return xfs_btree_update(cur, &rec); 157 } 158 159 /* 160 * Compute the worst-case number of indirect blocks that will be used 161 * for ip's delayed extent of length "len". 162 */ 163 STATIC xfs_filblks_t 164 xfs_bmap_worst_indlen( 165 xfs_inode_t *ip, /* incore inode pointer */ 166 xfs_filblks_t len) /* delayed extent length */ 167 { 168 int level; /* btree level number */ 169 int maxrecs; /* maximum record count at this level */ 170 xfs_mount_t *mp; /* mount structure */ 171 xfs_filblks_t rval; /* return value */ 172 173 mp = ip->i_mount; 174 maxrecs = mp->m_bmap_dmxr[0]; 175 for (level = 0, rval = 0; 176 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); 177 level++) { 178 len += maxrecs - 1; 179 do_div(len, maxrecs); 180 rval += len; 181 if (len == 1) 182 return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 183 level - 1; 184 if (level == 0) 185 maxrecs = mp->m_bmap_dmxr[1]; 186 } 187 return rval; 188 } 189 190 /* 191 * Calculate the default attribute fork offset for newly created inodes. 
192 */ 193 uint 194 xfs_default_attroffset( 195 struct xfs_inode *ip) 196 { 197 struct xfs_mount *mp = ip->i_mount; 198 uint offset; 199 200 if (mp->m_sb.sb_inodesize == 256) { 201 offset = XFS_LITINO(mp, ip->i_d.di_version) - 202 XFS_BMDR_SPACE_CALC(MINABTPTRS); 203 } else { 204 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS); 205 } 206 207 ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version)); 208 return offset; 209 } 210 211 /* 212 * Helper routine to reset inode di_forkoff field when switching 213 * attribute fork from local to extent format - we reset it where 214 * possible to make space available for inline data fork extents. 215 */ 216 STATIC void 217 xfs_bmap_forkoff_reset( 218 xfs_inode_t *ip, 219 int whichfork) 220 { 221 if (whichfork == XFS_ATTR_FORK && 222 ip->i_d.di_format != XFS_DINODE_FMT_DEV && 223 ip->i_d.di_format != XFS_DINODE_FMT_BTREE) { 224 uint dfl_forkoff = xfs_default_attroffset(ip) >> 3; 225 226 if (dfl_forkoff > ip->i_d.di_forkoff) 227 ip->i_d.di_forkoff = dfl_forkoff; 228 } 229 } 230 231 #ifdef DEBUG 232 STATIC struct xfs_buf * 233 xfs_bmap_get_bp( 234 struct xfs_btree_cur *cur, 235 xfs_fsblock_t bno) 236 { 237 struct xfs_log_item *lip; 238 int i; 239 240 if (!cur) 241 return NULL; 242 243 for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) { 244 if (!cur->bc_bufs[i]) 245 break; 246 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno) 247 return cur->bc_bufs[i]; 248 } 249 250 /* Chase down all the log items to see if the bp is there */ 251 list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) { 252 struct xfs_buf_log_item *bip = (struct xfs_buf_log_item *)lip; 253 254 if (bip->bli_item.li_type == XFS_LI_BUF && 255 XFS_BUF_ADDR(bip->bli_buf) == bno) 256 return bip->bli_buf; 257 } 258 259 return NULL; 260 } 261 262 STATIC void 263 xfs_check_block( 264 struct xfs_btree_block *block, 265 xfs_mount_t *mp, 266 int root, 267 short sz) 268 { 269 int i, j, dmxr; 270 __be64 *pp, *thispa; /* pointer to block address */ 271 xfs_bmbt_key_t *prevp, *keyp; 272 273 
ASSERT(be16_to_cpu(block->bb_level) > 0); 274 275 prevp = NULL; 276 for( i = 1; i <= xfs_btree_get_numrecs(block); i++) { 277 dmxr = mp->m_bmap_dmxr[0]; 278 keyp = XFS_BMBT_KEY_ADDR(mp, block, i); 279 280 if (prevp) { 281 ASSERT(be64_to_cpu(prevp->br_startoff) < 282 be64_to_cpu(keyp->br_startoff)); 283 } 284 prevp = keyp; 285 286 /* 287 * Compare the block numbers to see if there are dups. 288 */ 289 if (root) 290 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz); 291 else 292 pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr); 293 294 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) { 295 if (root) 296 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz); 297 else 298 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); 299 if (*thispa == *pp) { 300 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld", 301 __func__, j, i, 302 (unsigned long long)be64_to_cpu(*thispa)); 303 xfs_err(mp, "%s: ptrs are equal in node\n", 304 __func__); 305 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 306 } 307 } 308 } 309 } 310 311 /* 312 * Check that the extents for the inode ip are in the right order in all 313 * btree leaves. THis becomes prohibitively expensive for large extent count 314 * files, so don't bother with inodes that have more than 10,000 extents in 315 * them. The btree record ordering checks will still be done, so for such large 316 * bmapbt constructs that is going to catch most corruptions. 
317 */ 318 STATIC void 319 xfs_bmap_check_leaf_extents( 320 xfs_btree_cur_t *cur, /* btree cursor or null */ 321 xfs_inode_t *ip, /* incore inode pointer */ 322 int whichfork) /* data or attr fork */ 323 { 324 struct xfs_btree_block *block; /* current btree block */ 325 xfs_fsblock_t bno; /* block # of "block" */ 326 xfs_buf_t *bp; /* buffer for "block" */ 327 int error; /* error return value */ 328 xfs_extnum_t i=0, j; /* index into the extents list */ 329 xfs_ifork_t *ifp; /* fork structure */ 330 int level; /* btree level, for checking */ 331 xfs_mount_t *mp; /* file system mount structure */ 332 __be64 *pp; /* pointer to block address */ 333 xfs_bmbt_rec_t *ep; /* pointer to current extent */ 334 xfs_bmbt_rec_t last = {0, 0}; /* last extent in prev block */ 335 xfs_bmbt_rec_t *nextp; /* pointer to next extent */ 336 int bp_release = 0; 337 338 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) { 339 return; 340 } 341 342 /* skip large extent count inodes */ 343 if (ip->i_d.di_nextents > 10000) 344 return; 345 346 bno = NULLFSBLOCK; 347 mp = ip->i_mount; 348 ifp = XFS_IFORK_PTR(ip, whichfork); 349 block = ifp->if_broot; 350 /* 351 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. 352 */ 353 level = be16_to_cpu(block->bb_level); 354 ASSERT(level > 0); 355 xfs_check_block(block, mp, 1, ifp->if_broot_bytes); 356 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); 357 bno = be64_to_cpu(*pp); 358 359 ASSERT(bno != NULLFSBLOCK); 360 ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount); 361 ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks); 362 363 /* 364 * Go down the tree until leaf level is reached, following the first 365 * pointer (leftmost) at each level. 
366 */ 367 while (level-- > 0) { 368 /* See if buf is in cur first */ 369 bp_release = 0; 370 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 371 if (!bp) { 372 bp_release = 1; 373 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 374 XFS_BMAP_BTREE_REF, 375 &xfs_bmbt_buf_ops); 376 if (error) 377 goto error_norelse; 378 } 379 block = XFS_BUF_TO_BLOCK(bp); 380 if (level == 0) 381 break; 382 383 /* 384 * Check this block for basic sanity (increasing keys and 385 * no duplicate blocks). 386 */ 387 388 xfs_check_block(block, mp, 0, 0); 389 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 390 bno = be64_to_cpu(*pp); 391 XFS_WANT_CORRUPTED_GOTO(mp, 392 xfs_verify_fsbno(mp, bno), error0); 393 if (bp_release) { 394 bp_release = 0; 395 xfs_trans_brelse(NULL, bp); 396 } 397 } 398 399 /* 400 * Here with bp and block set to the leftmost leaf node in the tree. 401 */ 402 i = 0; 403 404 /* 405 * Loop over all leaf nodes checking that all extents are in the right order. 406 */ 407 for (;;) { 408 xfs_fsblock_t nextbno; 409 xfs_extnum_t num_recs; 410 411 412 num_recs = xfs_btree_get_numrecs(block); 413 414 /* 415 * Read-ahead the next leaf block, if any. 416 */ 417 418 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 419 420 /* 421 * Check all the extents to make sure they are OK. 422 * If we had a previous block, the last entry should 423 * conform with the first entry in this one. 
424 */ 425 426 ep = XFS_BMBT_REC_ADDR(mp, block, 1); 427 if (i) { 428 ASSERT(xfs_bmbt_disk_get_startoff(&last) + 429 xfs_bmbt_disk_get_blockcount(&last) <= 430 xfs_bmbt_disk_get_startoff(ep)); 431 } 432 for (j = 1; j < num_recs; j++) { 433 nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1); 434 ASSERT(xfs_bmbt_disk_get_startoff(ep) + 435 xfs_bmbt_disk_get_blockcount(ep) <= 436 xfs_bmbt_disk_get_startoff(nextp)); 437 ep = nextp; 438 } 439 440 last = *ep; 441 i += num_recs; 442 if (bp_release) { 443 bp_release = 0; 444 xfs_trans_brelse(NULL, bp); 445 } 446 bno = nextbno; 447 /* 448 * If we've reached the end, stop. 449 */ 450 if (bno == NULLFSBLOCK) 451 break; 452 453 bp_release = 0; 454 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 455 if (!bp) { 456 bp_release = 1; 457 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 458 XFS_BMAP_BTREE_REF, 459 &xfs_bmbt_buf_ops); 460 if (error) 461 goto error_norelse; 462 } 463 block = XFS_BUF_TO_BLOCK(bp); 464 } 465 466 return; 467 468 error0: 469 xfs_warn(mp, "%s: at error0", __func__); 470 if (bp_release) 471 xfs_trans_brelse(NULL, bp); 472 error_norelse: 473 xfs_warn(mp, "%s: BAD after btree leaves for %d extents", 474 __func__, i); 475 xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__); 476 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 477 return; 478 } 479 480 /* 481 * Validate that the bmbt_irecs being returned from bmapi are valid 482 * given the caller's original parameters. Specifically check the 483 * ranges of the returned irecs to ensure that they only extend beyond 484 * the given parameters if the XFS_BMAPI_ENTIRE flag was set. 
485 */ 486 STATIC void 487 xfs_bmap_validate_ret( 488 xfs_fileoff_t bno, 489 xfs_filblks_t len, 490 int flags, 491 xfs_bmbt_irec_t *mval, 492 int nmap, 493 int ret_nmap) 494 { 495 int i; /* index to map values */ 496 497 ASSERT(ret_nmap <= nmap); 498 499 for (i = 0; i < ret_nmap; i++) { 500 ASSERT(mval[i].br_blockcount > 0); 501 if (!(flags & XFS_BMAPI_ENTIRE)) { 502 ASSERT(mval[i].br_startoff >= bno); 503 ASSERT(mval[i].br_blockcount <= len); 504 ASSERT(mval[i].br_startoff + mval[i].br_blockcount <= 505 bno + len); 506 } else { 507 ASSERT(mval[i].br_startoff < bno + len); 508 ASSERT(mval[i].br_startoff + mval[i].br_blockcount > 509 bno); 510 } 511 ASSERT(i == 0 || 512 mval[i - 1].br_startoff + mval[i - 1].br_blockcount == 513 mval[i].br_startoff); 514 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK && 515 mval[i].br_startblock != HOLESTARTBLOCK); 516 ASSERT(mval[i].br_state == XFS_EXT_NORM || 517 mval[i].br_state == XFS_EXT_UNWRITTEN); 518 } 519 } 520 521 #else 522 #define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0) 523 #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0) 524 #endif /* DEBUG */ 525 526 /* 527 * bmap free list manipulation functions 528 */ 529 530 /* 531 * Add the extent to the list of extents to be free at transaction end. 532 * The list is maintained sorted (by block number). 
533 */ 534 void 535 __xfs_bmap_add_free( 536 struct xfs_mount *mp, 537 struct xfs_defer_ops *dfops, 538 xfs_fsblock_t bno, 539 xfs_filblks_t len, 540 struct xfs_owner_info *oinfo, 541 bool skip_discard) 542 { 543 struct xfs_extent_free_item *new; /* new element */ 544 #ifdef DEBUG 545 xfs_agnumber_t agno; 546 xfs_agblock_t agbno; 547 548 ASSERT(bno != NULLFSBLOCK); 549 ASSERT(len > 0); 550 ASSERT(len <= MAXEXTLEN); 551 ASSERT(!isnullstartblock(bno)); 552 agno = XFS_FSB_TO_AGNO(mp, bno); 553 agbno = XFS_FSB_TO_AGBNO(mp, bno); 554 ASSERT(agno < mp->m_sb.sb_agcount); 555 ASSERT(agbno < mp->m_sb.sb_agblocks); 556 ASSERT(len < mp->m_sb.sb_agblocks); 557 ASSERT(agbno + len <= mp->m_sb.sb_agblocks); 558 #endif 559 ASSERT(xfs_bmap_free_item_zone != NULL); 560 561 new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); 562 new->xefi_startblock = bno; 563 new->xefi_blockcount = (xfs_extlen_t)len; 564 if (oinfo) 565 new->xefi_oinfo = *oinfo; 566 else 567 xfs_rmap_skip_owner_update(&new->xefi_oinfo); 568 new->xefi_skip_discard = skip_discard; 569 trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0, 570 XFS_FSB_TO_AGBNO(mp, bno), len); 571 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); 572 } 573 574 /* 575 * Inode fork format manipulation functions 576 */ 577 578 /* 579 * Transform a btree format file with only one leaf node, where the 580 * extents list will fit in the inode, into an extents format file. 581 * Since the file extents are already in-core, all we have to do is 582 * give up the space for the btree root and pitch the leaf block. 
583 */ 584 STATIC int /* error */ 585 xfs_bmap_btree_to_extents( 586 xfs_trans_t *tp, /* transaction pointer */ 587 xfs_inode_t *ip, /* incore inode pointer */ 588 xfs_btree_cur_t *cur, /* btree cursor */ 589 int *logflagsp, /* inode logging flags */ 590 int whichfork) /* data or attr fork */ 591 { 592 /* REFERENCED */ 593 struct xfs_btree_block *cblock;/* child btree block */ 594 xfs_fsblock_t cbno; /* child block number */ 595 xfs_buf_t *cbp; /* child block's buffer */ 596 int error; /* error return value */ 597 xfs_ifork_t *ifp; /* inode fork data */ 598 xfs_mount_t *mp; /* mount point structure */ 599 __be64 *pp; /* ptr to block address */ 600 struct xfs_btree_block *rblock;/* root btree block */ 601 struct xfs_owner_info oinfo; 602 603 mp = ip->i_mount; 604 ifp = XFS_IFORK_PTR(ip, whichfork); 605 ASSERT(whichfork != XFS_COW_FORK); 606 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 607 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 608 rblock = ifp->if_broot; 609 ASSERT(be16_to_cpu(rblock->bb_level) == 1); 610 ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); 611 ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1); 612 pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); 613 cbno = be64_to_cpu(*pp); 614 *logflagsp = 0; 615 #ifdef DEBUG 616 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 617 xfs_btree_check_lptr(cur, cbno, 1)); 618 #endif 619 error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, 620 &xfs_bmbt_buf_ops); 621 if (error) 622 return error; 623 cblock = XFS_BUF_TO_BLOCK(cbp); 624 if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) 625 return error; 626 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 627 xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo); 628 ip->i_d.di_nblocks--; 629 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); 630 xfs_trans_binval(tp, cbp); 631 if (cur->bc_bufs[0] == cbp) 632 cur->bc_bufs[0] = NULL; 633 xfs_iroot_realloc(ip, -1, whichfork); 634 
ASSERT(ifp->if_broot == NULL); 635 ASSERT((ifp->if_flags & XFS_IFBROOT) == 0); 636 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); 637 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 638 return 0; 639 } 640 641 /* 642 * Convert an extents-format file into a btree-format file. 643 * The new file will have a root block (in the inode) and a single child block. 644 */ 645 STATIC int /* error */ 646 xfs_bmap_extents_to_btree( 647 xfs_trans_t *tp, /* transaction pointer */ 648 xfs_inode_t *ip, /* incore inode pointer */ 649 xfs_fsblock_t *firstblock, /* first-block-allocated */ 650 struct xfs_defer_ops *dfops, /* blocks freed in xaction */ 651 xfs_btree_cur_t **curp, /* cursor returned to caller */ 652 int wasdel, /* converting a delayed alloc */ 653 int *logflagsp, /* inode logging flags */ 654 int whichfork) /* data or attr fork */ 655 { 656 struct xfs_btree_block *ablock; /* allocated (child) bt block */ 657 xfs_buf_t *abp; /* buffer for ablock */ 658 xfs_alloc_arg_t args; /* allocation arguments */ 659 xfs_bmbt_rec_t *arp; /* child record pointer */ 660 struct xfs_btree_block *block; /* btree root block */ 661 xfs_btree_cur_t *cur; /* bmap btree cursor */ 662 int error; /* error return value */ 663 xfs_ifork_t *ifp; /* inode fork pointer */ 664 xfs_bmbt_key_t *kp; /* root block key pointer */ 665 xfs_mount_t *mp; /* mount structure */ 666 xfs_bmbt_ptr_t *pp; /* root block address pointer */ 667 struct xfs_iext_cursor icur; 668 struct xfs_bmbt_irec rec; 669 xfs_extnum_t cnt = 0; 670 671 mp = ip->i_mount; 672 ASSERT(whichfork != XFS_COW_FORK); 673 ifp = XFS_IFORK_PTR(ip, whichfork); 674 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); 675 676 /* 677 * Make space in the inode incore. 678 */ 679 xfs_iroot_realloc(ip, 1, whichfork); 680 ifp->if_flags |= XFS_IFBROOT; 681 682 /* 683 * Fill in the root. 
684 */ 685 block = ifp->if_broot; 686 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, 687 XFS_BTNUM_BMAP, 1, 1, ip->i_ino, 688 XFS_BTREE_LONG_PTRS); 689 /* 690 * Need a cursor. Can't allocate until bb_level is filled in. 691 */ 692 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 693 cur->bc_private.b.firstblock = *firstblock; 694 cur->bc_private.b.dfops = dfops; 695 cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; 696 /* 697 * Convert to a btree with two levels, one record in root. 698 */ 699 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); 700 memset(&args, 0, sizeof(args)); 701 args.tp = tp; 702 args.mp = mp; 703 xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork); 704 args.firstblock = *firstblock; 705 if (*firstblock == NULLFSBLOCK) { 706 args.type = XFS_ALLOCTYPE_START_BNO; 707 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 708 } else if (dfops->dop_low) { 709 args.type = XFS_ALLOCTYPE_START_BNO; 710 args.fsbno = *firstblock; 711 } else { 712 args.type = XFS_ALLOCTYPE_NEAR_BNO; 713 args.fsbno = *firstblock; 714 } 715 args.minlen = args.maxlen = args.prod = 1; 716 args.wasdel = wasdel; 717 *logflagsp = 0; 718 if ((error = xfs_alloc_vextent(&args))) { 719 xfs_iroot_realloc(ip, -1, whichfork); 720 ASSERT(ifp->if_broot == NULL); 721 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); 722 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 723 return error; 724 } 725 726 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { 727 xfs_iroot_realloc(ip, -1, whichfork); 728 ASSERT(ifp->if_broot == NULL); 729 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); 730 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 731 return -ENOSPC; 732 } 733 /* 734 * Allocation can't fail, the space was reserved. 
735 */ 736 ASSERT(*firstblock == NULLFSBLOCK || 737 args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); 738 *firstblock = cur->bc_private.b.firstblock = args.fsbno; 739 cur->bc_private.b.allocated++; 740 ip->i_d.di_nblocks++; 741 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); 742 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); 743 /* 744 * Fill in the child block. 745 */ 746 abp->b_ops = &xfs_bmbt_buf_ops; 747 ablock = XFS_BUF_TO_BLOCK(abp); 748 xfs_btree_init_block_int(mp, ablock, abp->b_bn, 749 XFS_BTNUM_BMAP, 0, 0, ip->i_ino, 750 XFS_BTREE_LONG_PTRS); 751 752 for_each_xfs_iext(ifp, &icur, &rec) { 753 if (isnullstartblock(rec.br_startblock)) 754 continue; 755 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt); 756 xfs_bmbt_disk_set_all(arp, &rec); 757 cnt++; 758 } 759 ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork)); 760 xfs_btree_set_numrecs(ablock, cnt); 761 762 /* 763 * Fill in the root key and pointer. 764 */ 765 kp = XFS_BMBT_KEY_ADDR(mp, block, 1); 766 arp = XFS_BMBT_REC_ADDR(mp, ablock, 1); 767 kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp)); 768 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur, 769 be16_to_cpu(block->bb_level))); 770 *pp = cpu_to_be64(args.fsbno); 771 772 /* 773 * Do all this logging at the end so that 774 * the root is at the right level. 775 */ 776 xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS); 777 xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs)); 778 ASSERT(*curp == NULL); 779 *curp = cur; 780 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); 781 return 0; 782 } 783 784 /* 785 * Convert a local file to an extents file. 786 * This code is out of bounds for data forks of regular files, 787 * since the file data needs to get logged so things will stay consistent. 788 * (The bmap-level manipulations are ok, though). 
789 */ 790 void 791 xfs_bmap_local_to_extents_empty( 792 struct xfs_inode *ip, 793 int whichfork) 794 { 795 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 796 797 ASSERT(whichfork != XFS_COW_FORK); 798 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); 799 ASSERT(ifp->if_bytes == 0); 800 ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); 801 802 xfs_bmap_forkoff_reset(ip, whichfork); 803 ifp->if_flags &= ~XFS_IFINLINE; 804 ifp->if_flags |= XFS_IFEXTENTS; 805 ifp->if_u1.if_root = NULL; 806 ifp->if_height = 0; 807 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); 808 } 809 810 811 STATIC int /* error */ 812 xfs_bmap_local_to_extents( 813 xfs_trans_t *tp, /* transaction pointer */ 814 xfs_inode_t *ip, /* incore inode pointer */ 815 xfs_fsblock_t *firstblock, /* first block allocated in xaction */ 816 xfs_extlen_t total, /* total blocks needed by transaction */ 817 int *logflagsp, /* inode logging flags */ 818 int whichfork, 819 void (*init_fn)(struct xfs_trans *tp, 820 struct xfs_buf *bp, 821 struct xfs_inode *ip, 822 struct xfs_ifork *ifp)) 823 { 824 int error = 0; 825 int flags; /* logging flags returned */ 826 xfs_ifork_t *ifp; /* inode fork pointer */ 827 xfs_alloc_arg_t args; /* allocation arguments */ 828 xfs_buf_t *bp; /* buffer for extent block */ 829 struct xfs_bmbt_irec rec; 830 struct xfs_iext_cursor icur; 831 832 /* 833 * We don't want to deal with the case of keeping inode data inline yet. 834 * So sending the data fork of a regular inode is invalid. 
835 */ 836 ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK)); 837 ifp = XFS_IFORK_PTR(ip, whichfork); 838 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); 839 840 if (!ifp->if_bytes) { 841 xfs_bmap_local_to_extents_empty(ip, whichfork); 842 flags = XFS_ILOG_CORE; 843 goto done; 844 } 845 846 flags = 0; 847 error = 0; 848 ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE); 849 memset(&args, 0, sizeof(args)); 850 args.tp = tp; 851 args.mp = ip->i_mount; 852 xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0); 853 args.firstblock = *firstblock; 854 /* 855 * Allocate a block. We know we need only one, since the 856 * file currently fits in an inode. 857 */ 858 if (*firstblock == NULLFSBLOCK) { 859 args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); 860 args.type = XFS_ALLOCTYPE_START_BNO; 861 } else { 862 args.fsbno = *firstblock; 863 args.type = XFS_ALLOCTYPE_NEAR_BNO; 864 } 865 args.total = total; 866 args.minlen = args.maxlen = args.prod = 1; 867 error = xfs_alloc_vextent(&args); 868 if (error) 869 goto done; 870 871 /* Can't fail, the space was reserved. */ 872 ASSERT(args.fsbno != NULLFSBLOCK); 873 ASSERT(args.len == 1); 874 *firstblock = args.fsbno; 875 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 876 877 /* 878 * Initialize the block, copy the data and log the remote buffer. 879 * 880 * The callout is responsible for logging because the remote format 881 * might differ from the local format and thus we don't know how much to 882 * log here. Note that init_fn must also set the buffer log item type 883 * correctly. 
884 */ 885 init_fn(tp, bp, ip, ifp); 886 887 /* account for the change in fork size */ 888 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 889 xfs_bmap_local_to_extents_empty(ip, whichfork); 890 flags |= XFS_ILOG_CORE; 891 892 ifp->if_u1.if_root = NULL; 893 ifp->if_height = 0; 894 895 rec.br_startoff = 0; 896 rec.br_startblock = args.fsbno; 897 rec.br_blockcount = 1; 898 rec.br_state = XFS_EXT_NORM; 899 xfs_iext_first(ifp, &icur); 900 xfs_iext_insert(ip, &icur, &rec, 0); 901 902 XFS_IFORK_NEXT_SET(ip, whichfork, 1); 903 ip->i_d.di_nblocks = 1; 904 xfs_trans_mod_dquot_byino(tp, ip, 905 XFS_TRANS_DQ_BCOUNT, 1L); 906 flags |= xfs_ilog_fext(whichfork); 907 908 done: 909 *logflagsp = flags; 910 return error; 911 } 912 913 /* 914 * Called from xfs_bmap_add_attrfork to handle btree format files. 915 */ 916 STATIC int /* error */ 917 xfs_bmap_add_attrfork_btree( 918 xfs_trans_t *tp, /* transaction pointer */ 919 xfs_inode_t *ip, /* incore inode pointer */ 920 xfs_fsblock_t *firstblock, /* first block allocated */ 921 struct xfs_defer_ops *dfops, /* blocks to free at commit */ 922 int *flags) /* inode logging flags */ 923 { 924 xfs_btree_cur_t *cur; /* btree cursor */ 925 int error; /* error return value */ 926 xfs_mount_t *mp; /* file system mount struct */ 927 int stat; /* newroot status */ 928 929 mp = ip->i_mount; 930 if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip)) 931 *flags |= XFS_ILOG_DBROOT; 932 else { 933 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK); 934 cur->bc_private.b.dfops = dfops; 935 cur->bc_private.b.firstblock = *firstblock; 936 error = xfs_bmbt_lookup_first(cur, &stat); 937 if (error) 938 goto error0; 939 /* must be at least one entry */ 940 XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0); 941 if ((error = xfs_btree_new_iroot(cur, flags, &stat))) 942 goto error0; 943 if (stat == 0) { 944 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 945 return -ENOSPC; 946 } 947 *firstblock = cur->bc_private.b.firstblock; 948 cur->bc_private.b.allocated = 0; 
949 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 950 } 951 return 0; 952 error0: 953 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 954 return error; 955 } 956 957 /* 958 * Called from xfs_bmap_add_attrfork to handle extents format files. 959 */ 960 STATIC int /* error */ 961 xfs_bmap_add_attrfork_extents( 962 xfs_trans_t *tp, /* transaction pointer */ 963 xfs_inode_t *ip, /* incore inode pointer */ 964 xfs_fsblock_t *firstblock, /* first block allocated */ 965 struct xfs_defer_ops *dfops, /* blocks to free at commit */ 966 int *flags) /* inode logging flags */ 967 { 968 xfs_btree_cur_t *cur; /* bmap btree cursor */ 969 int error; /* error return value */ 970 971 if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip)) 972 return 0; 973 cur = NULL; 974 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0, 975 flags, XFS_DATA_FORK); 976 if (cur) { 977 cur->bc_private.b.allocated = 0; 978 xfs_btree_del_cursor(cur, 979 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 980 } 981 return error; 982 } 983 984 /* 985 * Called from xfs_bmap_add_attrfork to handle local format files. Each 986 * different data fork content type needs a different callout to do the 987 * conversion. Some are basic and only require special block initialisation 988 * callouts for the data formating, others (directories) are so specialised they 989 * handle everything themselves. 990 * 991 * XXX (dgc): investigate whether directory conversion can use the generic 992 * formatting callout. It should be possible - it's just a very complex 993 * formatter. 
994 */ 995 STATIC int /* error */ 996 xfs_bmap_add_attrfork_local( 997 xfs_trans_t *tp, /* transaction pointer */ 998 xfs_inode_t *ip, /* incore inode pointer */ 999 xfs_fsblock_t *firstblock, /* first block allocated */ 1000 struct xfs_defer_ops *dfops, /* blocks to free at commit */ 1001 int *flags) /* inode logging flags */ 1002 { 1003 xfs_da_args_t dargs; /* args for dir/attr code */ 1004 1005 if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) 1006 return 0; 1007 1008 if (S_ISDIR(VFS_I(ip)->i_mode)) { 1009 memset(&dargs, 0, sizeof(dargs)); 1010 dargs.geo = ip->i_mount->m_dir_geo; 1011 dargs.dp = ip; 1012 dargs.firstblock = firstblock; 1013 dargs.dfops = dfops; 1014 dargs.total = dargs.geo->fsbcount; 1015 dargs.whichfork = XFS_DATA_FORK; 1016 dargs.trans = tp; 1017 return xfs_dir2_sf_to_block(&dargs); 1018 } 1019 1020 if (S_ISLNK(VFS_I(ip)->i_mode)) 1021 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, 1022 flags, XFS_DATA_FORK, 1023 xfs_symlink_local_to_remote); 1024 1025 /* should only be called for types that support local format data */ 1026 ASSERT(0); 1027 return -EFSCORRUPTED; 1028 } 1029 1030 /* 1031 * Convert inode from non-attributed to attributed. 1032 * Must not be in a transaction, ip must not be locked. 
*/
/*
 * The function allocates its own tr_addafork transaction and takes
 * XFS_ILOCK_EXCL itself; once the lock is held, every exit path drops it again.
 *
 * "size" is passed to xfs_attr_shortform_bytesfit() to pick di_forkoff.  A
 * nonzero "rsvd" allocates the transaction with XFS_TRANS_RESERVE and adds
 * XFS_QMOPT_FORCE_RES to the quota reservation.
 *
 * Returns 0 on success, and also 0 when XFS_IFORK_Q(ip) is found to be set
 * once the inode lock is held (the transaction is cancelled in that case).
 * Otherwise returns the error of the step that failed.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	struct xfs_defer_ops	dfops;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	blks = XFS_ADDAFORK_SPACE_RES(mp);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	/*
	 * error is 0 here, so this path cancels the transaction and reports
	 * success.  NOTE(review): presumably this covers a fork that appeared
	 * between the unlocked ASSERT above and taking the lock - confirm.
	 */
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_anextents != 0) {
		error = -EFSCORRUPTED;
		goto trans_cancel;
	}
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/*
	 * Choose the fork offset (stored in units of 8 bytes).  "version" is
	 * raised to 2 only when xfs_attr_shortform_bytesfit() returned a
	 * nonzero offset and the mount has XFS_MOUNT_ATTR2 set.
	 */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_defer_init(&dfops, &firstblock);
	/*
	 * Give the data fork's format-specific helper the chance to convert
	 * the data fork; formats without a helper need no work.
	 */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&dfops, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	/* Log whatever the helper reported before acting on its error. */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto bmap_cancel;
	/*
	 * Unlocked pre-check first; the feature bits are re-tested under
	 * m_sb_lock before being set, and the superblock is logged only if
	 * something actually changed.
	 */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	error = xfs_defer_finish(&tp, &dfops);
	if (error)
		goto bmap_cancel;
	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

bmap_cancel:
	xfs_defer_cancel(&dfops);
trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Internal and external extent tree search functions.
 */

/*
 * Read in extents from a btree-format inode.
 *
 * The caller must hold XFS_ILOCK_EXCL (asserted).  Starting from the root in
 * ifp->if_broot, the leftmost pointer is followed down to the leftmost leaf;
 * the leaves are then walked through bb_rightsib.  Every record is checked
 * with xfs_bmap_validate_extent() and inserted into the incore extent list.
 *
 * Returns 0 with XFS_IFEXTENTS set in ifp->if_flags on success.  Returns
 * -EFSCORRUPTED straight away if the fork is not in btree format.  Any later
 * failure destroys the partially built incore list via xfs_iext_destroy()
 * before the error is returned.
 */
int
xfs_iread_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			state = xfs_bmap_fork_to_state(whichfork);
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_extnum_t		nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
	struct xfs_btree_block	*block = ifp->if_broot;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	new;
	xfs_fsblock_t		bno;
	struct xfs_buf		*bp;
	xfs_extnum_t		i, j;
	int			level;
	__be64			*pp;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
		/* Leaf reached: leave the loop with bp still held. */
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			xfs_verify_fsbno(mp, bno), out_brelse);
		xfs_trans_brelse(tp, bp);
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;
	xfs_iext_first(ifp, &icur);

	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);
		/* More records on disk than the inode claims to have. */
		if (unlikely(i + num_recs > nextents)) {
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
					__func__, block, sizeof(*block),
					__this_address);
			error = -EFSCORRUPTED;
			goto out_brelse;
		}
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		for (j = 0; j < num_recs; j++, frp++, i++) {
			xfs_failaddr_t	fa;

			xfs_bmbt_disk_get_all(frp, &new);
			fa = xfs_bmap_validate_extent(ip, whichfork, &new);
			if (fa) {
				error = -EFSCORRUPTED;
				xfs_inode_verifier_error(ip, error,
						"xfs_iread_extents(2)",
						frp, sizeof(*frp), fa);
				goto out_brelse;
			}
			xfs_iext_insert(ip, &icur, &new, state);
			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
			xfs_iext_next(ifp, &icur);
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
	}

	/* bp has already been released here, hence "out" not "out_brelse". */
	if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
		error = -EFSCORRUPTED;
		goto out;
	}
	ASSERT(i == xfs_iext_count(ifp));

	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;

out_brelse:
	xfs_trans_brelse(tp, bp);
out:
	xfs_iext_destroy(ifp);
	return error;
}

/*
 * Returns the relative block number of the first unused block(s) in the given
 * fork with at least "len" logically contiguous blocks free.  This is the
 * lowest-address hole if the fork has holes, else the first block past the end
 * of fork.  Return 0 if the fork is currently local (in-inode).
1310 */ 1311 int /* error */ 1312 xfs_bmap_first_unused( 1313 struct xfs_trans *tp, /* transaction pointer */ 1314 struct xfs_inode *ip, /* incore inode */ 1315 xfs_extlen_t len, /* size of hole to find */ 1316 xfs_fileoff_t *first_unused, /* unused block */ 1317 int whichfork) /* data or attr fork */ 1318 { 1319 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 1320 struct xfs_bmbt_irec got; 1321 struct xfs_iext_cursor icur; 1322 xfs_fileoff_t lastaddr = 0; 1323 xfs_fileoff_t lowest, max; 1324 int error; 1325 1326 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE || 1327 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS || 1328 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); 1329 1330 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { 1331 *first_unused = 0; 1332 return 0; 1333 } 1334 1335 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1336 error = xfs_iread_extents(tp, ip, whichfork); 1337 if (error) 1338 return error; 1339 } 1340 1341 lowest = max = *first_unused; 1342 for_each_xfs_iext(ifp, &icur, &got) { 1343 /* 1344 * See if the hole before this extent will work. 1345 */ 1346 if (got.br_startoff >= lowest + len && 1347 got.br_startoff - max >= len) 1348 break; 1349 lastaddr = got.br_startoff + got.br_blockcount; 1350 max = XFS_FILEOFF_MAX(lastaddr, lowest); 1351 } 1352 1353 *first_unused = max; 1354 return 0; 1355 } 1356 1357 /* 1358 * Returns the file-relative block number of the last block - 1 before 1359 * last_block (input value) in the file. 1360 * This is not based on i_size, it is based on the extent records. 1361 * Returns 0 for local files, as they do not have extent records. 
1362 */ 1363 int /* error */ 1364 xfs_bmap_last_before( 1365 struct xfs_trans *tp, /* transaction pointer */ 1366 struct xfs_inode *ip, /* incore inode */ 1367 xfs_fileoff_t *last_block, /* last block */ 1368 int whichfork) /* data or attr fork */ 1369 { 1370 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 1371 struct xfs_bmbt_irec got; 1372 struct xfs_iext_cursor icur; 1373 int error; 1374 1375 switch (XFS_IFORK_FORMAT(ip, whichfork)) { 1376 case XFS_DINODE_FMT_LOCAL: 1377 *last_block = 0; 1378 return 0; 1379 case XFS_DINODE_FMT_BTREE: 1380 case XFS_DINODE_FMT_EXTENTS: 1381 break; 1382 default: 1383 return -EIO; 1384 } 1385 1386 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1387 error = xfs_iread_extents(tp, ip, whichfork); 1388 if (error) 1389 return error; 1390 } 1391 1392 if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got)) 1393 *last_block = 0; 1394 return 0; 1395 } 1396 1397 int 1398 xfs_bmap_last_extent( 1399 struct xfs_trans *tp, 1400 struct xfs_inode *ip, 1401 int whichfork, 1402 struct xfs_bmbt_irec *rec, 1403 int *is_empty) 1404 { 1405 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 1406 struct xfs_iext_cursor icur; 1407 int error; 1408 1409 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1410 error = xfs_iread_extents(tp, ip, whichfork); 1411 if (error) 1412 return error; 1413 } 1414 1415 xfs_iext_last(ifp, &icur); 1416 if (!xfs_iext_get_extent(ifp, &icur, rec)) 1417 *is_empty = 1; 1418 else 1419 *is_empty = 0; 1420 return 0; 1421 } 1422 1423 /* 1424 * Check the last inode extent to determine whether this allocation will result 1425 * in blocks being allocated at the end of the file. When we allocate new data 1426 * blocks at the end of the file which do not start at the previous data block, 1427 * we will try to align the new blocks at stripe unit boundaries. 1428 * 1429 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be 1430 * at, or past the EOF. 
1431 */ 1432 STATIC int 1433 xfs_bmap_isaeof( 1434 struct xfs_bmalloca *bma, 1435 int whichfork) 1436 { 1437 struct xfs_bmbt_irec rec; 1438 int is_empty; 1439 int error; 1440 1441 bma->aeof = false; 1442 error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec, 1443 &is_empty); 1444 if (error) 1445 return error; 1446 1447 if (is_empty) { 1448 bma->aeof = true; 1449 return 0; 1450 } 1451 1452 /* 1453 * Check if we are allocation or past the last extent, or at least into 1454 * the last delayed allocated extent. 1455 */ 1456 bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount || 1457 (bma->offset >= rec.br_startoff && 1458 isnullstartblock(rec.br_startblock)); 1459 return 0; 1460 } 1461 1462 /* 1463 * Returns the file-relative block number of the first block past eof in 1464 * the file. This is not based on i_size, it is based on the extent records. 1465 * Returns 0 for local files, as they do not have extent records. 1466 */ 1467 int 1468 xfs_bmap_last_offset( 1469 struct xfs_inode *ip, 1470 xfs_fileoff_t *last_block, 1471 int whichfork) 1472 { 1473 struct xfs_bmbt_irec rec; 1474 int is_empty; 1475 int error; 1476 1477 *last_block = 0; 1478 1479 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) 1480 return 0; 1481 1482 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && 1483 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 1484 return -EIO; 1485 1486 error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); 1487 if (error || is_empty) 1488 return error; 1489 1490 *last_block = rec.br_startoff + rec.br_blockcount; 1491 return 0; 1492 } 1493 1494 /* 1495 * Returns whether the selected fork of the inode has exactly one 1496 * block or not. For the data fork we check this matches di_size, 1497 * implying the file's range is 0..bsize-1. 
1498 */ 1499 int /* 1=>1 block, 0=>otherwise */ 1500 xfs_bmap_one_block( 1501 xfs_inode_t *ip, /* incore inode */ 1502 int whichfork) /* data or attr fork */ 1503 { 1504 xfs_ifork_t *ifp; /* inode fork pointer */ 1505 int rval; /* return value */ 1506 xfs_bmbt_irec_t s; /* internal version of extent */ 1507 struct xfs_iext_cursor icur; 1508 1509 #ifndef DEBUG 1510 if (whichfork == XFS_DATA_FORK) 1511 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize; 1512 #endif /* !DEBUG */ 1513 if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1) 1514 return 0; 1515 if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 1516 return 0; 1517 ifp = XFS_IFORK_PTR(ip, whichfork); 1518 ASSERT(ifp->if_flags & XFS_IFEXTENTS); 1519 xfs_iext_first(ifp, &icur); 1520 xfs_iext_get_extent(ifp, &icur, &s); 1521 rval = s.br_startoff == 0 && s.br_blockcount == 1; 1522 if (rval && whichfork == XFS_DATA_FORK) 1523 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize); 1524 return rval; 1525 } 1526 1527 /* 1528 * Extent tree manipulation functions used during allocation. 1529 */ 1530 1531 /* 1532 * Convert a delayed allocation to a real allocation. 
*/
/*
 * bma->got ("new" below) is the real extent that replaces all or part of the
 * delayed allocation extent found at bma->icur ("PREV" below); the ASSERTs
 * require new to lie entirely within PREV.  A neighbouring record is merged
 * with new when it is a real extent that is logically and physically adjacent,
 * has the same state, and the merged length does not exceed MAXEXTLEN.
 *
 * whichfork must not be XFS_ATTR_FORK.  Inode logging flags are accumulated in
 * bma->logflags; the flags gathered in rval are not added for XFS_COW_FORK.
 * Returns 0 on success or the error of the step that failed.
 */
STATIC int				/* error */
xfs_bmap_add_extent_delay_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval=0;	/* return value (logging flags) */
	int			state = xfs_bmap_fork_to_state(whichfork);
	xfs_filblks_t		da_new; /* new count del alloc blocks used */
	xfs_filblks_t		da_old; /* old count del alloc blocks used */
	xfs_filblks_t		temp=0;	/* value for da_new calculations */
	int			tmp_rval;	/* partial logging flags */
	struct xfs_mount	*mp;
	xfs_extnum_t		*nextents;
	struct xfs_bmbt_irec	old;

	mp = bma->ip->i_mount;
	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	ASSERT(whichfork != XFS_ATTR_FORK);
	/* COW fork extents are counted in i_cnextents, others in di_nextents. */
	nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
						&bma->ip->i_d.di_nextents);

	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(mp, xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(isnullstartblock(PREV.br_startblock));
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	da_old = startblockval(PREV.br_startblock);
	da_new = 0;

	/*
	 * Set flags determining what part of the previous delayed allocation
	 * extent is being replaced by a real allocation.
	 */
	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 */
	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
		state |= BMAP_LEFT_VALID;
		if (isnullstartblock(LEFT.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
	    LEFT.br_state == new->br_state &&
	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	/*
	 * Check and set flags if this segment has a right neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 * Also check for all-three-contiguous being too large.
	 */
	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
		state |= BMAP_RIGHT_VALID;
		if (isnullstartblock(RIGHT.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new_endoff == RIGHT.br_startoff &&
	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
	    new->br_state == RIGHT.br_state &&
	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING)) !=
		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING) ||
	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;

	error = 0;
	/*
	 * Switch out based on the FILLING and CONTIG state bits.
	 */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left and right neighbors are both contiguous with new.
		 */
		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
		(*nextents)--;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left neighbor is contiguous, the right is not.
		 */
		old = LEFT;
		LEFT.br_blockcount += PREV.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The right neighbor is contiguous, the left is not.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_blockcount += RIGHT.br_blockcount;

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &PREV);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * Neither the left nor right neighbors are contiguous with
		 * the new one.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_state = new->br_state;
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is contiguous.
		 */
		old = LEFT;
		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock));

		LEFT.br_blockcount += new->br_blockcount;

		PREV.br_blockcount = temp;
		PREV.br_startoff += new->br_blockcount;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					bma->firstblock, bma->dfops,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		/*
		 * The leftover delalloc piece keeps at most the old
		 * reservation minus whatever the cursor has allocated.
		 */
		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));

		PREV.br_startoff = new_endoff;
		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_prev(ifp, &bma->icur);
		break;

	case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is contiguous with the new allocation.
		 */
		old = RIGHT;
		RIGHT.br_startoff = new->br_startoff;
		RIGHT.br_startblock = new->br_startblock;
		RIGHT.br_blockcount += new->br_blockcount;

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &RIGHT);
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock));

		PREV.br_blockcount = temp;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
		break;

	case BMAP_RIGHT_FILLING:
		/*
		 * Filling in the last part of a previous delayed allocation.
		 * The right neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->dfops, &bma->cur, 1,
				&tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		/*
		 * The leftover delalloc piece keeps at most the old
		 * reservation minus whatever the cursor has allocated.
		 */
		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));

		PREV.br_startblock = nullstartblock(da_new);
		PREV.br_blockcount = temp;
		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
		xfs_iext_next(ifp, &bma->icur);
		break;

	case 0:
		/*
		 * Filling in the middle part of a previous delayed allocation.
		 * Contiguity is impossible here.
		 * This case is avoided almost all the time.
		 *
		 * We start with a delayed allocation:
		 *
		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
		 *  PREV @ idx
		 *
		 * and we are allocating:
		 *                     +rrrrrrrrrrrrrrrrr+
		 *                            new
		 *
		 * and we set it up for insertion as:
		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
		 *                            new
		 *  PREV @ idx          LEFT              RIGHT
		 *                      inserted at idx + 1
		 */
		old = PREV;

		/* LEFT is the new middle */
		LEFT = *new;

		/* RIGHT is the new right */
		RIGHT.br_state = PREV.br_state;
		RIGHT.br_startoff = new_endoff;
		RIGHT.br_blockcount =
			PREV.br_startoff + PREV.br_blockcount - new_endoff;
		RIGHT.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					RIGHT.br_blockcount));

		/* truncate PREV */
		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
		PREV.br_startblock =
			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
					PREV.br_blockcount));
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
		(*nextents)++;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					bma->firstblock, bma->dfops, &bma->cur,
					1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		/* Both delalloc remainders now carry their own reservation. */
		da_new = startblockval(PREV.br_startblock) +
			 startblockval(RIGHT.br_startblock);
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
	case BMAP_LEFT_CONTIG:
	case BMAP_RIGHT_CONTIG:
		/*
		 * These cases are all impossible.
		 */
		ASSERT(0);
	}

	/* add reverse mapping unless caller opted out */
	if (!(bma->flags & XFS_BMAPI_NORMAP)) {
		error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip,
				whichfork, new);
		if (error)
			goto done;
	}

	/* convert to a btree if necessary */
	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
		int	tmp_logflags;	/* partial log flag return val */

		ASSERT(bma->cur == NULL);
		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->dfops, &bma->cur,
				da_old > 0, &tmp_logflags, whichfork);
		bma->logflags |= tmp_logflags;
		if (error)
			goto done;
	}

	/* Blocks the cursor allocated count against the new reservation. */
	if (bma->cur) {
		da_new += bma->cur->bc_private.b.allocated;
		bma->cur->bc_private.b.allocated = 0;
	}

	/* adjust for changes in reserved delayed indirect blocks */
	if (da_new != da_old) {
		ASSERT(state == 0 || da_new < da_old);
		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
				false);
	}

	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
	/* rval is deliberately not folded in for the COW fork. */
	if (whichfork != XFS_COW_FORK)
		bma->logflags |= rval;
	return error;
#undef	LEFT
#undef	RIGHT
#undef	PREV
}

/*
 * Convert an unwritten allocation to a real allocation or vice versa.
 */
STATIC int				/* error */
xfs_bmap_add_extent_unwritten_real(
	struct xfs_trans	*tp,
	xfs_inode_t		*ip,	/* incore inode pointer */
	int			whichfork,
	struct xfs_iext_cursor	*icur,
	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
	xfs_bmbt_irec_t		*new,	/* new data to add to file extents */
	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
	struct xfs_defer_ops	*dfops,	/* list of extents to be freed */
	int			*logflagsp) /* inode logging flags */
{
	xfs_btree_cur_t		*cur;	/* btree cursor */
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval=0;	/* return value (logging flags) */
	int			state = xfs_bmap_fork_to_state(whichfork);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	old;

	*logflagsp = 0;

	cur = *curp;
	ifp = XFS_IFORK_PTR(ip, whichfork);

	ASSERT(!isnullstartblock(new->br_startblock));

	XFS_STATS_INC(mp, xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	error = 0;
	xfs_iext_get_extent(ifp, icur, &PREV);
	ASSERT(new->br_state != PREV.br_state);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	/*
	 * Set flags determining what part of the previous oldext allocation
	 * extent is being replaced by a newext allocation.
2091 */ 2092 if (PREV.br_startoff == new->br_startoff) 2093 state |= BMAP_LEFT_FILLING; 2094 if (PREV.br_startoff + PREV.br_blockcount == new_endoff) 2095 state |= BMAP_RIGHT_FILLING; 2096 2097 /* 2098 * Check and set flags if this segment has a left neighbor. 2099 * Don't set contiguous if the combined extent would be too large. 2100 */ 2101 if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) { 2102 state |= BMAP_LEFT_VALID; 2103 if (isnullstartblock(LEFT.br_startblock)) 2104 state |= BMAP_LEFT_DELAY; 2105 } 2106 2107 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2108 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 2109 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 2110 LEFT.br_state == new->br_state && 2111 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2112 state |= BMAP_LEFT_CONTIG; 2113 2114 /* 2115 * Check and set flags if this segment has a right neighbor. 2116 * Don't set contiguous if the combined extent would be too large. 2117 * Also check for all-three-contiguous being too large. 2118 */ 2119 if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) { 2120 state |= BMAP_RIGHT_VALID; 2121 if (isnullstartblock(RIGHT.br_startblock)) 2122 state |= BMAP_RIGHT_DELAY; 2123 } 2124 2125 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2126 new_endoff == RIGHT.br_startoff && 2127 new->br_startblock + new->br_blockcount == RIGHT.br_startblock && 2128 new->br_state == RIGHT.br_state && 2129 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 2130 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2131 BMAP_RIGHT_FILLING)) != 2132 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2133 BMAP_RIGHT_FILLING) || 2134 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 2135 <= MAXEXTLEN)) 2136 state |= BMAP_RIGHT_CONTIG; 2137 2138 /* 2139 * Switch out based on the FILLING and CONTIG state bits. 
2140 */ 2141 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2142 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { 2143 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2144 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2145 /* 2146 * Setting all of a previous oldext extent to newext. 2147 * The left and right neighbors are both contiguous with new. 2148 */ 2149 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount; 2150 2151 xfs_iext_remove(ip, icur, state); 2152 xfs_iext_remove(ip, icur, state); 2153 xfs_iext_prev(ifp, icur); 2154 xfs_iext_update_extent(ip, state, icur, &LEFT); 2155 XFS_IFORK_NEXT_SET(ip, whichfork, 2156 XFS_IFORK_NEXTENTS(ip, whichfork) - 2); 2157 if (cur == NULL) 2158 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2159 else { 2160 rval = XFS_ILOG_CORE; 2161 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2162 if (error) 2163 goto done; 2164 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2165 if ((error = xfs_btree_delete(cur, &i))) 2166 goto done; 2167 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2168 if ((error = xfs_btree_decrement(cur, 0, &i))) 2169 goto done; 2170 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2171 if ((error = xfs_btree_delete(cur, &i))) 2172 goto done; 2173 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2174 if ((error = xfs_btree_decrement(cur, 0, &i))) 2175 goto done; 2176 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2177 error = xfs_bmbt_update(cur, &LEFT); 2178 if (error) 2179 goto done; 2180 } 2181 break; 2182 2183 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2184 /* 2185 * Setting all of a previous oldext extent to newext. 2186 * The left neighbor is contiguous, the right is not. 
2187 */ 2188 LEFT.br_blockcount += PREV.br_blockcount; 2189 2190 xfs_iext_remove(ip, icur, state); 2191 xfs_iext_prev(ifp, icur); 2192 xfs_iext_update_extent(ip, state, icur, &LEFT); 2193 XFS_IFORK_NEXT_SET(ip, whichfork, 2194 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2195 if (cur == NULL) 2196 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2197 else { 2198 rval = XFS_ILOG_CORE; 2199 error = xfs_bmbt_lookup_eq(cur, &PREV, &i); 2200 if (error) 2201 goto done; 2202 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2203 if ((error = xfs_btree_delete(cur, &i))) 2204 goto done; 2205 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2206 if ((error = xfs_btree_decrement(cur, 0, &i))) 2207 goto done; 2208 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2209 error = xfs_bmbt_update(cur, &LEFT); 2210 if (error) 2211 goto done; 2212 } 2213 break; 2214 2215 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2216 /* 2217 * Setting all of a previous oldext extent to newext. 2218 * The right neighbor is contiguous, the left is not. 
2219 */ 2220 PREV.br_blockcount += RIGHT.br_blockcount; 2221 PREV.br_state = new->br_state; 2222 2223 xfs_iext_next(ifp, icur); 2224 xfs_iext_remove(ip, icur, state); 2225 xfs_iext_prev(ifp, icur); 2226 xfs_iext_update_extent(ip, state, icur, &PREV); 2227 2228 XFS_IFORK_NEXT_SET(ip, whichfork, 2229 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2230 if (cur == NULL) 2231 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2232 else { 2233 rval = XFS_ILOG_CORE; 2234 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2235 if (error) 2236 goto done; 2237 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2238 if ((error = xfs_btree_delete(cur, &i))) 2239 goto done; 2240 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2241 if ((error = xfs_btree_decrement(cur, 0, &i))) 2242 goto done; 2243 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2244 error = xfs_bmbt_update(cur, &PREV); 2245 if (error) 2246 goto done; 2247 } 2248 break; 2249 2250 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 2251 /* 2252 * Setting all of a previous oldext extent to newext. 2253 * Neither the left nor right neighbors are contiguous with 2254 * the new one. 2255 */ 2256 PREV.br_state = new->br_state; 2257 xfs_iext_update_extent(ip, state, icur, &PREV); 2258 2259 if (cur == NULL) 2260 rval = XFS_ILOG_DEXT; 2261 else { 2262 rval = 0; 2263 error = xfs_bmbt_lookup_eq(cur, new, &i); 2264 if (error) 2265 goto done; 2266 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2267 error = xfs_bmbt_update(cur, &PREV); 2268 if (error) 2269 goto done; 2270 } 2271 break; 2272 2273 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 2274 /* 2275 * Setting the first part of a previous oldext extent to newext. 2276 * The left neighbor is contiguous. 
2277 */ 2278 LEFT.br_blockcount += new->br_blockcount; 2279 2280 old = PREV; 2281 PREV.br_startoff += new->br_blockcount; 2282 PREV.br_startblock += new->br_blockcount; 2283 PREV.br_blockcount -= new->br_blockcount; 2284 2285 xfs_iext_update_extent(ip, state, icur, &PREV); 2286 xfs_iext_prev(ifp, icur); 2287 xfs_iext_update_extent(ip, state, icur, &LEFT); 2288 2289 if (cur == NULL) 2290 rval = XFS_ILOG_DEXT; 2291 else { 2292 rval = 0; 2293 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2294 if (error) 2295 goto done; 2296 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2297 error = xfs_bmbt_update(cur, &PREV); 2298 if (error) 2299 goto done; 2300 error = xfs_btree_decrement(cur, 0, &i); 2301 if (error) 2302 goto done; 2303 error = xfs_bmbt_update(cur, &LEFT); 2304 if (error) 2305 goto done; 2306 } 2307 break; 2308 2309 case BMAP_LEFT_FILLING: 2310 /* 2311 * Setting the first part of a previous oldext extent to newext. 2312 * The left neighbor is not contiguous. 2313 */ 2314 old = PREV; 2315 PREV.br_startoff += new->br_blockcount; 2316 PREV.br_startblock += new->br_blockcount; 2317 PREV.br_blockcount -= new->br_blockcount; 2318 2319 xfs_iext_update_extent(ip, state, icur, &PREV); 2320 xfs_iext_insert(ip, icur, new, state); 2321 XFS_IFORK_NEXT_SET(ip, whichfork, 2322 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2323 if (cur == NULL) 2324 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2325 else { 2326 rval = XFS_ILOG_CORE; 2327 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2328 if (error) 2329 goto done; 2330 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2331 error = xfs_bmbt_update(cur, &PREV); 2332 if (error) 2333 goto done; 2334 cur->bc_rec.b = *new; 2335 if ((error = xfs_btree_insert(cur, &i))) 2336 goto done; 2337 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2338 } 2339 break; 2340 2341 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2342 /* 2343 * Setting the last part of a previous oldext extent to newext. 2344 * The right neighbor is contiguous with the new allocation. 
2345 */ 2346 old = PREV; 2347 PREV.br_blockcount -= new->br_blockcount; 2348 2349 RIGHT.br_startoff = new->br_startoff; 2350 RIGHT.br_startblock = new->br_startblock; 2351 RIGHT.br_blockcount += new->br_blockcount; 2352 2353 xfs_iext_update_extent(ip, state, icur, &PREV); 2354 xfs_iext_next(ifp, icur); 2355 xfs_iext_update_extent(ip, state, icur, &RIGHT); 2356 2357 if (cur == NULL) 2358 rval = XFS_ILOG_DEXT; 2359 else { 2360 rval = 0; 2361 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2362 if (error) 2363 goto done; 2364 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2365 error = xfs_bmbt_update(cur, &PREV); 2366 if (error) 2367 goto done; 2368 error = xfs_btree_increment(cur, 0, &i); 2369 if (error) 2370 goto done; 2371 error = xfs_bmbt_update(cur, &RIGHT); 2372 if (error) 2373 goto done; 2374 } 2375 break; 2376 2377 case BMAP_RIGHT_FILLING: 2378 /* 2379 * Setting the last part of a previous oldext extent to newext. 2380 * The right neighbor is not contiguous. 2381 */ 2382 old = PREV; 2383 PREV.br_blockcount -= new->br_blockcount; 2384 2385 xfs_iext_update_extent(ip, state, icur, &PREV); 2386 xfs_iext_next(ifp, icur); 2387 xfs_iext_insert(ip, icur, new, state); 2388 2389 XFS_IFORK_NEXT_SET(ip, whichfork, 2390 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2391 if (cur == NULL) 2392 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2393 else { 2394 rval = XFS_ILOG_CORE; 2395 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2396 if (error) 2397 goto done; 2398 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2399 error = xfs_bmbt_update(cur, &PREV); 2400 if (error) 2401 goto done; 2402 error = xfs_bmbt_lookup_eq(cur, new, &i); 2403 if (error) 2404 goto done; 2405 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2406 if ((error = xfs_btree_insert(cur, &i))) 2407 goto done; 2408 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2409 } 2410 break; 2411 2412 case 0: 2413 /* 2414 * Setting the middle part of a previous oldext extent to 2415 * newext. Contiguity is impossible here. 2416 * One extent becomes three extents. 
2417 */ 2418 old = PREV; 2419 PREV.br_blockcount = new->br_startoff - PREV.br_startoff; 2420 2421 r[0] = *new; 2422 r[1].br_startoff = new_endoff; 2423 r[1].br_blockcount = 2424 old.br_startoff + old.br_blockcount - new_endoff; 2425 r[1].br_startblock = new->br_startblock + new->br_blockcount; 2426 r[1].br_state = PREV.br_state; 2427 2428 xfs_iext_update_extent(ip, state, icur, &PREV); 2429 xfs_iext_next(ifp, icur); 2430 xfs_iext_insert(ip, icur, &r[1], state); 2431 xfs_iext_insert(ip, icur, &r[0], state); 2432 2433 XFS_IFORK_NEXT_SET(ip, whichfork, 2434 XFS_IFORK_NEXTENTS(ip, whichfork) + 2); 2435 if (cur == NULL) 2436 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2437 else { 2438 rval = XFS_ILOG_CORE; 2439 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2440 if (error) 2441 goto done; 2442 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2443 /* new right extent - oldext */ 2444 error = xfs_bmbt_update(cur, &r[1]); 2445 if (error) 2446 goto done; 2447 /* new left extent - oldext */ 2448 cur->bc_rec.b = PREV; 2449 if ((error = xfs_btree_insert(cur, &i))) 2450 goto done; 2451 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2452 /* 2453 * Reset the cursor to the position of the new extent 2454 * we are about to insert as we can't trust it after 2455 * the previous insert. 2456 */ 2457 error = xfs_bmbt_lookup_eq(cur, new, &i); 2458 if (error) 2459 goto done; 2460 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2461 /* new middle extent - newext */ 2462 if ((error = xfs_btree_insert(cur, &i))) 2463 goto done; 2464 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2465 } 2466 break; 2467 2468 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2469 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2470 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: 2471 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2472 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2473 case BMAP_LEFT_CONTIG: 2474 case BMAP_RIGHT_CONTIG: 2475 /* 2476 * These cases are all impossible. 
2477 */ 2478 ASSERT(0); 2479 } 2480 2481 /* update reverse mappings */ 2482 error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); 2483 if (error) 2484 goto done; 2485 2486 /* convert to a btree if necessary */ 2487 if (xfs_bmap_needs_btree(ip, whichfork)) { 2488 int tmp_logflags; /* partial log flag return val */ 2489 2490 ASSERT(cur == NULL); 2491 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, 2492 0, &tmp_logflags, whichfork); 2493 *logflagsp |= tmp_logflags; 2494 if (error) 2495 goto done; 2496 } 2497 2498 /* clear out the allocated field, done with it now in any case. */ 2499 if (cur) { 2500 cur->bc_private.b.allocated = 0; 2501 *curp = cur; 2502 } 2503 2504 xfs_bmap_check_leaf_extents(*curp, ip, whichfork); 2505 done: 2506 *logflagsp |= rval; 2507 return error; 2508 #undef LEFT 2509 #undef RIGHT 2510 #undef PREV 2511 } 2512 2513 /* 2514 * Convert a hole to a delayed allocation. 2515 */ 2516 STATIC void 2517 xfs_bmap_add_extent_hole_delay( 2518 xfs_inode_t *ip, /* incore inode pointer */ 2519 int whichfork, 2520 struct xfs_iext_cursor *icur, 2521 xfs_bmbt_irec_t *new) /* new data to add to file extents */ 2522 { 2523 xfs_ifork_t *ifp; /* inode fork pointer */ 2524 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2525 xfs_filblks_t newlen=0; /* new indirect size */ 2526 xfs_filblks_t oldlen=0; /* old indirect size */ 2527 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2528 int state = xfs_bmap_fork_to_state(whichfork); 2529 xfs_filblks_t temp; /* temp for indirect calculations */ 2530 2531 ifp = XFS_IFORK_PTR(ip, whichfork); 2532 ASSERT(isnullstartblock(new->br_startblock)); 2533 2534 /* 2535 * Check and set flags if this segment has a left neighbor 2536 */ 2537 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2538 state |= BMAP_LEFT_VALID; 2539 if (isnullstartblock(left.br_startblock)) 2540 state |= BMAP_LEFT_DELAY; 2541 } 2542 2543 /* 2544 * Check and set flags if the current (right) segment exists. 
2545 * If it doesn't exist, we're converting the hole at end-of-file. 2546 */ 2547 if (xfs_iext_get_extent(ifp, icur, &right)) { 2548 state |= BMAP_RIGHT_VALID; 2549 if (isnullstartblock(right.br_startblock)) 2550 state |= BMAP_RIGHT_DELAY; 2551 } 2552 2553 /* 2554 * Set contiguity flags on the left and right neighbors. 2555 * Don't let extents get too large, even if the pieces are contiguous. 2556 */ 2557 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && 2558 left.br_startoff + left.br_blockcount == new->br_startoff && 2559 left.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2560 state |= BMAP_LEFT_CONTIG; 2561 2562 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && 2563 new->br_startoff + new->br_blockcount == right.br_startoff && 2564 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 2565 (!(state & BMAP_LEFT_CONTIG) || 2566 (left.br_blockcount + new->br_blockcount + 2567 right.br_blockcount <= MAXEXTLEN))) 2568 state |= BMAP_RIGHT_CONTIG; 2569 2570 /* 2571 * Switch out based on the contiguity flags. 2572 */ 2573 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2574 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2575 /* 2576 * New allocation is contiguous with delayed allocations 2577 * on the left and on the right. 2578 * Merge all three into a single extent record. 2579 */ 2580 temp = left.br_blockcount + new->br_blockcount + 2581 right.br_blockcount; 2582 2583 oldlen = startblockval(left.br_startblock) + 2584 startblockval(new->br_startblock) + 2585 startblockval(right.br_startblock); 2586 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2587 oldlen); 2588 left.br_startblock = nullstartblock(newlen); 2589 left.br_blockcount = temp; 2590 2591 xfs_iext_remove(ip, icur, state); 2592 xfs_iext_prev(ifp, icur); 2593 xfs_iext_update_extent(ip, state, icur, &left); 2594 break; 2595 2596 case BMAP_LEFT_CONTIG: 2597 /* 2598 * New allocation is contiguous with a delayed allocation 2599 * on the left. 
2600 * Merge the new allocation with the left neighbor. 2601 */ 2602 temp = left.br_blockcount + new->br_blockcount; 2603 2604 oldlen = startblockval(left.br_startblock) + 2605 startblockval(new->br_startblock); 2606 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2607 oldlen); 2608 left.br_blockcount = temp; 2609 left.br_startblock = nullstartblock(newlen); 2610 2611 xfs_iext_prev(ifp, icur); 2612 xfs_iext_update_extent(ip, state, icur, &left); 2613 break; 2614 2615 case BMAP_RIGHT_CONTIG: 2616 /* 2617 * New allocation is contiguous with a delayed allocation 2618 * on the right. 2619 * Merge the new allocation with the right neighbor. 2620 */ 2621 temp = new->br_blockcount + right.br_blockcount; 2622 oldlen = startblockval(new->br_startblock) + 2623 startblockval(right.br_startblock); 2624 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2625 oldlen); 2626 right.br_startoff = new->br_startoff; 2627 right.br_startblock = nullstartblock(newlen); 2628 right.br_blockcount = temp; 2629 xfs_iext_update_extent(ip, state, icur, &right); 2630 break; 2631 2632 case 0: 2633 /* 2634 * New allocation is not contiguous with another 2635 * delayed allocation. 2636 * Insert a new entry. 2637 */ 2638 oldlen = newlen = 0; 2639 xfs_iext_insert(ip, icur, new, state); 2640 break; 2641 } 2642 if (oldlen != newlen) { 2643 ASSERT(oldlen > newlen); 2644 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen), 2645 false); 2646 /* 2647 * Nothing to do for disk quota accounting here. 2648 */ 2649 } 2650 } 2651 2652 /* 2653 * Convert a hole to a real allocation. 
2654 */ 2655 STATIC int /* error */ 2656 xfs_bmap_add_extent_hole_real( 2657 struct xfs_trans *tp, 2658 struct xfs_inode *ip, 2659 int whichfork, 2660 struct xfs_iext_cursor *icur, 2661 struct xfs_btree_cur **curp, 2662 struct xfs_bmbt_irec *new, 2663 xfs_fsblock_t *first, 2664 struct xfs_defer_ops *dfops, 2665 int *logflagsp, 2666 int flags) 2667 { 2668 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 2669 struct xfs_mount *mp = ip->i_mount; 2670 struct xfs_btree_cur *cur = *curp; 2671 int error; /* error return value */ 2672 int i; /* temp state */ 2673 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2674 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2675 int rval=0; /* return value (logging flags) */ 2676 int state = xfs_bmap_fork_to_state(whichfork); 2677 struct xfs_bmbt_irec old; 2678 2679 ASSERT(!isnullstartblock(new->br_startblock)); 2680 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 2681 2682 XFS_STATS_INC(mp, xs_add_exlist); 2683 2684 /* 2685 * Check and set flags if this segment has a left neighbor. 2686 */ 2687 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2688 state |= BMAP_LEFT_VALID; 2689 if (isnullstartblock(left.br_startblock)) 2690 state |= BMAP_LEFT_DELAY; 2691 } 2692 2693 /* 2694 * Check and set flags if this segment has a current value. 2695 * Not true if we're inserting into the "hole" at eof. 2696 */ 2697 if (xfs_iext_get_extent(ifp, icur, &right)) { 2698 state |= BMAP_RIGHT_VALID; 2699 if (isnullstartblock(right.br_startblock)) 2700 state |= BMAP_RIGHT_DELAY; 2701 } 2702 2703 /* 2704 * We're inserting a real allocation between "left" and "right". 2705 * Set the contiguity flags. Don't let extents get too large. 
2706 */ 2707 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2708 left.br_startoff + left.br_blockcount == new->br_startoff && 2709 left.br_startblock + left.br_blockcount == new->br_startblock && 2710 left.br_state == new->br_state && 2711 left.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2712 state |= BMAP_LEFT_CONTIG; 2713 2714 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2715 new->br_startoff + new->br_blockcount == right.br_startoff && 2716 new->br_startblock + new->br_blockcount == right.br_startblock && 2717 new->br_state == right.br_state && 2718 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 2719 (!(state & BMAP_LEFT_CONTIG) || 2720 left.br_blockcount + new->br_blockcount + 2721 right.br_blockcount <= MAXEXTLEN)) 2722 state |= BMAP_RIGHT_CONTIG; 2723 2724 error = 0; 2725 /* 2726 * Select which case we're in here, and implement it. 2727 */ 2728 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2729 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2730 /* 2731 * New allocation is contiguous with real allocations on the 2732 * left and on the right. 2733 * Merge all three into a single extent record. 
2734 */ 2735 left.br_blockcount += new->br_blockcount + right.br_blockcount; 2736 2737 xfs_iext_remove(ip, icur, state); 2738 xfs_iext_prev(ifp, icur); 2739 xfs_iext_update_extent(ip, state, icur, &left); 2740 2741 XFS_IFORK_NEXT_SET(ip, whichfork, 2742 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2743 if (cur == NULL) { 2744 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2745 } else { 2746 rval = XFS_ILOG_CORE; 2747 error = xfs_bmbt_lookup_eq(cur, &right, &i); 2748 if (error) 2749 goto done; 2750 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2751 error = xfs_btree_delete(cur, &i); 2752 if (error) 2753 goto done; 2754 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2755 error = xfs_btree_decrement(cur, 0, &i); 2756 if (error) 2757 goto done; 2758 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2759 error = xfs_bmbt_update(cur, &left); 2760 if (error) 2761 goto done; 2762 } 2763 break; 2764 2765 case BMAP_LEFT_CONTIG: 2766 /* 2767 * New allocation is contiguous with a real allocation 2768 * on the left. 2769 * Merge the new allocation with the left neighbor. 2770 */ 2771 old = left; 2772 left.br_blockcount += new->br_blockcount; 2773 2774 xfs_iext_prev(ifp, icur); 2775 xfs_iext_update_extent(ip, state, icur, &left); 2776 2777 if (cur == NULL) { 2778 rval = xfs_ilog_fext(whichfork); 2779 } else { 2780 rval = 0; 2781 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2782 if (error) 2783 goto done; 2784 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2785 error = xfs_bmbt_update(cur, &left); 2786 if (error) 2787 goto done; 2788 } 2789 break; 2790 2791 case BMAP_RIGHT_CONTIG: 2792 /* 2793 * New allocation is contiguous with a real allocation 2794 * on the right. 2795 * Merge the new allocation with the right neighbor. 
2796 */ 2797 old = right; 2798 2799 right.br_startoff = new->br_startoff; 2800 right.br_startblock = new->br_startblock; 2801 right.br_blockcount += new->br_blockcount; 2802 xfs_iext_update_extent(ip, state, icur, &right); 2803 2804 if (cur == NULL) { 2805 rval = xfs_ilog_fext(whichfork); 2806 } else { 2807 rval = 0; 2808 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2809 if (error) 2810 goto done; 2811 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2812 error = xfs_bmbt_update(cur, &right); 2813 if (error) 2814 goto done; 2815 } 2816 break; 2817 2818 case 0: 2819 /* 2820 * New allocation is not contiguous with another 2821 * real allocation. 2822 * Insert a new entry. 2823 */ 2824 xfs_iext_insert(ip, icur, new, state); 2825 XFS_IFORK_NEXT_SET(ip, whichfork, 2826 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2827 if (cur == NULL) { 2828 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2829 } else { 2830 rval = XFS_ILOG_CORE; 2831 error = xfs_bmbt_lookup_eq(cur, new, &i); 2832 if (error) 2833 goto done; 2834 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2835 error = xfs_btree_insert(cur, &i); 2836 if (error) 2837 goto done; 2838 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2839 } 2840 break; 2841 } 2842 2843 /* add reverse mapping unless caller opted out */ 2844 if (!(flags & XFS_BMAPI_NORMAP)) { 2845 error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new); 2846 if (error) 2847 goto done; 2848 } 2849 2850 /* convert to a btree if necessary */ 2851 if (xfs_bmap_needs_btree(ip, whichfork)) { 2852 int tmp_logflags; /* partial log flag return val */ 2853 2854 ASSERT(cur == NULL); 2855 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp, 2856 0, &tmp_logflags, whichfork); 2857 *logflagsp |= tmp_logflags; 2858 cur = *curp; 2859 if (error) 2860 goto done; 2861 } 2862 2863 /* clear out the allocated field, done with it now in any case. 
/*
 * Functions used in the extent read, allocate and remove paths
 */

/*
 * Adjust the size of the new extent based on di_extsize and rt extsize.
 *
 * On entry *@offp / *@lenp describe the requested range.  The range is
 * widened so that it starts and ends on a multiple of @extsz, shortened if
 * it would exceed MAXEXTLEN, and then shifted or shortened so that it does
 * not run into the neighbouring extents described by @prevp and @gotp.  For
 * realtime (@rt) the length is additionally trimmed to a multiple of
 * mp->m_sb.sb_rextsize.
 *
 * Nothing is changed and 0 is returned when @convert is set, or when the
 * request is neither delalloc nor at eof and already lies inside *@gotp.
 *
 * Returns 0 with the aligned range stored in *@offp / *@lenp, or -EINVAL
 * when, for realtime, the trimmed range cannot cover the original request.
 * *@offp and *@lenp are only written on the final successful return.
 *
 * NOTE(review): @extsz is used as a divisor without a check in this
 * function; callers are presumably expected to pass a non-zero value.
 */
int
xfs_bmap_extsize_align(
	xfs_mount_t	*mp,
	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
	xfs_extlen_t	extsz,		/* align to this extent size */
	int		rt,		/* is this a realtime inode? */
	int		eof,		/* is extent at end-of-file? */
	int		delay,		/* creating delalloc extent? */
	int		convert,	/* overwriting unwritten extent? */
	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
	xfs_extlen_t	*lenp)		/* in/out: aligned length */
{
	xfs_fileoff_t	orig_off;	/* original offset */
	xfs_extlen_t	orig_alen;	/* original length */
	xfs_fileoff_t	orig_end;	/* original off+len */
	xfs_fileoff_t	nexto;		/* next file offset */
	xfs_fileoff_t	prevo;		/* previous file offset */
	xfs_fileoff_t	align_off;	/* temp for offset */
	xfs_extlen_t	align_alen;	/* temp for length */
	xfs_extlen_t	temp;		/* temp for calculations */

	/* Overwriting an unwritten extent: leave the request as it is. */
	if (convert)
		return 0;

	orig_off = align_off = *offp;
	orig_alen = align_alen = *lenp;
	orig_end = orig_off + orig_alen;

	/*
	 * If this request overlaps an existing extent, then don't
	 * attempt to perform any additional alignment.
	 */
	if (!delay && !eof &&
	    (orig_off >= gotp->br_startoff) &&
	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
		return 0;
	}

	/*
	 * If the file offset is unaligned vs. the extent size
	 * we need to align it.  This will be possible unless
	 * the file was previously written with a kernel that didn't
	 * perform this alignment, or if a truncate shot us in the
	 * foot.
	 */
	/* temp receives orig_off modulo extsz; the quotient is not used */
	div_u64_rem(orig_off, extsz, &temp);
	if (temp) {
		align_alen += temp;
		align_off -= temp;
	}

	/* Same adjustment for the end of the requested area. */
	temp = (align_alen % extsz);
	if (temp)
		align_alen += extsz - temp;

	/*
	 * For large extent hint sizes, the aligned extent might be larger than
	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
	 * the length back under MAXEXTLEN. The outer allocation loops handle
	 * short allocation just fine, so it is safe to do this. We only want to
	 * do it when we are forced to, though, because it means more allocation
	 * operations are required.
	 */
	while (align_alen > MAXEXTLEN)
		align_alen -= extsz;
	ASSERT(align_alen <= MAXEXTLEN);

	/*
	 * If the previous block overlaps with this proposed allocation
	 * then move the start forward without adjusting the length.
	 */
	if (prevp->br_startoff != NULLFILEOFF) {
		/* a hole record ends where it starts for this purpose */
		if (prevp->br_startblock == HOLESTARTBLOCK)
			prevo = prevp->br_startoff;
		else
			prevo = prevp->br_startoff + prevp->br_blockcount;
	} else
		prevo = 0;
	if (align_off != orig_off && align_off < prevo)
		align_off = prevo;
	/*
	 * If the next block overlaps with this proposed allocation
	 * then move the start back without adjusting the length,
	 * but not before offset 0.
	 * This may of course make the start overlap previous block,
	 * and if we hit the offset 0 limit then the next block
	 * can still overlap too.
	 */
	if (!eof && gotp->br_startoff != NULLFILEOFF) {
		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
			nexto = gotp->br_startoff + gotp->br_blockcount;
		else
			nexto = gotp->br_startoff;
	} else
		nexto = NULLFILEOFF;
	if (!eof &&
	    align_off + align_alen != orig_end &&
	    align_off + align_alen > nexto)
		align_off = nexto > align_alen ? nexto - align_alen : 0;
	/*
	 * If we're now overlapping the next or previous extent that
	 * means we can't fit an extsz piece in this hole.  Just move
	 * the start forward to the first valid spot and set
	 * the length so we hit the end.
	 */
	if (align_off != orig_off && align_off < prevo)
		align_off = prevo;
	if (align_off + align_alen != orig_end &&
	    align_off + align_alen > nexto &&
	    nexto != NULLFILEOFF) {
		ASSERT(nexto > prevo);
		align_alen = nexto - align_off;
	}

	/*
	 * If realtime, and the result isn't a multiple of the realtime
	 * extent size we need to remove blocks until it is.
	 */
	/* temp is the excess over a whole number of realtime extents */
	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
		/*
		 * We're not covering the original request, or
		 * we won't be able to once we fix the length.
		 */
		if (orig_off < align_off ||
		    orig_end > align_off + align_alen ||
		    align_alen - temp < orig_alen)
			return -EINVAL;
		/*
		 * Try to fix it by moving the start up.
		 */
		if (align_off + temp <= orig_off) {
			align_alen -= temp;
			align_off += temp;
		}
		/*
		 * Try to fix it by moving the end in.
		 */
		else if (align_off + align_alen - temp >= orig_end)
			align_alen -= temp;
		/*
		 * Set the start to the minimum then trim the length.
		 */
		else {
			align_alen -= orig_off - align_off;
			align_off = orig_off;
			align_alen -= align_alen % mp->m_sb.sb_rextsize;
		}
		/*
		 * Result doesn't cover the request, fail it.
		 */
		if (orig_off < align_off || orig_end > align_off + align_alen)
			return -EINVAL;
	} else {
		ASSERT(orig_off >= align_off);
		/* see MAXEXTLEN handling above */
		ASSERT(orig_end <= align_off + align_alen ||
		       align_alen + extsz > MAXEXTLEN);
	}

#ifdef DEBUG
	/* debug-only check that the result stays clear of both neighbours */
	if (!eof && gotp->br_startoff != NULLFILEOFF)
		ASSERT(align_off + align_alen <= gotp->br_startoff);
	if (prevp->br_startoff != NULLFILEOFF)
		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
#endif

	*lenp = align_alen;
	*offp = align_off;
	return 0;
}
/*
 * In xfs_bmap_adjacent(), a gap between the allocation and a neighbouring
 * extent of up to this many times the allocation length is still followed
 * when picking a target block.
 */
#define XFS_ALLOC_GAP_UNITS	4

/*
 * Choose a target start block for an allocation from its file neighbours.
 *
 * Looks at the extents before (ap->prev) and after (ap->got) the file offset
 * being allocated.  When one of them yields a block number that passes
 * ISVALID(), and is not ruled out by *ap->firstblock, that block is stored
 * in ap->blkno.  When neither does, ap->blkno is left as the caller set it.
 */
void
xfs_bmap_adjacent(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
	xfs_mount_t	*mp;		/* mount point structure */
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		rt;		/* true if inode is realtime */

	/*
	 * ISVALID(x, y): is block x an acceptable target, given reference
	 * block y?  For realtime, x only has to be below sb_rblocks.
	 * Otherwise x must be in the same AG as y, that AG number must be
	 * below sb_agcount, and x's AG block number below sb_agblocks.
	 * The macro uses the locals "rt" and "mp" and is undefined again at
	 * the end of the function.
	 */
#define	ISVALID(x,y)	\
	(rt ? \
		(x) < mp->m_sb.sb_rblocks : \
		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)

	mp = ap->ip->i_mount;
	nullfb = *ap->firstblock == NULLFSBLOCK;
	rt = XFS_IS_REALTIME_INODE(ap->ip) &&
		xfs_alloc_is_userdata(ap->datatype);
	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
	/*
	 * If allocating at eof, and there's a previous real block,
	 * try to use its last block as our starting point.
	 */
	if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
	    !isnullstartblock(ap->prev.br_startblock) &&
	    ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
		    ap->prev.br_startblock)) {
		ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
		/*
		 * Adjust for the gap between prevp and us.
		 */
		adjust = ap->offset -
			(ap->prev.br_startoff + ap->prev.br_blockcount);
		if (adjust &&
		    ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
			ap->blkno += adjust;
	}
	/*
	 * If not at eof, then compare the two neighbor blocks.
	 * Figure out whether either one gives us a good starting point,
	 * and pick the better one.
	 */
	else if (!ap->eof) {
		xfs_fsblock_t	gotbno;		/* right side block number */
		xfs_fsblock_t	gotdiff=0;	/* right side difference */
		xfs_fsblock_t	prevbno;	/* left side block number */
		xfs_fsblock_t	prevdiff=0;	/* left side difference */

		/*
		 * If there's a previous (left) block, select a requested
		 * start block based on it.
		 */
		/* prevbno is assigned inside the condition */
		if (ap->prev.br_startoff != NULLFILEOFF &&
		    !isnullstartblock(ap->prev.br_startblock) &&
		    (prevbno = ap->prev.br_startblock +
			       ap->prev.br_blockcount) &&
		    ISVALID(prevbno, ap->prev.br_startblock)) {
			/*
			 * Calculate gap to end of previous block.
			 */
			adjust = prevdiff = ap->offset -
				(ap->prev.br_startoff +
				 ap->prev.br_blockcount);
			/*
			 * Figure the startblock based on the previous block's
			 * end and the gap size.
			 * Heuristic!
			 * If the gap is large relative to the piece we're
			 * allocating, or using it gives us an invalid block
			 * number, then just use the end of the previous block.
			 */
			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
			    ISVALID(prevbno + prevdiff,
				    ap->prev.br_startblock))
				prevbno += adjust;
			else
				/* gap not followed: count it twice below */
				prevdiff += adjust;
			/*
			 * If the firstblock forbids it, can't use it,
			 * must use default.
			 */
			if (!rt && !nullfb &&
			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
				prevbno = NULLFSBLOCK;
		}
		/*
		 * No previous block or can't follow it, just default.
		 */
		else
			prevbno = NULLFSBLOCK;
		/*
		 * If there's a following (right) block, select a requested
		 * start block based on it.
		 */
		if (!isnullstartblock(ap->got.br_startblock)) {
			/*
			 * Calculate gap to start of next block.
			 */
			adjust = gotdiff = ap->got.br_startoff - ap->offset;
			/*
			 * Figure the startblock based on the next block's
			 * start and the gap size.
			 */
			gotbno = ap->got.br_startblock;
			/*
			 * Heuristic!
			 * If the gap is large relative to the piece we're
			 * allocating, or using it gives us an invalid block
			 * number, then just use the start of the next block
			 * offset by our length.
			 */
			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
			    ISVALID(gotbno - gotdiff, gotbno))
				gotbno -= adjust;
			else if (ISVALID(gotbno - ap->length, gotbno)) {
				gotbno -= ap->length;
				gotdiff += adjust - ap->length;
			} else
				/* neither offset is usable: count the gap twice */
				gotdiff += adjust;
			/*
			 * If the firstblock forbids it, can't use it,
			 * must use default.
			 */
			if (!rt && !nullfb &&
			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
				gotbno = NULLFSBLOCK;
		}
		/*
		 * No next block, just default.
		 */
		else
			gotbno = NULLFSBLOCK;
		/*
		 * If both valid, pick the better one, else the only good
		 * one, else ap->blkno is already set (to 0 or the inode block).
		 */
		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
			ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
		else if (prevbno != NULLFSBLOCK)
			ap->blkno = prevbno;
		else if (gotbno != NULLFSBLOCK)
			ap->blkno = gotbno;
	}
#undef ISVALID
}
prevbno : gotbno; 3197 else if (prevbno != NULLFSBLOCK) 3198 ap->blkno = prevbno; 3199 else if (gotbno != NULLFSBLOCK) 3200 ap->blkno = gotbno; 3201 } 3202 #undef ISVALID 3203 } 3204 3205 static int 3206 xfs_bmap_longest_free_extent( 3207 struct xfs_trans *tp, 3208 xfs_agnumber_t ag, 3209 xfs_extlen_t *blen, 3210 int *notinit) 3211 { 3212 struct xfs_mount *mp = tp->t_mountp; 3213 struct xfs_perag *pag; 3214 xfs_extlen_t longest; 3215 int error = 0; 3216 3217 pag = xfs_perag_get(mp, ag); 3218 if (!pag->pagf_init) { 3219 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); 3220 if (error) 3221 goto out; 3222 3223 if (!pag->pagf_init) { 3224 *notinit = 1; 3225 goto out; 3226 } 3227 } 3228 3229 longest = xfs_alloc_longest_free_extent(pag, 3230 xfs_alloc_min_freelist(mp, pag), 3231 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); 3232 if (*blen < longest) 3233 *blen = longest; 3234 3235 out: 3236 xfs_perag_put(pag); 3237 return error; 3238 } 3239 3240 static void 3241 xfs_bmap_select_minlen( 3242 struct xfs_bmalloca *ap, 3243 struct xfs_alloc_arg *args, 3244 xfs_extlen_t *blen, 3245 int notinit) 3246 { 3247 if (notinit || *blen < ap->minlen) { 3248 /* 3249 * Since we did a BUF_TRYLOCK above, it is possible that 3250 * there is space for this request. 3251 */ 3252 args->minlen = ap->minlen; 3253 } else if (*blen < args->maxlen) { 3254 /* 3255 * If the best seen length is less than the request length, 3256 * use the best as the minimum. 3257 */ 3258 args->minlen = *blen; 3259 } else { 3260 /* 3261 * Otherwise we've seen an extent as big as maxlen, use that 3262 * as the minimum. 
3263 */ 3264 args->minlen = args->maxlen; 3265 } 3266 } 3267 3268 STATIC int 3269 xfs_bmap_btalloc_nullfb( 3270 struct xfs_bmalloca *ap, 3271 struct xfs_alloc_arg *args, 3272 xfs_extlen_t *blen) 3273 { 3274 struct xfs_mount *mp = ap->ip->i_mount; 3275 xfs_agnumber_t ag, startag; 3276 int notinit = 0; 3277 int error; 3278 3279 args->type = XFS_ALLOCTYPE_START_BNO; 3280 args->total = ap->total; 3281 3282 startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); 3283 if (startag == NULLAGNUMBER) 3284 startag = ag = 0; 3285 3286 while (*blen < args->maxlen) { 3287 error = xfs_bmap_longest_free_extent(args->tp, ag, blen, 3288 ¬init); 3289 if (error) 3290 return error; 3291 3292 if (++ag == mp->m_sb.sb_agcount) 3293 ag = 0; 3294 if (ag == startag) 3295 break; 3296 } 3297 3298 xfs_bmap_select_minlen(ap, args, blen, notinit); 3299 return 0; 3300 } 3301 3302 STATIC int 3303 xfs_bmap_btalloc_filestreams( 3304 struct xfs_bmalloca *ap, 3305 struct xfs_alloc_arg *args, 3306 xfs_extlen_t *blen) 3307 { 3308 struct xfs_mount *mp = ap->ip->i_mount; 3309 xfs_agnumber_t ag; 3310 int notinit = 0; 3311 int error; 3312 3313 args->type = XFS_ALLOCTYPE_NEAR_BNO; 3314 args->total = ap->total; 3315 3316 ag = XFS_FSB_TO_AGNO(mp, args->fsbno); 3317 if (ag == NULLAGNUMBER) 3318 ag = 0; 3319 3320 error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); 3321 if (error) 3322 return error; 3323 3324 if (*blen < args->maxlen) { 3325 error = xfs_filestream_new_ag(ap, &ag); 3326 if (error) 3327 return error; 3328 3329 error = xfs_bmap_longest_free_extent(args->tp, ag, blen, 3330 ¬init); 3331 if (error) 3332 return error; 3333 3334 } 3335 3336 xfs_bmap_select_minlen(ap, args, blen, notinit); 3337 3338 /* 3339 * Set the failure fallback case to look in the selected AG as stream 3340 * may have moved. 3341 */ 3342 ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); 3343 return 0; 3344 } 3345 3346 /* Update all inode and quota accounting for the allocation we just did. 
*/ 3347 static void 3348 xfs_bmap_btalloc_accounting( 3349 struct xfs_bmalloca *ap, 3350 struct xfs_alloc_arg *args) 3351 { 3352 if (ap->flags & XFS_BMAPI_COWFORK) { 3353 /* 3354 * COW fork blocks are in-core only and thus are treated as 3355 * in-core quota reservation (like delalloc blocks) even when 3356 * converted to real blocks. The quota reservation is not 3357 * accounted to disk until blocks are remapped to the data 3358 * fork. So if these blocks were previously delalloc, we 3359 * already have quota reservation and there's nothing to do 3360 * yet. 3361 */ 3362 if (ap->wasdel) 3363 return; 3364 3365 /* 3366 * Otherwise, we've allocated blocks in a hole. The transaction 3367 * has acquired in-core quota reservation for this extent. 3368 * Rather than account these as real blocks, however, we reduce 3369 * the transaction quota reservation based on the allocation. 3370 * This essentially transfers the transaction quota reservation 3371 * to that of a delalloc extent. 3372 */ 3373 ap->ip->i_delayed_blks += args->len; 3374 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, 3375 -(long)args->len); 3376 return; 3377 } 3378 3379 /* data/attr fork only */ 3380 ap->ip->i_d.di_nblocks += args->len; 3381 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3382 if (ap->wasdel) 3383 ap->ip->i_delayed_blks -= args->len; 3384 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 3385 ap->wasdel ? 
XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, 3386 args->len); 3387 } 3388 3389 STATIC int 3390 xfs_bmap_btalloc( 3391 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 3392 { 3393 xfs_mount_t *mp; /* mount point structure */ 3394 xfs_alloctype_t atype = 0; /* type for allocation routines */ 3395 xfs_extlen_t align = 0; /* minimum allocation alignment */ 3396 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 3397 xfs_agnumber_t ag; 3398 xfs_alloc_arg_t args; 3399 xfs_fileoff_t orig_offset; 3400 xfs_extlen_t orig_length; 3401 xfs_extlen_t blen; 3402 xfs_extlen_t nextminlen = 0; 3403 int nullfb; /* true if ap->firstblock isn't set */ 3404 int isaligned; 3405 int tryagain; 3406 int error; 3407 int stripe_align; 3408 3409 ASSERT(ap->length); 3410 orig_offset = ap->offset; 3411 orig_length = ap->length; 3412 3413 mp = ap->ip->i_mount; 3414 3415 /* stripe alignment for allocation is determined by mount parameters */ 3416 stripe_align = 0; 3417 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) 3418 stripe_align = mp->m_swidth; 3419 else if (mp->m_dalign) 3420 stripe_align = mp->m_dalign; 3421 3422 if (ap->flags & XFS_BMAPI_COWFORK) 3423 align = xfs_get_cowextsz_hint(ap->ip); 3424 else if (xfs_alloc_is_userdata(ap->datatype)) 3425 align = xfs_get_extsz_hint(ap->ip); 3426 if (align) { 3427 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 3428 align, 0, ap->eof, 0, ap->conv, 3429 &ap->offset, &ap->length); 3430 ASSERT(!error); 3431 ASSERT(ap->length); 3432 } 3433 3434 3435 nullfb = *ap->firstblock == NULLFSBLOCK; 3436 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); 3437 if (nullfb) { 3438 if (xfs_alloc_is_userdata(ap->datatype) && 3439 xfs_inode_is_filestream(ap->ip)) { 3440 ag = xfs_filestream_lookup_ag(ap->ip); 3441 ag = (ag != NULLAGNUMBER) ? 
ag : 0; 3442 ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); 3443 } else { 3444 ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); 3445 } 3446 } else 3447 ap->blkno = *ap->firstblock; 3448 3449 xfs_bmap_adjacent(ap); 3450 3451 /* 3452 * If allowed, use ap->blkno; otherwise must use firstblock since 3453 * it's in the right allocation group. 3454 */ 3455 if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) 3456 ; 3457 else 3458 ap->blkno = *ap->firstblock; 3459 /* 3460 * Normal allocation, done through xfs_alloc_vextent. 3461 */ 3462 tryagain = isaligned = 0; 3463 memset(&args, 0, sizeof(args)); 3464 args.tp = ap->tp; 3465 args.mp = mp; 3466 args.fsbno = ap->blkno; 3467 xfs_rmap_skip_owner_update(&args.oinfo); 3468 3469 /* Trim the allocation back to the maximum an AG can fit. */ 3470 args.maxlen = min(ap->length, mp->m_ag_max_usable); 3471 args.firstblock = *ap->firstblock; 3472 blen = 0; 3473 if (nullfb) { 3474 /* 3475 * Search for an allocation group with a single extent large 3476 * enough for the request. If one isn't found, then adjust 3477 * the minimum allocation size to the largest space found. 
3478 */ 3479 if (xfs_alloc_is_userdata(ap->datatype) && 3480 xfs_inode_is_filestream(ap->ip)) 3481 error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); 3482 else 3483 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); 3484 if (error) 3485 return error; 3486 } else if (ap->dfops->dop_low) { 3487 if (xfs_inode_is_filestream(ap->ip)) 3488 args.type = XFS_ALLOCTYPE_FIRST_AG; 3489 else 3490 args.type = XFS_ALLOCTYPE_START_BNO; 3491 args.total = args.minlen = ap->minlen; 3492 } else { 3493 args.type = XFS_ALLOCTYPE_NEAR_BNO; 3494 args.total = ap->total; 3495 args.minlen = ap->minlen; 3496 } 3497 /* apply extent size hints if obtained earlier */ 3498 if (align) { 3499 args.prod = align; 3500 div_u64_rem(ap->offset, args.prod, &args.mod); 3501 if (args.mod) 3502 args.mod = args.prod - args.mod; 3503 } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) { 3504 args.prod = 1; 3505 args.mod = 0; 3506 } else { 3507 args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog; 3508 div_u64_rem(ap->offset, args.prod, &args.mod); 3509 if (args.mod) 3510 args.mod = args.prod - args.mod; 3511 } 3512 /* 3513 * If we are not low on available data blocks, and the 3514 * underlying logical volume manager is a stripe, and 3515 * the file offset is zero then try to allocate data 3516 * blocks on stripe unit boundary. 3517 * NOTE: ap->aeof is only set if the allocation length 3518 * is >= the stripe unit and the allocation offset is 3519 * at the end of file. 3520 */ 3521 if (!ap->dfops->dop_low && ap->aeof) { 3522 if (!ap->offset) { 3523 args.alignment = stripe_align; 3524 atype = args.type; 3525 isaligned = 1; 3526 /* 3527 * Adjust for alignment 3528 */ 3529 if (blen > args.alignment && blen <= args.maxlen) 3530 args.minlen = blen - args.alignment; 3531 args.minalignslop = 0; 3532 } else { 3533 /* 3534 * First try an exact bno allocation. 3535 * If it fails then do a near or start bno 3536 * allocation with alignment turned on. 
3537 */ 3538 atype = args.type; 3539 tryagain = 1; 3540 args.type = XFS_ALLOCTYPE_THIS_BNO; 3541 args.alignment = 1; 3542 /* 3543 * Compute the minlen+alignment for the 3544 * next case. Set slop so that the value 3545 * of minlen+alignment+slop doesn't go up 3546 * between the calls. 3547 */ 3548 if (blen > stripe_align && blen <= args.maxlen) 3549 nextminlen = blen - stripe_align; 3550 else 3551 nextminlen = args.minlen; 3552 if (nextminlen + stripe_align > args.minlen + 1) 3553 args.minalignslop = 3554 nextminlen + stripe_align - 3555 args.minlen - 1; 3556 else 3557 args.minalignslop = 0; 3558 } 3559 } else { 3560 args.alignment = 1; 3561 args.minalignslop = 0; 3562 } 3563 args.minleft = ap->minleft; 3564 args.wasdel = ap->wasdel; 3565 args.resv = XFS_AG_RESV_NONE; 3566 args.datatype = ap->datatype; 3567 if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) 3568 args.ip = ap->ip; 3569 3570 error = xfs_alloc_vextent(&args); 3571 if (error) 3572 return error; 3573 3574 if (tryagain && args.fsbno == NULLFSBLOCK) { 3575 /* 3576 * Exact allocation failed. Now try with alignment 3577 * turned on. 3578 */ 3579 args.type = atype; 3580 args.fsbno = ap->blkno; 3581 args.alignment = stripe_align; 3582 args.minlen = nextminlen; 3583 args.minalignslop = 0; 3584 isaligned = 1; 3585 if ((error = xfs_alloc_vextent(&args))) 3586 return error; 3587 } 3588 if (isaligned && args.fsbno == NULLFSBLOCK) { 3589 /* 3590 * allocation failed, so turn off alignment and 3591 * try again. 
3592 */ 3593 args.type = atype; 3594 args.fsbno = ap->blkno; 3595 args.alignment = 0; 3596 if ((error = xfs_alloc_vextent(&args))) 3597 return error; 3598 } 3599 if (args.fsbno == NULLFSBLOCK && nullfb && 3600 args.minlen > ap->minlen) { 3601 args.minlen = ap->minlen; 3602 args.type = XFS_ALLOCTYPE_START_BNO; 3603 args.fsbno = ap->blkno; 3604 if ((error = xfs_alloc_vextent(&args))) 3605 return error; 3606 } 3607 if (args.fsbno == NULLFSBLOCK && nullfb) { 3608 args.fsbno = 0; 3609 args.type = XFS_ALLOCTYPE_FIRST_AG; 3610 args.total = ap->minlen; 3611 if ((error = xfs_alloc_vextent(&args))) 3612 return error; 3613 ap->dfops->dop_low = true; 3614 } 3615 if (args.fsbno != NULLFSBLOCK) { 3616 /* 3617 * check the allocation happened at the same or higher AG than 3618 * the first block that was allocated. 3619 */ 3620 ASSERT(*ap->firstblock == NULLFSBLOCK || 3621 XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= 3622 XFS_FSB_TO_AGNO(mp, args.fsbno)); 3623 3624 ap->blkno = args.fsbno; 3625 if (*ap->firstblock == NULLFSBLOCK) 3626 *ap->firstblock = args.fsbno; 3627 ASSERT(nullfb || fb_agno <= args.agno); 3628 ap->length = args.len; 3629 /* 3630 * If the extent size hint is active, we tried to round the 3631 * caller's allocation request offset down to extsz and the 3632 * length up to another extsz boundary. If we found a free 3633 * extent we mapped it in starting at this new offset. If the 3634 * newly mapped space isn't long enough to cover any of the 3635 * range of offsets that was originally requested, move the 3636 * mapping up so that we can fill as much of the caller's 3637 * original request as possible. Free space is apparently 3638 * very fragmented so we're unlikely to be able to satisfy the 3639 * hints anyway. 
3640 */ 3641 if (ap->length <= orig_length) 3642 ap->offset = orig_offset; 3643 else if (ap->offset + ap->length < orig_offset + orig_length) 3644 ap->offset = orig_offset + orig_length - ap->length; 3645 xfs_bmap_btalloc_accounting(ap, &args); 3646 } else { 3647 ap->blkno = NULLFSBLOCK; 3648 ap->length = 0; 3649 } 3650 return 0; 3651 } 3652 3653 /* 3654 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 3655 * It figures out where to ask the underlying allocator to put the new extent. 3656 */ 3657 STATIC int 3658 xfs_bmap_alloc( 3659 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 3660 { 3661 if (XFS_IS_REALTIME_INODE(ap->ip) && 3662 xfs_alloc_is_userdata(ap->datatype)) 3663 return xfs_bmap_rtalloc(ap); 3664 return xfs_bmap_btalloc(ap); 3665 } 3666 3667 /* Trim extent to fit a logical block range. */ 3668 void 3669 xfs_trim_extent( 3670 struct xfs_bmbt_irec *irec, 3671 xfs_fileoff_t bno, 3672 xfs_filblks_t len) 3673 { 3674 xfs_fileoff_t distance; 3675 xfs_fileoff_t end = bno + len; 3676 3677 if (irec->br_startoff + irec->br_blockcount <= bno || 3678 irec->br_startoff >= end) { 3679 irec->br_blockcount = 0; 3680 return; 3681 } 3682 3683 if (irec->br_startoff < bno) { 3684 distance = bno - irec->br_startoff; 3685 if (isnullstartblock(irec->br_startblock)) 3686 irec->br_startblock = DELAYSTARTBLOCK; 3687 if (irec->br_startblock != DELAYSTARTBLOCK && 3688 irec->br_startblock != HOLESTARTBLOCK) 3689 irec->br_startblock += distance; 3690 irec->br_startoff += distance; 3691 irec->br_blockcount -= distance; 3692 } 3693 3694 if (end < irec->br_startoff + irec->br_blockcount) { 3695 distance = irec->br_startoff + irec->br_blockcount - end; 3696 irec->br_blockcount -= distance; 3697 } 3698 } 3699 3700 /* trim extent to within eof */ 3701 void 3702 xfs_trim_extent_eof( 3703 struct xfs_bmbt_irec *irec, 3704 struct xfs_inode *ip) 3705 3706 { 3707 xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, 3708 i_size_read(VFS_I(ip)))); 3709 } 3710 3711 
/* 3712 * Trim the returned map to the required bounds 3713 */ 3714 STATIC void 3715 xfs_bmapi_trim_map( 3716 struct xfs_bmbt_irec *mval, 3717 struct xfs_bmbt_irec *got, 3718 xfs_fileoff_t *bno, 3719 xfs_filblks_t len, 3720 xfs_fileoff_t obno, 3721 xfs_fileoff_t end, 3722 int n, 3723 int flags) 3724 { 3725 if ((flags & XFS_BMAPI_ENTIRE) || 3726 got->br_startoff + got->br_blockcount <= obno) { 3727 *mval = *got; 3728 if (isnullstartblock(got->br_startblock)) 3729 mval->br_startblock = DELAYSTARTBLOCK; 3730 return; 3731 } 3732 3733 if (obno > *bno) 3734 *bno = obno; 3735 ASSERT((*bno >= obno) || (n == 0)); 3736 ASSERT(*bno < end); 3737 mval->br_startoff = *bno; 3738 if (isnullstartblock(got->br_startblock)) 3739 mval->br_startblock = DELAYSTARTBLOCK; 3740 else 3741 mval->br_startblock = got->br_startblock + 3742 (*bno - got->br_startoff); 3743 /* 3744 * Return the minimum of what we got and what we asked for for 3745 * the length. We can use the len variable here because it is 3746 * modified below and we could have been there before coming 3747 * here if the first part of the allocation didn't overlap what 3748 * was asked for. 
3749 */ 3750 mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, 3751 got->br_blockcount - (*bno - got->br_startoff)); 3752 mval->br_state = got->br_state; 3753 ASSERT(mval->br_blockcount <= len); 3754 return; 3755 } 3756 3757 /* 3758 * Update and validate the extent map to return 3759 */ 3760 STATIC void 3761 xfs_bmapi_update_map( 3762 struct xfs_bmbt_irec **map, 3763 xfs_fileoff_t *bno, 3764 xfs_filblks_t *len, 3765 xfs_fileoff_t obno, 3766 xfs_fileoff_t end, 3767 int *n, 3768 int flags) 3769 { 3770 xfs_bmbt_irec_t *mval = *map; 3771 3772 ASSERT((flags & XFS_BMAPI_ENTIRE) || 3773 ((mval->br_startoff + mval->br_blockcount) <= end)); 3774 ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || 3775 (mval->br_startoff < obno)); 3776 3777 *bno = mval->br_startoff + mval->br_blockcount; 3778 *len = end - *bno; 3779 if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { 3780 /* update previous map with new information */ 3781 ASSERT(mval->br_startblock == mval[-1].br_startblock); 3782 ASSERT(mval->br_blockcount > mval[-1].br_blockcount); 3783 ASSERT(mval->br_state == mval[-1].br_state); 3784 mval[-1].br_blockcount = mval->br_blockcount; 3785 mval[-1].br_state = mval->br_state; 3786 } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && 3787 mval[-1].br_startblock != DELAYSTARTBLOCK && 3788 mval[-1].br_startblock != HOLESTARTBLOCK && 3789 mval->br_startblock == mval[-1].br_startblock + 3790 mval[-1].br_blockcount && 3791 ((flags & XFS_BMAPI_IGSTATE) || 3792 mval[-1].br_state == mval->br_state)) { 3793 ASSERT(mval->br_startoff == 3794 mval[-1].br_startoff + mval[-1].br_blockcount); 3795 mval[-1].br_blockcount += mval->br_blockcount; 3796 } else if (*n > 0 && 3797 mval->br_startblock == DELAYSTARTBLOCK && 3798 mval[-1].br_startblock == DELAYSTARTBLOCK && 3799 mval->br_startoff == 3800 mval[-1].br_startoff + mval[-1].br_blockcount) { 3801 mval[-1].br_blockcount += mval->br_blockcount; 3802 mval[-1].br_state = mval->br_state; 3803 } else if (!((*n == 
0) && 3804 ((mval->br_startoff + mval->br_blockcount) <= 3805 obno))) { 3806 mval++; 3807 (*n)++; 3808 } 3809 *map = mval; 3810 } 3811 3812 /* 3813 * Map file blocks to filesystem blocks without allocation. 3814 */ 3815 int 3816 xfs_bmapi_read( 3817 struct xfs_inode *ip, 3818 xfs_fileoff_t bno, 3819 xfs_filblks_t len, 3820 struct xfs_bmbt_irec *mval, 3821 int *nmap, 3822 int flags) 3823 { 3824 struct xfs_mount *mp = ip->i_mount; 3825 struct xfs_ifork *ifp; 3826 struct xfs_bmbt_irec got; 3827 xfs_fileoff_t obno; 3828 xfs_fileoff_t end; 3829 struct xfs_iext_cursor icur; 3830 int error; 3831 bool eof = false; 3832 int n = 0; 3833 int whichfork = xfs_bmapi_whichfork(flags); 3834 3835 ASSERT(*nmap >= 1); 3836 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| 3837 XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK))); 3838 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); 3839 3840 if (unlikely(XFS_TEST_ERROR( 3841 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 3842 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 3843 mp, XFS_ERRTAG_BMAPIFORMAT))) { 3844 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); 3845 return -EFSCORRUPTED; 3846 } 3847 3848 if (XFS_FORCED_SHUTDOWN(mp)) 3849 return -EIO; 3850 3851 XFS_STATS_INC(mp, xs_blk_mapr); 3852 3853 ifp = XFS_IFORK_PTR(ip, whichfork); 3854 3855 /* No CoW fork? Return a hole. */ 3856 if (whichfork == XFS_COW_FORK && !ifp) { 3857 mval->br_startoff = bno; 3858 mval->br_startblock = HOLESTARTBLOCK; 3859 mval->br_blockcount = len; 3860 mval->br_state = XFS_EXT_NORM; 3861 *nmap = 1; 3862 return 0; 3863 } 3864 3865 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 3866 error = xfs_iread_extents(NULL, ip, whichfork); 3867 if (error) 3868 return error; 3869 } 3870 3871 if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) 3872 eof = true; 3873 end = bno + len; 3874 obno = bno; 3875 3876 while (bno < end && n < *nmap) { 3877 /* Reading past eof, act as though there's a hole up to end. 
*/ 3878 if (eof) 3879 got.br_startoff = end; 3880 if (got.br_startoff > bno) { 3881 /* Reading in a hole. */ 3882 mval->br_startoff = bno; 3883 mval->br_startblock = HOLESTARTBLOCK; 3884 mval->br_blockcount = 3885 XFS_FILBLKS_MIN(len, got.br_startoff - bno); 3886 mval->br_state = XFS_EXT_NORM; 3887 bno += mval->br_blockcount; 3888 len -= mval->br_blockcount; 3889 mval++; 3890 n++; 3891 continue; 3892 } 3893 3894 /* set up the extent map to return. */ 3895 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); 3896 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); 3897 3898 /* If we're done, stop now. */ 3899 if (bno >= end || n >= *nmap) 3900 break; 3901 3902 /* Else go on to the next record. */ 3903 if (!xfs_iext_next_extent(ifp, &icur, &got)) 3904 eof = true; 3905 } 3906 *nmap = n; 3907 return 0; 3908 } 3909 3910 /* 3911 * Add a delayed allocation extent to an inode. Blocks are reserved from the 3912 * global pool and the extent inserted into the inode in-core extent tree. 3913 * 3914 * On entry, got refers to the first extent beyond the offset of the extent to 3915 * allocate or eof is specified if no such extent exists. On return, got refers 3916 * to the extent record that was inserted to the inode fork. 3917 * 3918 * Note that the allocated extent may have been merged with contiguous extents 3919 * during insertion into the inode fork. Thus, got does not reflect the current 3920 * state of the inode fork on return. If necessary, the caller can use lastx to 3921 * look up the updated record in the inode fork. 
3922 */ 3923 int 3924 xfs_bmapi_reserve_delalloc( 3925 struct xfs_inode *ip, 3926 int whichfork, 3927 xfs_fileoff_t off, 3928 xfs_filblks_t len, 3929 xfs_filblks_t prealloc, 3930 struct xfs_bmbt_irec *got, 3931 struct xfs_iext_cursor *icur, 3932 int eof) 3933 { 3934 struct xfs_mount *mp = ip->i_mount; 3935 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3936 xfs_extlen_t alen; 3937 xfs_extlen_t indlen; 3938 int error; 3939 xfs_fileoff_t aoff = off; 3940 3941 /* 3942 * Cap the alloc length. Keep track of prealloc so we know whether to 3943 * tag the inode before we return. 3944 */ 3945 alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); 3946 if (!eof) 3947 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); 3948 if (prealloc && alen >= len) 3949 prealloc = alen - len; 3950 3951 /* Figure out the extent size, adjust alen */ 3952 if (whichfork == XFS_COW_FORK) { 3953 struct xfs_bmbt_irec prev; 3954 xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); 3955 3956 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) 3957 prev.br_startoff = NULLFILEOFF; 3958 3959 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 3960 1, 0, &aoff, &alen); 3961 ASSERT(!error); 3962 } 3963 3964 /* 3965 * Make a transaction-less quota reservation for delayed allocation 3966 * blocks. This number gets adjusted later. We return if we haven't 3967 * allocated blocks already inside this loop. 3968 */ 3969 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, 3970 XFS_QMOPT_RES_REGBLKS); 3971 if (error) 3972 return error; 3973 3974 /* 3975 * Split changing sb for alen and indlen since they could be coming 3976 * from different places. 
3977 */ 3978 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); 3979 ASSERT(indlen > 0); 3980 3981 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); 3982 if (error) 3983 goto out_unreserve_quota; 3984 3985 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false); 3986 if (error) 3987 goto out_unreserve_blocks; 3988 3989 3990 ip->i_delayed_blks += alen; 3991 3992 got->br_startoff = aoff; 3993 got->br_startblock = nullstartblock(indlen); 3994 got->br_blockcount = alen; 3995 got->br_state = XFS_EXT_NORM; 3996 3997 xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got); 3998 3999 /* 4000 * Tag the inode if blocks were preallocated. Note that COW fork 4001 * preallocation can occur at the start or end of the extent, even when 4002 * prealloc == 0, so we must also check the aligned offset and length. 4003 */ 4004 if (whichfork == XFS_DATA_FORK && prealloc) 4005 xfs_inode_set_eofblocks_tag(ip); 4006 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) 4007 xfs_inode_set_cowblocks_tag(ip); 4008 4009 return 0; 4010 4011 out_unreserve_blocks: 4012 xfs_mod_fdblocks(mp, alen, false); 4013 out_unreserve_quota: 4014 if (XFS_IS_QUOTA_ON(mp)) 4015 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, 4016 XFS_QMOPT_RES_REGBLKS); 4017 return error; 4018 } 4019 4020 static int 4021 xfs_bmapi_allocate( 4022 struct xfs_bmalloca *bma) 4023 { 4024 struct xfs_mount *mp = bma->ip->i_mount; 4025 int whichfork = xfs_bmapi_whichfork(bma->flags); 4026 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4027 int tmp_logflags = 0; 4028 int error; 4029 4030 ASSERT(bma->length > 0); 4031 4032 /* 4033 * For the wasdelay case, we could also just allocate the stuff asked 4034 * for in this bmap call but that wouldn't be as good. 
4035 */ 4036 if (bma->wasdel) { 4037 bma->length = (xfs_extlen_t)bma->got.br_blockcount; 4038 bma->offset = bma->got.br_startoff; 4039 xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev); 4040 } else { 4041 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); 4042 if (!bma->eof) 4043 bma->length = XFS_FILBLKS_MIN(bma->length, 4044 bma->got.br_startoff - bma->offset); 4045 } 4046 4047 /* 4048 * Set the data type being allocated. For the data fork, the first data 4049 * in the file is treated differently to all other allocations. For the 4050 * attribute fork, we only need to ensure the allocated range is not on 4051 * the busy list. 4052 */ 4053 if (!(bma->flags & XFS_BMAPI_METADATA)) { 4054 bma->datatype = XFS_ALLOC_NOBUSY; 4055 if (whichfork == XFS_DATA_FORK) { 4056 if (bma->offset == 0) 4057 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; 4058 else 4059 bma->datatype |= XFS_ALLOC_USERDATA; 4060 } 4061 if (bma->flags & XFS_BMAPI_ZERO) 4062 bma->datatype |= XFS_ALLOC_USERDATA_ZERO; 4063 } 4064 4065 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4066 4067 /* 4068 * Only want to do the alignment at the eof if it is userdata and 4069 * allocation length is larger than a stripe unit. 4070 */ 4071 if (mp->m_dalign && bma->length >= mp->m_dalign && 4072 !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { 4073 error = xfs_bmap_isaeof(bma, whichfork); 4074 if (error) 4075 return error; 4076 } 4077 4078 error = xfs_bmap_alloc(bma); 4079 if (error) 4080 return error; 4081 4082 if (bma->cur) 4083 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4084 if (bma->blkno == NULLFSBLOCK) 4085 return 0; 4086 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { 4087 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); 4088 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4089 bma->cur->bc_private.b.dfops = bma->dfops; 4090 } 4091 /* 4092 * Bump the number of extents we've allocated 4093 * in this call. 
4094 */ 4095 bma->nallocs++; 4096 4097 if (bma->cur) 4098 bma->cur->bc_private.b.flags = 4099 bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; 4100 4101 bma->got.br_startoff = bma->offset; 4102 bma->got.br_startblock = bma->blkno; 4103 bma->got.br_blockcount = bma->length; 4104 bma->got.br_state = XFS_EXT_NORM; 4105 4106 /* 4107 * In the data fork, a wasdelay extent has been initialized, so 4108 * shouldn't be flagged as unwritten. 4109 * 4110 * For the cow fork, however, we convert delalloc reservations 4111 * (extents allocated for speculative preallocation) to 4112 * allocated unwritten extents, and only convert the unwritten 4113 * extents to real extents when we're about to write the data. 4114 */ 4115 if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && 4116 (bma->flags & XFS_BMAPI_PREALLOC) && 4117 xfs_sb_version_hasextflgbit(&mp->m_sb)) 4118 bma->got.br_state = XFS_EXT_UNWRITTEN; 4119 4120 if (bma->wasdel) 4121 error = xfs_bmap_add_extent_delay_real(bma, whichfork); 4122 else 4123 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, 4124 whichfork, &bma->icur, &bma->cur, &bma->got, 4125 bma->firstblock, bma->dfops, &bma->logflags, 4126 bma->flags); 4127 4128 bma->logflags |= tmp_logflags; 4129 if (error) 4130 return error; 4131 4132 /* 4133 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real 4134 * or xfs_bmap_add_extent_hole_real might have merged it into one of 4135 * the neighbouring ones. 
4136 */ 4137 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4138 4139 ASSERT(bma->got.br_startoff <= bma->offset); 4140 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= 4141 bma->offset + bma->length); 4142 ASSERT(bma->got.br_state == XFS_EXT_NORM || 4143 bma->got.br_state == XFS_EXT_UNWRITTEN); 4144 return 0; 4145 } 4146 4147 STATIC int 4148 xfs_bmapi_convert_unwritten( 4149 struct xfs_bmalloca *bma, 4150 struct xfs_bmbt_irec *mval, 4151 xfs_filblks_t len, 4152 int flags) 4153 { 4154 int whichfork = xfs_bmapi_whichfork(flags); 4155 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4156 int tmp_logflags = 0; 4157 int error; 4158 4159 /* check if we need to do unwritten->real conversion */ 4160 if (mval->br_state == XFS_EXT_UNWRITTEN && 4161 (flags & XFS_BMAPI_PREALLOC)) 4162 return 0; 4163 4164 /* check if we need to do real->unwritten conversion */ 4165 if (mval->br_state == XFS_EXT_NORM && 4166 (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != 4167 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) 4168 return 0; 4169 4170 /* 4171 * Modify (by adding) the state flag, if writing. 4172 */ 4173 ASSERT(mval->br_blockcount <= len); 4174 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { 4175 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, 4176 bma->ip, whichfork); 4177 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4178 bma->cur->bc_private.b.dfops = bma->dfops; 4179 } 4180 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4181 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4182 4183 /* 4184 * Before insertion into the bmbt, zero the range being converted 4185 * if required. 
4186 */ 4187 if (flags & XFS_BMAPI_ZERO) { 4188 error = xfs_zero_extent(bma->ip, mval->br_startblock, 4189 mval->br_blockcount); 4190 if (error) 4191 return error; 4192 } 4193 4194 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, 4195 &bma->icur, &bma->cur, mval, bma->firstblock, 4196 bma->dfops, &tmp_logflags); 4197 /* 4198 * Log the inode core unconditionally in the unwritten extent conversion 4199 * path because the conversion might not have done so (e.g., if the 4200 * extent count hasn't changed). We need to make sure the inode is dirty 4201 * in the transaction for the sake of fsync(), even if nothing has 4202 * changed, because fsync() will not force the log for this transaction 4203 * unless it sees the inode pinned. 4204 * 4205 * Note: If we're only converting cow fork extents, there aren't 4206 * any on-disk updates to make, so we don't need to log anything. 4207 */ 4208 if (whichfork != XFS_COW_FORK) 4209 bma->logflags |= tmp_logflags | XFS_ILOG_CORE; 4210 if (error) 4211 return error; 4212 4213 /* 4214 * Update our extent pointer, given that 4215 * xfs_bmap_add_extent_unwritten_real might have merged it into one 4216 * of the neighbouring ones. 4217 */ 4218 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4219 4220 /* 4221 * We may have combined previously unwritten space with written space, 4222 * so generate another request. 4223 */ 4224 if (mval->br_blockcount < len) 4225 return -EAGAIN; 4226 return 0; 4227 } 4228 4229 /* 4230 * Map file blocks to filesystem blocks, and allocate blocks or convert the 4231 * extent state if necessary. Details behaviour is controlled by the flags 4232 * parameter. Only allocates blocks from a single allocation group, to avoid 4233 * locking problems. 4234 * 4235 * The returned value in "firstblock" from the first call in a transaction 4236 * must be remembered and presented to subsequent calls in "firstblock". 
4237 * An upper bound for the number of blocks to be allocated is supplied to 4238 * the first call in "total"; if no allocation group has that many free 4239 * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). 4240 */ 4241 int 4242 xfs_bmapi_write( 4243 struct xfs_trans *tp, /* transaction pointer */ 4244 struct xfs_inode *ip, /* incore inode */ 4245 xfs_fileoff_t bno, /* starting file offs. mapped */ 4246 xfs_filblks_t len, /* length to map in file */ 4247 int flags, /* XFS_BMAPI_... */ 4248 xfs_fsblock_t *firstblock, /* first allocated block 4249 controls a.g. for allocs */ 4250 xfs_extlen_t total, /* total blocks needed */ 4251 struct xfs_bmbt_irec *mval, /* output: map values */ 4252 int *nmap, /* i/o: mval size/count */ 4253 struct xfs_defer_ops *dfops) /* i/o: list extents to free */ 4254 { 4255 struct xfs_mount *mp = ip->i_mount; 4256 struct xfs_ifork *ifp; 4257 struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ 4258 xfs_fileoff_t end; /* end of mapped file region */ 4259 bool eof = false; /* after the end of extents */ 4260 int error; /* error return */ 4261 int n; /* current extent index */ 4262 xfs_fileoff_t obno; /* old block number (offset) */ 4263 int whichfork; /* data or attr fork */ 4264 4265 #ifdef DEBUG 4266 xfs_fileoff_t orig_bno; /* original block number value */ 4267 int orig_flags; /* original flags arg value */ 4268 xfs_filblks_t orig_len; /* original value of len arg */ 4269 struct xfs_bmbt_irec *orig_mval; /* original value of mval */ 4270 int orig_nmap; /* original value of *nmap */ 4271 4272 orig_bno = bno; 4273 orig_len = len; 4274 orig_flags = flags; 4275 orig_mval = mval; 4276 orig_nmap = *nmap; 4277 #endif 4278 whichfork = xfs_bmapi_whichfork(flags); 4279 4280 ASSERT(*nmap >= 1); 4281 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); 4282 ASSERT(!(flags & XFS_BMAPI_IGSTATE)); 4283 ASSERT(tp != NULL || 4284 (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == 4285 (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); 4286 
ASSERT(len > 0); 4287 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4288 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4289 ASSERT(!(flags & XFS_BMAPI_REMAP)); 4290 4291 /* zeroing is for currently only for data extents, not metadata */ 4292 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != 4293 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)); 4294 /* 4295 * we can allocate unwritten extents or pre-zero allocated blocks, 4296 * but it makes no sense to do both at once. This would result in 4297 * zeroing the unwritten extent twice, but it still being an 4298 * unwritten extent.... 4299 */ 4300 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != 4301 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); 4302 4303 if (unlikely(XFS_TEST_ERROR( 4304 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 4305 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4306 mp, XFS_ERRTAG_BMAPIFORMAT))) { 4307 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); 4308 return -EFSCORRUPTED; 4309 } 4310 4311 if (XFS_FORCED_SHUTDOWN(mp)) 4312 return -EIO; 4313 4314 ifp = XFS_IFORK_PTR(ip, whichfork); 4315 4316 XFS_STATS_INC(mp, xs_blk_mapw); 4317 4318 if (*firstblock == NULLFSBLOCK) { 4319 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) 4320 bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; 4321 else 4322 bma.minleft = 1; 4323 } else { 4324 bma.minleft = 0; 4325 } 4326 4327 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4328 error = xfs_iread_extents(tp, ip, whichfork); 4329 if (error) 4330 goto error0; 4331 } 4332 4333 n = 0; 4334 end = bno + len; 4335 obno = bno; 4336 4337 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got)) 4338 eof = true; 4339 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev)) 4340 bma.prev.br_startoff = NULLFILEOFF; 4341 bma.tp = tp; 4342 bma.ip = ip; 4343 bma.total = total; 4344 bma.datatype = 0; 4345 bma.dfops = dfops; 4346 bma.firstblock = firstblock; 4347 4348 while (bno < end && n < *nmap) { 4349 
bool need_alloc = false, wasdelay = false; 4350 4351 /* in hole or beyond EOF? */ 4352 if (eof || bma.got.br_startoff > bno) { 4353 /* 4354 * CoW fork conversions should /never/ hit EOF or 4355 * holes. There should always be something for us 4356 * to work on. 4357 */ 4358 ASSERT(!((flags & XFS_BMAPI_CONVERT) && 4359 (flags & XFS_BMAPI_COWFORK))); 4360 4361 if (flags & XFS_BMAPI_DELALLOC) { 4362 /* 4363 * For the COW fork we can reasonably get a 4364 * request for converting an extent that races 4365 * with other threads already having converted 4366 * part of it, as there converting COW to 4367 * regular blocks is not protected using the 4368 * IOLOCK. 4369 */ 4370 ASSERT(flags & XFS_BMAPI_COWFORK); 4371 if (!(flags & XFS_BMAPI_COWFORK)) { 4372 error = -EIO; 4373 goto error0; 4374 } 4375 4376 if (eof || bno >= end) 4377 break; 4378 } else { 4379 need_alloc = true; 4380 } 4381 } else if (isnullstartblock(bma.got.br_startblock)) { 4382 wasdelay = true; 4383 } 4384 4385 /* 4386 * First, deal with the hole before the allocated space 4387 * that we found, if any. 4388 */ 4389 if ((need_alloc || wasdelay) && 4390 !(flags & XFS_BMAPI_CONVERT_ONLY)) { 4391 bma.eof = eof; 4392 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4393 bma.wasdel = wasdelay; 4394 bma.offset = bno; 4395 bma.flags = flags; 4396 4397 /* 4398 * There's a 32/64 bit type mismatch between the 4399 * allocation length request (which can be 64 bits in 4400 * length) and the bma length request, which is 4401 * xfs_extlen_t and therefore 32 bits. Hence we have to 4402 * check for 32-bit overflows and handle them here. 4403 */ 4404 if (len > (xfs_filblks_t)MAXEXTLEN) 4405 bma.length = MAXEXTLEN; 4406 else 4407 bma.length = len; 4408 4409 ASSERT(len > 0); 4410 ASSERT(bma.length > 0); 4411 error = xfs_bmapi_allocate(&bma); 4412 if (error) 4413 goto error0; 4414 if (bma.blkno == NULLFSBLOCK) 4415 break; 4416 4417 /* 4418 * If this is a CoW allocation, record the data in 4419 * the refcount btree for orphan recovery. 
4420 */ 4421 if (whichfork == XFS_COW_FORK) { 4422 error = xfs_refcount_alloc_cow_extent(mp, dfops, 4423 bma.blkno, bma.length); 4424 if (error) 4425 goto error0; 4426 } 4427 } 4428 4429 /* Deal with the allocated space we found. */ 4430 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno, 4431 end, n, flags); 4432 4433 /* Execute unwritten extent conversion if necessary */ 4434 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); 4435 if (error == -EAGAIN) 4436 continue; 4437 if (error) 4438 goto error0; 4439 4440 /* update the extent map to return */ 4441 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); 4442 4443 /* 4444 * If we're done, stop now. Stop when we've allocated 4445 * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise 4446 * the transaction may get too big. 4447 */ 4448 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap) 4449 break; 4450 4451 /* Else go on to the next record. */ 4452 bma.prev = bma.got; 4453 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got)) 4454 eof = true; 4455 } 4456 *nmap = n; 4457 4458 /* 4459 * Transform from btree to extents, give it cur. 4460 */ 4461 if (xfs_bmap_wants_extents(ip, whichfork)) { 4462 int tmp_logflags = 0; 4463 4464 ASSERT(bma.cur); 4465 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, 4466 &tmp_logflags, whichfork); 4467 bma.logflags |= tmp_logflags; 4468 if (error) 4469 goto error0; 4470 } 4471 4472 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 4473 XFS_IFORK_NEXTENTS(ip, whichfork) > 4474 XFS_IFORK_MAXEXT(ip, whichfork)); 4475 error = 0; 4476 error0: 4477 /* 4478 * Log everything. Do this after conversion, there's no point in 4479 * logging the extent records if we've converted to btree format. 
4480 */ 4481 if ((bma.logflags & xfs_ilog_fext(whichfork)) && 4482 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 4483 bma.logflags &= ~xfs_ilog_fext(whichfork); 4484 else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) && 4485 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) 4486 bma.logflags &= ~xfs_ilog_fbroot(whichfork); 4487 /* 4488 * Log whatever the flags say, even if error. Otherwise we might miss 4489 * detecting a case where the data is changed, there's an error, 4490 * and it's not logged so we don't shutdown when we should. 4491 */ 4492 if (bma.logflags) 4493 xfs_trans_log_inode(tp, ip, bma.logflags); 4494 4495 if (bma.cur) { 4496 if (!error) { 4497 ASSERT(*firstblock == NULLFSBLOCK || 4498 XFS_FSB_TO_AGNO(mp, *firstblock) <= 4499 XFS_FSB_TO_AGNO(mp, 4500 bma.cur->bc_private.b.firstblock)); 4501 *firstblock = bma.cur->bc_private.b.firstblock; 4502 } 4503 xfs_btree_del_cursor(bma.cur, 4504 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 4505 } 4506 if (!error) 4507 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, 4508 orig_nmap, *nmap); 4509 return error; 4510 } 4511 4512 int 4513 xfs_bmapi_remap( 4514 struct xfs_trans *tp, 4515 struct xfs_inode *ip, 4516 xfs_fileoff_t bno, 4517 xfs_filblks_t len, 4518 xfs_fsblock_t startblock, 4519 struct xfs_defer_ops *dfops, 4520 int flags) 4521 { 4522 struct xfs_mount *mp = ip->i_mount; 4523 struct xfs_ifork *ifp; 4524 struct xfs_btree_cur *cur = NULL; 4525 xfs_fsblock_t firstblock = NULLFSBLOCK; 4526 struct xfs_bmbt_irec got; 4527 struct xfs_iext_cursor icur; 4528 int whichfork = xfs_bmapi_whichfork(flags); 4529 int logflags = 0, error; 4530 4531 ifp = XFS_IFORK_PTR(ip, whichfork); 4532 ASSERT(len > 0); 4533 ASSERT(len <= (xfs_filblks_t)MAXEXTLEN); 4534 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4535 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC | 4536 XFS_BMAPI_NORMAP))); 4537 ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) != 4538 (XFS_BMAPI_ATTRFORK | 
XFS_BMAPI_PREALLOC)); 4539 4540 if (unlikely(XFS_TEST_ERROR( 4541 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 4542 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4543 mp, XFS_ERRTAG_BMAPIFORMAT))) { 4544 XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp); 4545 return -EFSCORRUPTED; 4546 } 4547 4548 if (XFS_FORCED_SHUTDOWN(mp)) 4549 return -EIO; 4550 4551 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4552 error = xfs_iread_extents(tp, ip, whichfork); 4553 if (error) 4554 return error; 4555 } 4556 4557 if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) { 4558 /* make sure we only reflink into a hole. */ 4559 ASSERT(got.br_startoff > bno); 4560 ASSERT(got.br_startoff - bno >= len); 4561 } 4562 4563 ip->i_d.di_nblocks += len; 4564 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4565 4566 if (ifp->if_flags & XFS_IFBROOT) { 4567 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 4568 cur->bc_private.b.firstblock = firstblock; 4569 cur->bc_private.b.dfops = dfops; 4570 cur->bc_private.b.flags = 0; 4571 } 4572 4573 got.br_startoff = bno; 4574 got.br_startblock = startblock; 4575 got.br_blockcount = len; 4576 if (flags & XFS_BMAPI_PREALLOC) 4577 got.br_state = XFS_EXT_UNWRITTEN; 4578 else 4579 got.br_state = XFS_EXT_NORM; 4580 4581 error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur, 4582 &cur, &got, &firstblock, dfops, &logflags, flags); 4583 if (error) 4584 goto error0; 4585 4586 if (xfs_bmap_wants_extents(ip, whichfork)) { 4587 int tmp_logflags = 0; 4588 4589 error = xfs_bmap_btree_to_extents(tp, ip, cur, 4590 &tmp_logflags, whichfork); 4591 logflags |= tmp_logflags; 4592 } 4593 4594 error0: 4595 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) 4596 logflags &= ~XFS_ILOG_DEXT; 4597 else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) 4598 logflags &= ~XFS_ILOG_DBROOT; 4599 4600 if (logflags) 4601 xfs_trans_log_inode(tp, ip, logflags); 4602 if (cur) { 4603 xfs_btree_del_cursor(cur, 4604 error ? 
XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 4605 } 4606 return error; 4607 } 4608 4609 /* 4610 * When a delalloc extent is split (e.g., due to a hole punch), the original 4611 * indlen reservation must be shared across the two new extents that are left 4612 * behind. 4613 * 4614 * Given the original reservation and the worst case indlen for the two new 4615 * extents (as calculated by xfs_bmap_worst_indlen()), split the original 4616 * reservation fairly across the two new extents. If necessary, steal available 4617 * blocks from a deleted extent to make up a reservation deficiency (e.g., if 4618 * ores == 1). The number of stolen blocks is returned. The availability and 4619 * subsequent accounting of stolen blocks is the responsibility of the caller. 4620 */ 4621 static xfs_filblks_t 4622 xfs_bmap_split_indlen( 4623 xfs_filblks_t ores, /* original res. */ 4624 xfs_filblks_t *indlen1, /* ext1 worst indlen */ 4625 xfs_filblks_t *indlen2, /* ext2 worst indlen */ 4626 xfs_filblks_t avail) /* stealable blocks */ 4627 { 4628 xfs_filblks_t len1 = *indlen1; 4629 xfs_filblks_t len2 = *indlen2; 4630 xfs_filblks_t nres = len1 + len2; /* new total res. */ 4631 xfs_filblks_t stolen = 0; 4632 xfs_filblks_t resfactor; 4633 4634 /* 4635 * Steal as many blocks as we can to try and satisfy the worst case 4636 * indlen for both new extents. 4637 */ 4638 if (ores < nres && avail) 4639 stolen = XFS_FILBLKS_MIN(nres - ores, avail); 4640 ores += stolen; 4641 4642 /* nothing else to do if we've satisfied the new reservation */ 4643 if (ores >= nres) 4644 return stolen; 4645 4646 /* 4647 * We can't meet the total required reservation for the two extents. 4648 * Calculate the percent of the overall shortage between both extents 4649 * and apply this percentage to each of the requested indlen values. 4650 * This distributes the shortage fairly and reduces the chances that one 4651 * of the two extents is left with nothing when extents are repeatedly 4652 * split. 
4653 */ 4654 resfactor = (ores * 100); 4655 do_div(resfactor, nres); 4656 len1 *= resfactor; 4657 do_div(len1, 100); 4658 len2 *= resfactor; 4659 do_div(len2, 100); 4660 ASSERT(len1 + len2 <= ores); 4661 ASSERT(len1 < *indlen1 && len2 < *indlen2); 4662 4663 /* 4664 * Hand out the remainder to each extent. If one of the two reservations 4665 * is zero, we want to make sure that one gets a block first. The loop 4666 * below starts with len1, so hand len2 a block right off the bat if it 4667 * is zero. 4668 */ 4669 ores -= (len1 + len2); 4670 ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); 4671 if (ores && !len2 && *indlen2) { 4672 len2++; 4673 ores--; 4674 } 4675 while (ores) { 4676 if (len1 < *indlen1) { 4677 len1++; 4678 ores--; 4679 } 4680 if (!ores) 4681 break; 4682 if (len2 < *indlen2) { 4683 len2++; 4684 ores--; 4685 } 4686 } 4687 4688 *indlen1 = len1; 4689 *indlen2 = len2; 4690 4691 return stolen; 4692 } 4693 4694 int 4695 xfs_bmap_del_extent_delay( 4696 struct xfs_inode *ip, 4697 int whichfork, 4698 struct xfs_iext_cursor *icur, 4699 struct xfs_bmbt_irec *got, 4700 struct xfs_bmbt_irec *del) 4701 { 4702 struct xfs_mount *mp = ip->i_mount; 4703 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 4704 struct xfs_bmbt_irec new; 4705 int64_t da_old, da_new, da_diff = 0; 4706 xfs_fileoff_t del_endoff, got_endoff; 4707 xfs_filblks_t got_indlen, new_indlen, stolen; 4708 int state = xfs_bmap_fork_to_state(whichfork); 4709 int error = 0; 4710 bool isrt; 4711 4712 XFS_STATS_INC(mp, xs_del_exlist); 4713 4714 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 4715 del_endoff = del->br_startoff + del->br_blockcount; 4716 got_endoff = got->br_startoff + got->br_blockcount; 4717 da_old = startblockval(got->br_startblock); 4718 da_new = 0; 4719 4720 ASSERT(del->br_blockcount > 0); 4721 ASSERT(got->br_startoff <= del->br_startoff); 4722 ASSERT(got_endoff >= del_endoff); 4723 4724 if (isrt) { 4725 uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); 
4726 4727 do_div(rtexts, mp->m_sb.sb_rextsize); 4728 xfs_mod_frextents(mp, rtexts); 4729 } 4730 4731 /* 4732 * Update the inode delalloc counter now and wait to update the 4733 * sb counters as we might have to borrow some blocks for the 4734 * indirect block accounting. 4735 */ 4736 error = xfs_trans_reserve_quota_nblks(NULL, ip, 4737 -((long)del->br_blockcount), 0, 4738 isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 4739 if (error) 4740 return error; 4741 ip->i_delayed_blks -= del->br_blockcount; 4742 4743 if (got->br_startoff == del->br_startoff) 4744 state |= BMAP_LEFT_FILLING; 4745 if (got_endoff == del_endoff) 4746 state |= BMAP_RIGHT_FILLING; 4747 4748 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 4749 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 4750 /* 4751 * Matches the whole extent. Delete the entry. 4752 */ 4753 xfs_iext_remove(ip, icur, state); 4754 xfs_iext_prev(ifp, icur); 4755 break; 4756 case BMAP_LEFT_FILLING: 4757 /* 4758 * Deleting the first part of the extent. 4759 */ 4760 got->br_startoff = del_endoff; 4761 got->br_blockcount -= del->br_blockcount; 4762 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 4763 got->br_blockcount), da_old); 4764 got->br_startblock = nullstartblock((int)da_new); 4765 xfs_iext_update_extent(ip, state, icur, got); 4766 break; 4767 case BMAP_RIGHT_FILLING: 4768 /* 4769 * Deleting the last part of the extent. 4770 */ 4771 got->br_blockcount = got->br_blockcount - del->br_blockcount; 4772 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 4773 got->br_blockcount), da_old); 4774 got->br_startblock = nullstartblock((int)da_new); 4775 xfs_iext_update_extent(ip, state, icur, got); 4776 break; 4777 case 0: 4778 /* 4779 * Deleting the middle of the extent. 4780 * 4781 * Distribute the original indlen reservation across the two new 4782 * extents. Steal blocks from the deleted extent if necessary. 4783 * Stealing blocks simply fudges the fdblocks accounting below. 
4784 * Warn if either of the new indlen reservations is zero as this 4785 * can lead to delalloc problems. 4786 */ 4787 got->br_blockcount = del->br_startoff - got->br_startoff; 4788 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); 4789 4790 new.br_blockcount = got_endoff - del_endoff; 4791 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount); 4792 4793 WARN_ON_ONCE(!got_indlen || !new_indlen); 4794 stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen, 4795 del->br_blockcount); 4796 4797 got->br_startblock = nullstartblock((int)got_indlen); 4798 4799 new.br_startoff = del_endoff; 4800 new.br_state = got->br_state; 4801 new.br_startblock = nullstartblock((int)new_indlen); 4802 4803 xfs_iext_update_extent(ip, state, icur, got); 4804 xfs_iext_next(ifp, icur); 4805 xfs_iext_insert(ip, icur, &new, state); 4806 4807 da_new = got_indlen + new_indlen - stolen; 4808 del->br_blockcount -= stolen; 4809 break; 4810 } 4811 4812 ASSERT(da_old >= da_new); 4813 da_diff = da_old - da_new; 4814 if (!isrt) 4815 da_diff += del->br_blockcount; 4816 if (da_diff) 4817 xfs_mod_fdblocks(mp, da_diff, false); 4818 return error; 4819 } 4820 4821 void 4822 xfs_bmap_del_extent_cow( 4823 struct xfs_inode *ip, 4824 struct xfs_iext_cursor *icur, 4825 struct xfs_bmbt_irec *got, 4826 struct xfs_bmbt_irec *del) 4827 { 4828 struct xfs_mount *mp = ip->i_mount; 4829 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 4830 struct xfs_bmbt_irec new; 4831 xfs_fileoff_t del_endoff, got_endoff; 4832 int state = BMAP_COWFORK; 4833 4834 XFS_STATS_INC(mp, xs_del_exlist); 4835 4836 del_endoff = del->br_startoff + del->br_blockcount; 4837 got_endoff = got->br_startoff + got->br_blockcount; 4838 4839 ASSERT(del->br_blockcount > 0); 4840 ASSERT(got->br_startoff <= del->br_startoff); 4841 ASSERT(got_endoff >= del_endoff); 4842 ASSERT(!isnullstartblock(got->br_startblock)); 4843 4844 if (got->br_startoff == del->br_startoff) 4845 state |= BMAP_LEFT_FILLING; 4846 if (got_endoff == 
del_endoff) 4847 state |= BMAP_RIGHT_FILLING; 4848 4849 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 4850 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 4851 /* 4852 * Matches the whole extent. Delete the entry. 4853 */ 4854 xfs_iext_remove(ip, icur, state); 4855 xfs_iext_prev(ifp, icur); 4856 break; 4857 case BMAP_LEFT_FILLING: 4858 /* 4859 * Deleting the first part of the extent. 4860 */ 4861 got->br_startoff = del_endoff; 4862 got->br_blockcount -= del->br_blockcount; 4863 got->br_startblock = del->br_startblock + del->br_blockcount; 4864 xfs_iext_update_extent(ip, state, icur, got); 4865 break; 4866 case BMAP_RIGHT_FILLING: 4867 /* 4868 * Deleting the last part of the extent. 4869 */ 4870 got->br_blockcount -= del->br_blockcount; 4871 xfs_iext_update_extent(ip, state, icur, got); 4872 break; 4873 case 0: 4874 /* 4875 * Deleting the middle of the extent. 4876 */ 4877 got->br_blockcount = del->br_startoff - got->br_startoff; 4878 4879 new.br_startoff = del_endoff; 4880 new.br_blockcount = got_endoff - del_endoff; 4881 new.br_state = got->br_state; 4882 new.br_startblock = del->br_startblock + del->br_blockcount; 4883 4884 xfs_iext_update_extent(ip, state, icur, got); 4885 xfs_iext_next(ifp, icur); 4886 xfs_iext_insert(ip, icur, &new, state); 4887 break; 4888 } 4889 ip->i_delayed_blks -= del->br_blockcount; 4890 } 4891 4892 /* 4893 * Called by xfs_bmapi to update file extent records and the btree 4894 * after removing space. 
4895 */ 4896 STATIC int /* error */ 4897 xfs_bmap_del_extent_real( 4898 xfs_inode_t *ip, /* incore inode pointer */ 4899 xfs_trans_t *tp, /* current transaction pointer */ 4900 struct xfs_iext_cursor *icur, 4901 struct xfs_defer_ops *dfops, /* list of extents to be freed */ 4902 xfs_btree_cur_t *cur, /* if null, not a btree */ 4903 xfs_bmbt_irec_t *del, /* data to remove from extents */ 4904 int *logflagsp, /* inode logging flags */ 4905 int whichfork, /* data or attr fork */ 4906 int bflags) /* bmapi flags */ 4907 { 4908 xfs_fsblock_t del_endblock=0; /* first block past del */ 4909 xfs_fileoff_t del_endoff; /* first offset past del */ 4910 int do_fx; /* free extent at end of routine */ 4911 int error; /* error return value */ 4912 int flags = 0;/* inode logging flags */ 4913 struct xfs_bmbt_irec got; /* current extent entry */ 4914 xfs_fileoff_t got_endoff; /* first offset past got */ 4915 int i; /* temp state */ 4916 xfs_ifork_t *ifp; /* inode fork pointer */ 4917 xfs_mount_t *mp; /* mount structure */ 4918 xfs_filblks_t nblks; /* quota/sb block count */ 4919 xfs_bmbt_irec_t new; /* new record to be inserted */ 4920 /* REFERENCED */ 4921 uint qfield; /* quota field to update */ 4922 int state = xfs_bmap_fork_to_state(whichfork); 4923 struct xfs_bmbt_irec old; 4924 4925 mp = ip->i_mount; 4926 XFS_STATS_INC(mp, xs_del_exlist); 4927 4928 ifp = XFS_IFORK_PTR(ip, whichfork); 4929 ASSERT(del->br_blockcount > 0); 4930 xfs_iext_get_extent(ifp, icur, &got); 4931 ASSERT(got.br_startoff <= del->br_startoff); 4932 del_endoff = del->br_startoff + del->br_blockcount; 4933 got_endoff = got.br_startoff + got.br_blockcount; 4934 ASSERT(got_endoff >= del_endoff); 4935 ASSERT(!isnullstartblock(got.br_startblock)); 4936 qfield = 0; 4937 error = 0; 4938 4939 /* 4940 * If it's the case where the directory code is running with no block 4941 * reservation, and the deleted block is in the middle of its extent, 4942 * and the resulting insert of an extent would cause transformation to 
4943 * btree format, then reject it. The calling code will then swap blocks 4944 * around instead. We have to do this now, rather than waiting for the 4945 * conversion to btree format, since the transaction will be dirty then. 4946 */ 4947 if (tp->t_blk_res == 0 && 4948 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && 4949 XFS_IFORK_NEXTENTS(ip, whichfork) >= 4950 XFS_IFORK_MAXEXT(ip, whichfork) && 4951 del->br_startoff > got.br_startoff && del_endoff < got_endoff) 4952 return -ENOSPC; 4953 4954 flags = XFS_ILOG_CORE; 4955 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { 4956 xfs_fsblock_t bno; 4957 xfs_filblks_t len; 4958 xfs_extlen_t mod; 4959 4960 bno = div_u64_rem(del->br_startblock, mp->m_sb.sb_rextsize, 4961 &mod); 4962 ASSERT(mod == 0); 4963 len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize, 4964 &mod); 4965 ASSERT(mod == 0); 4966 4967 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); 4968 if (error) 4969 goto done; 4970 do_fx = 0; 4971 nblks = len * mp->m_sb.sb_rextsize; 4972 qfield = XFS_TRANS_DQ_RTBCOUNT; 4973 } else { 4974 do_fx = 1; 4975 nblks = del->br_blockcount; 4976 qfield = XFS_TRANS_DQ_BCOUNT; 4977 } 4978 4979 del_endblock = del->br_startblock + del->br_blockcount; 4980 if (cur) { 4981 error = xfs_bmbt_lookup_eq(cur, &got, &i); 4982 if (error) 4983 goto done; 4984 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 4985 } 4986 4987 if (got.br_startoff == del->br_startoff) 4988 state |= BMAP_LEFT_FILLING; 4989 if (got_endoff == del_endoff) 4990 state |= BMAP_RIGHT_FILLING; 4991 4992 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 4993 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 4994 /* 4995 * Matches the whole extent. Delete the entry. 
4996 */ 4997 xfs_iext_remove(ip, icur, state); 4998 xfs_iext_prev(ifp, icur); 4999 XFS_IFORK_NEXT_SET(ip, whichfork, 5000 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5001 flags |= XFS_ILOG_CORE; 5002 if (!cur) { 5003 flags |= xfs_ilog_fext(whichfork); 5004 break; 5005 } 5006 if ((error = xfs_btree_delete(cur, &i))) 5007 goto done; 5008 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 5009 break; 5010 case BMAP_LEFT_FILLING: 5011 /* 5012 * Deleting the first part of the extent. 5013 */ 5014 got.br_startoff = del_endoff; 5015 got.br_startblock = del_endblock; 5016 got.br_blockcount -= del->br_blockcount; 5017 xfs_iext_update_extent(ip, state, icur, &got); 5018 if (!cur) { 5019 flags |= xfs_ilog_fext(whichfork); 5020 break; 5021 } 5022 error = xfs_bmbt_update(cur, &got); 5023 if (error) 5024 goto done; 5025 break; 5026 case BMAP_RIGHT_FILLING: 5027 /* 5028 * Deleting the last part of the extent. 5029 */ 5030 got.br_blockcount -= del->br_blockcount; 5031 xfs_iext_update_extent(ip, state, icur, &got); 5032 if (!cur) { 5033 flags |= xfs_ilog_fext(whichfork); 5034 break; 5035 } 5036 error = xfs_bmbt_update(cur, &got); 5037 if (error) 5038 goto done; 5039 break; 5040 case 0: 5041 /* 5042 * Deleting the middle of the extent. 5043 */ 5044 old = got; 5045 5046 got.br_blockcount = del->br_startoff - got.br_startoff; 5047 xfs_iext_update_extent(ip, state, icur, &got); 5048 5049 new.br_startoff = del_endoff; 5050 new.br_blockcount = got_endoff - del_endoff; 5051 new.br_state = got.br_state; 5052 new.br_startblock = del_endblock; 5053 5054 flags |= XFS_ILOG_CORE; 5055 if (cur) { 5056 error = xfs_bmbt_update(cur, &got); 5057 if (error) 5058 goto done; 5059 error = xfs_btree_increment(cur, 0, &i); 5060 if (error) 5061 goto done; 5062 cur->bc_rec.b = new; 5063 error = xfs_btree_insert(cur, &i); 5064 if (error && error != -ENOSPC) 5065 goto done; 5066 /* 5067 * If get no-space back from btree insert, it tried a 5068 * split, and we have a zero block reservation. 
Fix up 5069 * our state and return the error. 5070 */ 5071 if (error == -ENOSPC) { 5072 /* 5073 * Reset the cursor, don't trust it after any 5074 * insert operation. 5075 */ 5076 error = xfs_bmbt_lookup_eq(cur, &got, &i); 5077 if (error) 5078 goto done; 5079 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 5080 /* 5081 * Update the btree record back 5082 * to the original value. 5083 */ 5084 error = xfs_bmbt_update(cur, &old); 5085 if (error) 5086 goto done; 5087 /* 5088 * Reset the extent record back 5089 * to the original value. 5090 */ 5091 xfs_iext_update_extent(ip, state, icur, &old); 5092 flags = 0; 5093 error = -ENOSPC; 5094 goto done; 5095 } 5096 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 5097 } else 5098 flags |= xfs_ilog_fext(whichfork); 5099 XFS_IFORK_NEXT_SET(ip, whichfork, 5100 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5101 xfs_iext_next(ifp, icur); 5102 xfs_iext_insert(ip, icur, &new, state); 5103 break; 5104 } 5105 5106 /* remove reverse mapping */ 5107 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del); 5108 if (error) 5109 goto done; 5110 5111 /* 5112 * If we need to, add to list of extents to delete. 5113 */ 5114 if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { 5115 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { 5116 error = xfs_refcount_decrease_extent(mp, dfops, del); 5117 if (error) 5118 goto done; 5119 } else { 5120 __xfs_bmap_add_free(mp, dfops, del->br_startblock, 5121 del->br_blockcount, NULL, 5122 (bflags & XFS_BMAPI_NODISCARD) || 5123 del->br_state == XFS_EXT_UNWRITTEN); 5124 } 5125 } 5126 5127 /* 5128 * Adjust inode # blocks in the file. 5129 */ 5130 if (nblks) 5131 ip->i_d.di_nblocks -= nblks; 5132 /* 5133 * Adjust quota data. 5134 */ 5135 if (qfield && !(bflags & XFS_BMAPI_REMAP)) 5136 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); 5137 5138 done: 5139 *logflagsp = flags; 5140 return error; 5141 } 5142 5143 /* 5144 * Unmap (remove) blocks from a file. 
* If nexts is nonzero then the number of extents to remove is limited to
 * that value.  The range is processed from its last block backwards towards
 * @start.  On return *rlen is zero if the whole range has been processed;
 * otherwise it is the number of blocks, counted from @start, that still
 * remain to be processed by a later call.
 *
 * Returns 0 or a negative errno.  Fails with -EFSCORRUPTED if the fork is
 * neither in extents nor btree format, and with -EIO if the filesystem has
 * been shut down.
 */
int						/* error */
__xfs_bunmapi(
	xfs_trans_t		*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		start,		/* first file offset deleted */
	xfs_filblks_t		*rlen,		/* i/o: amount remaining */
	int			flags,		/* misc flags */
	xfs_extnum_t		nexts,		/* number of extents max */
	xfs_fsblock_t		*firstblock,	/* first allocated block
						   controls a.g. for allocs */
	struct xfs_defer_ops	*dfops)		/* i/o: deferred updates */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_irec_t		del;		/* extent being deleted */
	int			error;		/* error return value */
	xfs_extnum_t		extno;		/* extent number in list */
	xfs_bmbt_irec_t		got;		/* current extent record */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	int			isrt;		/* freeing in rt area */
	int			logflags;	/* transaction logging flags */
	xfs_extlen_t		mod;		/* rt extent offset */
	xfs_mount_t		*mp;		/* mount structure */
	int			tmp_logflags;	/* partial logging flags */
	int			wasdel;		/* was a delayed alloc extent */
	int			whichfork;	/* data or attribute fork */
	xfs_fsblock_t		sum;		/* block just past the end of del */
	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
	xfs_fileoff_t		max_len;	/* blocks we may still unmap */
	xfs_agnumber_t		prev_agno = NULLAGNUMBER, agno;
	xfs_fileoff_t		end;		/* last file offset still to do */
	struct xfs_iext_cursor	icur;		/* incore extent list cursor */
	bool			done = false;	/* ran out of extents to look at */

	trace_xfs_bunmap(ip, start, len, flags, _RET_IP_);

	whichfork = xfs_bmapi_whichfork(flags);
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (unlikely(
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
				 ip->i_mount);
		return -EFSCORRUPTED;
	}
	mp = ip->i_mount;
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(len > 0);
	ASSERT(nexts >= 0);

	/*
	 * Guesstimate how many blocks we can unmap without running the risk of
	 * blowing out the transaction with a mix of EFIs and reflink
	 * adjustments.
	 */
	if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
		max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
	else
		max_len = len;

	/* Make sure the incore extent list is populated before walking it. */
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	if (xfs_iext_count(ifp) == 0) {
		*rlen = 0;
		return 0;
	}
	XFS_STATS_INC(mp, xs_blk_unmap);
	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	end = start + len;

	/* Nothing mapped in front of the end of the range: nothing to do. */
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) {
		*rlen = 0;
		return 0;
	}
	end--;

	logflags = 0;
	if (ifp->if_flags & XFS_IFBROOT) {
		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
	} else
		cur = NULL;

	if (isrt) {
		/*
		 * Synchronize by locking the bitmap inode.
		 */
		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
		xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
		xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
	}

	/*
	 * Walk backwards from "end" until the range is exhausted, the extent
	 * limit (nexts) is reached, or the unmap budget (max_len) is used up.
	 */
	extno = 0;
	while (end != (xfs_fileoff_t)-1 && end >= start &&
	       (nexts == 0 || extno < nexts) && max_len > 0) {
		/*
		 * Is the found extent after a hole in which end lives?
		 * Just back up to the previous extent, if so.
		 */
		if (got.br_startoff > end &&
		    !xfs_iext_prev_extent(ifp, &icur, &got)) {
			done = true;
			break;
		}
		/*
		 * Is the last block of this extent before the range
		 * we're supposed to delete?  If so, we're done.
		 */
		end = XFS_FILEOFF_MIN(end,
			got.br_startoff + got.br_blockcount - 1);
		if (end < start)
			break;
		/*
		 * Then deal with the (possibly delayed) allocated space
		 * we found.
		 */
		del = got;
		wasdel = isnullstartblock(del.br_startblock);

		/*
		 * Make sure we don't touch multiple AGF headers out of order
		 * in a single transaction, as that could cause AB-BA deadlocks.
		 */
		if (!wasdel) {
			agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
			if (prev_agno != NULLAGNUMBER && prev_agno > agno)
				break;
			prev_agno = agno;
		}
		/* Trim the front of del so it does not start before "start". */
		if (got.br_startoff < start) {
			del.br_startoff = start;
			del.br_blockcount -= start - got.br_startoff;
			if (!wasdel)
				del.br_startblock += start - got.br_startoff;
		}
		/* Trim the back of del so it does not extend past "end". */
		if (del.br_startoff + del.br_blockcount > end + 1)
			del.br_blockcount = end + 1 - del.br_startoff;

		/* How much can we safely unmap? */
		if (max_len < del.br_blockcount) {
			del.br_startoff += del.br_blockcount - max_len;
			if (!wasdel)
				del.br_startblock += del.br_blockcount - max_len;
			del.br_blockcount = max_len;
		}

		if (!isrt)
			goto delete;

		/* mod = remainder of del's end block modulo sb_rextsize. */
		sum = del.br_startblock + del.br_blockcount;
		div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
		if (mod) {
			/*
			 * Realtime extent not lined up at the end.
			 * The extent could have been split into written
			 * and unwritten pieces, or we could just be
			 * unmapping part of it.  But we can't really
			 * get rid of part of a realtime extent.
			 */
			if (del.br_state == XFS_EXT_UNWRITTEN ||
			    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * This piece is unwritten, or we're not
				 * using unwritten extents.  Skip over it.
				 */
				ASSERT(end >= mod);
				end -= mod > del.br_blockcount ?
					del.br_blockcount : mod;
				if (end < got.br_startoff &&
				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
					done = true;
					break;
				}
				continue;
			}
			/*
			 * It's written, turn it unwritten.
			 * This is better than zeroing it.
			 */
			ASSERT(del.br_state == XFS_EXT_NORM);
			ASSERT(tp->t_blk_res > 0);
			/*
			 * If this spans a realtime extent boundary,
			 * chop it back to the start of the one we end at.
			 */
			if (del.br_blockcount > mod) {
				del.br_startoff += del.br_blockcount - mod;
				del.br_startblock += del.br_blockcount - mod;
				del.br_blockcount = mod;
			}
			del.br_state = XFS_EXT_UNWRITTEN;
			error = xfs_bmap_add_extent_unwritten_real(tp, ip,
					whichfork, &icur, &cur, &del,
					firstblock, dfops, &logflags);
			if (error)
				goto error0;
			goto nodelete;
		}
		/* mod = remainder of del's start block modulo sb_rextsize. */
		div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
		if (mod) {
			/*
			 * Realtime extent is lined up at the end but not
			 * at the front.  We'll get rid of full extents if
			 * we can.
			 */
			mod = mp->m_sb.sb_rextsize - mod;
			if (del.br_blockcount > mod) {
				del.br_blockcount -= mod;
				del.br_startoff += mod;
				del.br_startblock += mod;
			} else if ((del.br_startoff == start &&
				    (del.br_state == XFS_EXT_UNWRITTEN ||
				     tp->t_blk_res == 0)) ||
				   !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
				/*
				 * Can't make it unwritten.  There isn't
				 * a full extent here so just skip it.
				 */
				ASSERT(end >= del.br_blockcount);
				end -= del.br_blockcount;
				if (got.br_startoff > end &&
				    !xfs_iext_prev_extent(ifp, &icur, &got)) {
					done = true;
					break;
				}
				continue;
			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
				struct xfs_bmbt_irec	prev;

				/*
				 * This one is already unwritten.
				 * It must have a written left neighbor.
				 * Unwrite the killed part of that one and
				 * try again.
				 */
				if (!xfs_iext_prev_extent(ifp, &icur, &prev))
					ASSERT(0);
				ASSERT(prev.br_state == XFS_EXT_NORM);
				ASSERT(!isnullstartblock(prev.br_startblock));
				ASSERT(del.br_startblock ==
				       prev.br_startblock + prev.br_blockcount);
				if (prev.br_startoff < start) {
					mod = start - prev.br_startoff;
					prev.br_blockcount -= mod;
					prev.br_startblock += mod;
					prev.br_startoff = start;
				}
				prev.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, whichfork, &icur, &cur,
						&prev, firstblock, dfops,
						&logflags);
				if (error)
					goto error0;
				goto nodelete;
			} else {
				ASSERT(del.br_state == XFS_EXT_NORM);
				del.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent_unwritten_real(tp,
						ip, whichfork, &icur, &cur,
						&del, firstblock, dfops,
						&logflags);
				if (error)
					goto error0;
				goto nodelete;
			}
		}

delete:
		if (wasdel) {
			error = xfs_bmap_del_extent_delay(ip, whichfork, &icur,
					&got, &del);
		} else {
			error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops,
					cur, &del, &tmp_logflags, whichfork,
					flags);
			logflags |= tmp_logflags;
		}

		if (error)
			goto error0;

		/* Charge the removed blocks against the budget and move on. */
		max_len -= del.br_blockcount;
		end = del.br_startoff - 1;
nodelete:
		/*
		 * If not done go on to the next (previous) record.
		 */
		if (end != (xfs_fileoff_t)-1 && end >= start) {
			if (!xfs_iext_get_extent(ifp, &icur, &got) ||
			    (got.br_startoff > end &&
			     !xfs_iext_prev_extent(ifp, &icur, &got))) {
				done = true;
				break;
			}
			extno++;
		}
	}
	/* Report how much of the range, counted from start, is left over. */
	if (done || end == (xfs_fileoff_t)-1 || end < start)
		*rlen = 0;
	else
		*rlen = end - start + 1;

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
			&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * transform from btree to extents, give it cur
	 */
	else if (xfs_bmap_wants_extents(ip, whichfork)) {
		ASSERT(cur != NULL);
		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
			whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * transform from extents to local?
	 */
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		logflags &= ~xfs_ilog_fext(whichfork);
	else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log inode even in the error case, if the transaction
	 * is dirty we'll need to shut down the filesystem.
	 */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (cur) {
		/* Only hand firstblock back to the caller on success. */
		if (!error) {
			*firstblock = cur->bc_private.b.firstblock;
			cur->bc_private.b.allocated = 0;
		}
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}

/*
 * Unmap a range of a file.
 *
 * Wrapper around __xfs_bunmapi() that takes the length by value.  *done is
 * set to nonzero when __xfs_bunmapi() reports no remaining length, and to
 * zero otherwise; it is written even when an error is returned.
 */
int
xfs_bunmapi(
	xfs_trans_t		*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_extnum_t		nexts,
	xfs_fsblock_t		*firstblock,
	struct xfs_defer_ops	*dfops,
	int			*done)
{
	int			error;

	error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock,
			dfops);
	*done = (len == 0);
	return error;
}

/*
 * Determine whether an extent shift can be accomplished by a merge with the
 * extent that precedes the target hole of the shift.
 *
 * Returns true only if @got, once moved left by @shift blocks, would start
 * exactly where @left ends both in the file and on disk, both extents have
 * the same state, and the combined length does not exceed MAXEXTLEN.
 */
STATIC bool
xfs_bmse_can_merge(
	struct xfs_bmbt_irec	*left,	/* preceding extent */
	struct xfs_bmbt_irec	*got,	/* current extent to shift */
	xfs_fileoff_t		shift)	/* shift fsb */
{
	xfs_fileoff_t		startoff;	/* start of got after the shift */

	startoff = got->br_startoff - shift;

	/*
	 * The extent, once shifted, must be adjacent in-file and on-disk with
	 * the preceding extent.
	 */
	if ((left->br_startoff + left->br_blockcount != startoff) ||
	    (left->br_startblock + left->br_blockcount != got->br_startblock) ||
	    (left->br_state != got->br_state) ||
	    (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
		return false;

	return true;
}

/*
 * A bmap extent shift adjusts the file offset of an extent to fill a preceding
 * hole in the file. If an extent shift would result in the extent being fully
 * adjacent to the extent that currently precedes the hole, we can merge with
 * the preceding extent rather than do the shift.
 *
 * This function assumes the caller has verified a shift-by-merge is possible
 * with the provided extents via xfs_bmse_can_merge().
*/
STATIC int
xfs_bmse_merge(
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			shift,		/* shift fsb */
	struct xfs_iext_cursor		*icur,
	struct xfs_bmbt_irec		*got,		/* extent to shift */
	struct xfs_bmbt_irec		*left,		/* preceding extent */
	struct xfs_btree_cur		*cur,
	int				*logflags,	/* output */
	struct xfs_defer_ops		*dfops)
{
	struct xfs_bmbt_irec		new;	/* merged record; later reused for the rmap */
	xfs_filblks_t			blockcount;	/* length of the merged extent */
	int				error, i;
	struct xfs_mount		*mp = ip->i_mount;

	blockcount = left->br_blockcount + got->br_blockcount;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(xfs_bmse_can_merge(left, got, shift));

	/* The merged extent is "left" grown to cover "got" as well. */
	new = *left;
	new.br_blockcount = blockcount;

	/*
	 * Update the on-disk extent count, the btree if necessary and log the
	 * inode.
	 */
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
	*logflags |= XFS_ILOG_CORE;
	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
		goto done;
	}

	/* lookup and remove the extent to merge */
	error = xfs_bmbt_lookup_eq(cur, got, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	error = xfs_btree_delete(cur, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	/* lookup and update size of the previous extent */
	error = xfs_bmbt_lookup_eq(cur, left, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	error = xfs_bmbt_update(cur, &new);
	if (error)
		return error;

done:
	/*
	 * Drop "got" from the incore list, step back to "left" and overwrite
	 * it with the merged record.
	 */
	xfs_iext_remove(ip, icur, 0);
	xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			&new);

	/* update reverse mapping. rmap functions merge the rmaps for us */
	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got);
	if (error)
		return error;
	/* Re-map the blocks of "got" at the offset right after "left". */
	memcpy(&new, got, sizeof(new));
	new.br_startoff = left->br_startoff + left->br_blockcount;
	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new);
}

/*
 * Move the extent at @icur to file offset @startoff.
 *
 * @got is updated in place with the new offset.  If a btree cursor is
 * supplied the old record is looked up and rewritten; otherwise
 * XFS_ILOG_DEXT is added to *logflags.  XFS_ILOG_CORE is always added.
 * The incore record is then updated and the reverse mapping is changed by
 * unmapping the old mapping and mapping the new one.
 *
 * Returns 0 or a negative errno.
 */
static int
xfs_bmap_shift_update_extent(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_iext_cursor	*icur,
	struct xfs_bmbt_irec	*got,
	struct xfs_btree_cur	*cur,
	int			*logflags,
	struct xfs_defer_ops	*dfops,
	xfs_fileoff_t		startoff)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	prev = *got;	/* mapping before the shift */
	int			error, i;

	*logflags |= XFS_ILOG_CORE;

	got->br_startoff = startoff;

	if (cur) {
		/* The btree still holds the old record; find it by that. */
		error = xfs_bmbt_lookup_eq(cur, &prev, &i);
		if (error)
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

		error = xfs_bmbt_update(cur, got);
		if (error)
			return error;
	} else {
		*logflags |= XFS_ILOG_DEXT;
	}

	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur,
			got);

	/* update reverse mapping */
	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev);
	if (error)
		return error;
	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got);
}

/*
 * Shift one data fork extent towards the start of the file.
 *
 * The extent looked up for *next_fsb is moved left by @offset_shift_fsb
 * blocks, or merged into its left neighbor when xfs_bmse_can_merge() allows
 * it.  On success *next_fsb is set to the start offset of the following
 * extent, or *done is set to true if there is none.  *done is also set if
 * the initial lookup finds no extent.
 *
 * Returns -EINVAL if the shifted extent would overlap the previous extent,
 * or if there is no previous extent and the extent starts at an offset
 * smaller than the shift.  Returns -EFSCORRUPTED for an unexpected fork
 * format and -EIO after a forced shutdown.
 */
int
xfs_bmap_collapse_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done,
	xfs_fsblock_t		*firstblock,
	struct xfs_defer_ops	*dfops)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, prev;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
	}

	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}
	/* A delayed allocation extent is not expected here. */
	XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
				del_cursor);

	new_startoff = got.br_startoff - offset_shift_fsb;
	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
		/* The shifted extent must not run into its left neighbor. */
		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
			error = -EINVAL;
			goto del_cursor;
		}

		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
			error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
					&icur, &got, &prev, cur, &logflags,
					dfops);
			if (error)
				goto del_cursor;
			goto done;
		}
	} else {
		/* First extent: the shift must not take it below offset 0. */
		if (got.br_startoff < offset_shift_fsb) {
			error = -EINVAL;
			goto del_cursor;
		}
	}

	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
			&logflags, dfops, new_startoff);
	if (error)
		goto del_cursor;

done:
	/* Tell the caller where to continue, or that we are finished. */
	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}

/*
 * Make sure we won't be right-shifting an extent past the maximum bound.
 *
 * Takes and releases XFS_ILOCK_EXCL itself.  Returns -EINVAL if the last
 * data fork extent starts at or after @off and its start offset plus @shift,
 * masked with BMBT_STARTOFF_MASK, is smaller than the current start offset.
 * Returns -EIO after a forced shutdown, or the error from
 * xfs_bmap_last_extent().
 */
int
xfs_bmap_can_insert_extents(
	struct xfs_inode	*ip,
	xfs_fileoff_t		off,
	xfs_fileoff_t		shift)
{
	struct xfs_bmbt_irec	got;
	int			is_empty;
	int			error = 0;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty);
	if (!error && !is_empty && got.br_startoff >= off &&
	    ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff)
		error = -EINVAL;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	return error;
}

/*
 * Shift one data fork extent towards the end of the file.
 *
 * If *next_fsb is NULLFSBLOCK the walk starts at the last extent of the
 * fork; otherwise the extent looked up for *next_fsb is used.  That extent
 * is moved right by @offset_shift_fsb blocks.  On success *next_fsb is set
 * to the start offset of the previous extent, or *done is set to true when
 * there is no previous extent or it ends at or before @stop_fsb.
 *
 * Returns -EINVAL if the shifted extent would overlap the next extent,
 * -EIO if @stop_fsb is at or beyond the end of the extent to shift or after
 * a forced shutdown, and -EFSCORRUPTED for an unexpected fork format.
 */
int
xfs_bmap_insert_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done,
	xfs_fileoff_t		stop_fsb,
	xfs_fsblock_t		*firstblock,
	struct xfs_defer_ops	*dfops)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, next;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
	}

	if (*next_fsb == NULLFSBLOCK) {
		/* First call: start from the last extent in the fork. */
		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
		    stop_fsb > got.br_startoff) {
			*done = true;
			goto del_cursor;
		}
	} else {
		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
			*done = true;
			goto del_cursor;
		}
	}
	/* A delayed allocation extent is not expected here. */
	XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
				del_cursor);

	if (stop_fsb >= got.br_startoff + got.br_blockcount) {
		error = -EIO;
		goto del_cursor;
	}

	new_startoff = got.br_startoff + offset_shift_fsb;
	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
		/* The shifted extent must not run into its right neighbor. */
		if (new_startoff + got.br_blockcount > next.br_startoff) {
			error = -EINVAL;
			goto del_cursor;
		}

		/*
		 * Unlike a left shift (which involves a hole punch), a right
		 * shift does not modify extent neighbors in any way.  We should
		 * never find mergeable extents in this scenario.  Check anyways
		 * and warn if we encounter two extents that could be one.
		 */
		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
			WARN_ON_ONCE(1);
	}

	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
			&logflags, dfops, new_startoff);
	if (error)
		goto del_cursor;

	/* Step to the previous extent; stop once it lies below stop_fsb. */
	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
	    stop_fsb >= got.br_startoff + got.br_blockcount) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}

/*
 * Split the data fork extent that contains @split_fsb into two extents, so
 * that @split_fsb becomes the first block of the second one.  If the lookup
 * finds no extent, or the extent found starts at or after @split_fsb (i.e.
 * @split_fsb lies in a hole or is already the first block of an extent),
 * nothing is changed and 0 is returned.
 *
 * Returns 0 or a negative errno.
 */
STATIC int
xfs_bmap_split_extent_at(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb,
	xfs_fsblock_t		*firstfsb,
	struct xfs_defer_ops	*dfops)
{
	int				whichfork = XFS_DATA_FORK;
	struct xfs_btree_cur		*cur = NULL;
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		new; /* split extent */
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp;
	xfs_fsblock_t			gotblkcnt; /* new block count for got */
	struct xfs_iext_cursor		icur;
	int				error = 0;
	int				logflags = 0;
	int				i = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
				 XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		/* Read in all the extents */
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	/*
	 * If there are no extents, or split_fsb lies in a hole, we are done.
	 */
	if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) ||
	    got.br_startoff >= split_fsb)
		return 0;

	/* "new" takes the tail of "got", starting at split_fsb. */
	gotblkcnt = split_fsb - got.br_startoff;
	new.br_startoff = split_fsb;
	new.br_startblock = got.br_startblock + gotblkcnt;
	new.br_blockcount = got.br_blockcount - gotblkcnt;
	new.br_state = got.br_state;

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstfsb;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
		/* Look the record up before got is shortened below. */
		error = xfs_bmbt_lookup_eq(cur, &got, &i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	/* Shorten the existing extent to end just before split_fsb. */
	got.br_blockcount = gotblkcnt;
	xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur,
			&got);

	logflags = XFS_ILOG_CORE;
	if (cur) {
		error = xfs_bmbt_update(cur, &got);
		if (error)
			goto del_cursor;
	} else
		logflags |= XFS_ILOG_DEXT;

	/* Add new extent */
	xfs_iext_next(ifp, &icur);
	xfs_iext_insert(ip, &icur, &new, 0);
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) + 1);

	if (cur) {
		/* The new record must not exist in the btree yet. */
		error = xfs_bmbt_lookup_eq(cur, &new, &i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
		error = xfs_btree_insert(cur, &i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		int tmp_logflags; /* partial log flag return val */

		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
				&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
	}

del_cursor:
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur,
				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}

/*
 * Split the data fork extent containing @split_fsb in a transaction of its
 * own.
 *
 * Allocates a tr_write transaction, takes XFS_ILOCK_EXCL and joins the inode
 * to the transaction, performs the split, finishes the deferred operations
 * and commits.  On failure the deferred operations and the transaction are
 * cancelled.  Returns 0 or a negative errno.
 */
int
xfs_bmap_split_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstfsb;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	xfs_defer_init(&dfops, &firstfsb);

	error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
			&firstfsb, &dfops);
	if (error)
		goto out;

	error = xfs_defer_finish(&tp, &dfops);
	if (error)
		goto out;

	return xfs_trans_commit(tp);

out:
	xfs_defer_cancel(&dfops);
	xfs_trans_cancel(tp);
	return error;
}

/*
 * Deferred mapping is only for real extents in the data fork.
 *
 * Returns true unless the mapping's start block is HOLESTARTBLOCK or
 * DELAYSTARTBLOCK.
 */
static bool
xfs_bmap_is_update_needed(
	struct xfs_bmbt_irec	*bmap)
{
	return  bmap->br_startblock != HOLESTARTBLOCK &&
		bmap->br_startblock != DELAYSTARTBLOCK;
}

/* Record a bmap intent.
*/ 6086 static int 6087 __xfs_bmap_add( 6088 struct xfs_mount *mp, 6089 struct xfs_defer_ops *dfops, 6090 enum xfs_bmap_intent_type type, 6091 struct xfs_inode *ip, 6092 int whichfork, 6093 struct xfs_bmbt_irec *bmap) 6094 { 6095 int error; 6096 struct xfs_bmap_intent *bi; 6097 6098 trace_xfs_bmap_defer(mp, 6099 XFS_FSB_TO_AGNO(mp, bmap->br_startblock), 6100 type, 6101 XFS_FSB_TO_AGBNO(mp, bmap->br_startblock), 6102 ip->i_ino, whichfork, 6103 bmap->br_startoff, 6104 bmap->br_blockcount, 6105 bmap->br_state); 6106 6107 bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS); 6108 INIT_LIST_HEAD(&bi->bi_list); 6109 bi->bi_type = type; 6110 bi->bi_owner = ip; 6111 bi->bi_whichfork = whichfork; 6112 bi->bi_bmap = *bmap; 6113 6114 error = xfs_defer_ijoin(dfops, bi->bi_owner); 6115 if (error) { 6116 kmem_free(bi); 6117 return error; 6118 } 6119 6120 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list); 6121 return 0; 6122 } 6123 6124 /* Map an extent into a file. */ 6125 int 6126 xfs_bmap_map_extent( 6127 struct xfs_mount *mp, 6128 struct xfs_defer_ops *dfops, 6129 struct xfs_inode *ip, 6130 struct xfs_bmbt_irec *PREV) 6131 { 6132 if (!xfs_bmap_is_update_needed(PREV)) 6133 return 0; 6134 6135 return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip, 6136 XFS_DATA_FORK, PREV); 6137 } 6138 6139 /* Unmap an extent out of a file. */ 6140 int 6141 xfs_bmap_unmap_extent( 6142 struct xfs_mount *mp, 6143 struct xfs_defer_ops *dfops, 6144 struct xfs_inode *ip, 6145 struct xfs_bmbt_irec *PREV) 6146 { 6147 if (!xfs_bmap_is_update_needed(PREV)) 6148 return 0; 6149 6150 return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip, 6151 XFS_DATA_FORK, PREV); 6152 } 6153 6154 /* 6155 * Process one of the deferred bmap operations. We pass back the 6156 * btree cursor to maintain our lock on the bmapbt between calls. 
6157 */ 6158 int 6159 xfs_bmap_finish_one( 6160 struct xfs_trans *tp, 6161 struct xfs_defer_ops *dfops, 6162 struct xfs_inode *ip, 6163 enum xfs_bmap_intent_type type, 6164 int whichfork, 6165 xfs_fileoff_t startoff, 6166 xfs_fsblock_t startblock, 6167 xfs_filblks_t *blockcount, 6168 xfs_exntst_t state) 6169 { 6170 xfs_fsblock_t firstfsb; 6171 int error = 0; 6172 6173 /* 6174 * firstfsb is tied to the transaction lifetime and is used to 6175 * ensure correct AG locking order and schedule work item 6176 * continuations. XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us 6177 * to only making one bmap call per transaction, so it should 6178 * be safe to have it as a local variable here. 6179 */ 6180 firstfsb = NULLFSBLOCK; 6181 6182 trace_xfs_bmap_deferred(tp->t_mountp, 6183 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, 6184 XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), 6185 ip->i_ino, whichfork, startoff, *blockcount, state); 6186 6187 if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK)) 6188 return -EFSCORRUPTED; 6189 6190 if (XFS_TEST_ERROR(false, tp->t_mountp, 6191 XFS_ERRTAG_BMAP_FINISH_ONE)) 6192 return -EIO; 6193 6194 switch (type) { 6195 case XFS_BMAP_MAP: 6196 error = xfs_bmapi_remap(tp, ip, startoff, *blockcount, 6197 startblock, dfops, 0); 6198 *blockcount = 0; 6199 break; 6200 case XFS_BMAP_UNMAP: 6201 error = __xfs_bunmapi(tp, ip, startoff, blockcount, 6202 XFS_BMAPI_REMAP, 1, &firstfsb, dfops); 6203 break; 6204 default: 6205 ASSERT(0); 6206 error = -EFSCORRUPTED; 6207 } 6208 6209 return error; 6210 } 6211 6212 /* Check that an inode's extent does not have invalid flags or bad ranges. 
*/
/*
 * @ip:        inode that owns the mapping
 * @whichfork: fork the mapping belongs to
 * @irec:      mapping to check
 *
 * Returns NULL if the mapping passes every check, otherwise the address
 * (__this_address) of the check that failed.
 */
xfs_failaddr_t
xfs_bmap_validate_extent(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fsblock_t		endfsb;	/* last block of the mapping */
	bool			isrt;

	/*
	 * The realtime block number checks apply only to the data fork of a
	 * realtime inode.  This is the same test __xfs_bunmapi() uses to
	 * decide whether it is freeing in the rt area.  Every other fork gets
	 * the regular fsbno and same-AG checks; previously a realtime
	 * inode's other forks were checked with the realtime rules.
	 */
	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	endfsb = irec->br_startblock + irec->br_blockcount - 1;
	if (isrt) {
		if (!xfs_verify_rtbno(mp, irec->br_startblock))
			return __this_address;
		if (!xfs_verify_rtbno(mp, endfsb))
			return __this_address;
	} else {
		if (!xfs_verify_fsbno(mp, irec->br_startblock))
			return __this_address;
		if (!xfs_verify_fsbno(mp, endfsb))
			return __this_address;
		/* The first and last block must be in the same AG. */
		if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
		    XFS_FSB_TO_AGNO(mp, endfsb))
			return __this_address;
	}
	/*
	 * A state other than XFS_EXT_NORM is only accepted in the data fork,
	 * and only when the superblock has the extent flag feature bit.
	 */
	if (irec->br_state != XFS_EXT_NORM) {
		if (whichfork != XFS_DATA_FORK)
			return __this_address;
		if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
			return __this_address;
	}
	return NULL;
}