/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
#include "xfs_symlink.h"
#include "xfs_attr_leaf.h"
#include "xfs_filestream.h"
#include "xfs_rmap.h"
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
#include "xfs_icache.h"


kmem_zone_t		*xfs_bmap_free_item_zone;

/*
 * Miscellaneous helper functions
 */

/*
 * Compute and fill in the value of the maximum depth of a bmap btree
 * in this filesystem.  Done once, during mount.
 */
void
xfs_bmap_compute_maxlevels(
	xfs_mount_t	*mp,		/* file system mount structure */
	int		whichfork)	/* data or attr fork */
{
	int		level;		/* btree level */
	uint		maxblocks;	/* max blocks at this level */
	uint		maxleafents;	/* max leaf entries possible */
	int		maxrootrecs;	/* max records in root block */
	int		minleafrecs;	/* min records in leaf block */
	int		minnoderecs;	/* min records in node block */
	int		sz;		/* root block size */

	/*
	 * The maximum number of extents in a file, hence the maximum
	 * number of leaf entries, is controlled by the type of di_nextents
	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
	 * (a signed 16-bit number, xfs_aextnum_t).
	 *
	 * Note that we can no longer assume that if we are in ATTR1 that
	 * the fork offset of all the inodes will be
	 * (xfs_default_attroffset(ip) >> 3) because we could have mounted
	 * with ATTR2 and then mounted back with ATTR1, keeping the
	 * di_forkoff's fixed but probably at various positions. Therefore,
	 * for both ATTR1 and ATTR2 we have to assume the worst case scenario
	 * of a minimum size available.
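	 *
	 * Given those worst-case entry counts, the loop below starts with the
	 * number of leaf blocks needed to hold every possible record and
	 * repeatedly divides by the minimum node fanout (rounding up) until
	 * the remaining blocks fit in the inode root, one level per pass.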
	 */
	if (whichfork == XFS_DATA_FORK) {
		maxleafents = MAXEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
	} else {
		maxleafents = MAXAEXTNUM;
		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
	}
	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
	minleafrecs = mp->m_bmap_dmnr[0];
	minnoderecs = mp->m_bmap_dmnr[1];
	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
	for (level = 1; maxblocks > 1; level++) {
		if (maxblocks <= maxrootrecs)
			maxblocks = 1;
		else
			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
	}
	mp->m_bm_maxlevels[whichfork] = level;
}

STATIC int				/* error */
xfs_bmbt_lookup_eq(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b = *irec;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

STATIC int				/* error */
xfs_bmbt_lookup_first(
	struct xfs_btree_cur	*cur,
	int			*stat)	/* success/failure */
{
	cur->bc_rec.b.br_startoff = 0;
	cur->bc_rec.b.br_startblock = 0;
	cur->bc_rec.b.br_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Check if the inode needs to be converted to btree format.
 */
static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
{
	return whichfork != XFS_COW_FORK &&
		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		XFS_IFORK_NEXTENTS(ip, whichfork) >
			XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Check if the inode should be converted to extent format.
 */
static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
{
	return whichfork != XFS_COW_FORK &&
		XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
		XFS_IFORK_NEXTENTS(ip, whichfork) <=
			XFS_IFORK_MAXEXT(ip, whichfork);
}

/*
 * Update the record referred to by cur to the value given by irec.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_bmbt_update(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*irec)
{
	union xfs_btree_rec	rec;

	xfs_bmbt_disk_set_all(&rec.bmbt, irec);
	return xfs_btree_update(cur, &rec);
}

/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}

/*
 * Calculate the default attribute fork offset for newly created inodes.
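 *
 * With the minimum 256 byte inode size the data fork gets everything but a
 * minimal attr btree root's worth of space at the end of the literal area;
 * larger inodes instead use a fixed offset of 6 * MINABTPTRS worth.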
 */
uint
xfs_default_attroffset(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint			offset;

	if (mp->m_sb.sb_inodesize == 256) {
		offset = XFS_LITINO(mp, ip->i_d.di_version) -
				XFS_BMDR_SPACE_CALC(MINABTPTRS);
	} else {
		offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
	}

	ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
	return offset;
}

/*
 * Helper routine to reset inode di_forkoff field when switching
 * attribute fork from local to extent format - we reset it where
 * possible to make space available for inline data fork extents.
 */
STATIC void
xfs_bmap_forkoff_reset(
	xfs_inode_t	*ip,
	int		whichfork)
{
	if (whichfork == XFS_ATTR_FORK &&
	    ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
	    ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
		uint	dfl_forkoff = xfs_default_attroffset(ip) >> 3;

		if (dfl_forkoff > ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = dfl_forkoff;
	}
}

#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		bno)
{
	struct xfs_log_item	*lip;
	int			i;

	if (!cur)
		return NULL;

	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
		if (!cur->bc_bufs[i])
			break;
		if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
			return cur->bc_bufs[i];
	}

	/* Chase down all the log items to see if the bp is there */
	list_for_each_entry(lip, &cur->bc_tp->t_items, li_trans) {
		struct xfs_buf_log_item	*bip = (struct xfs_buf_log_item *)lip;

		if (bip->bli_item.li_type == XFS_LI_BUF &&
		    XFS_BUF_ADDR(bip->bli_buf) == bno)
			return bip->bli_buf;
	}

	return NULL;
}

STATIC void
xfs_check_block(
	struct xfs_btree_block	*block,
	xfs_mount_t		*mp,
	int			root,
	short			sz)
{
	int			i, j, dmxr;
	__be64			*pp, *thispa;	/* pointer to block address */
	xfs_bmbt_key_t		*prevp, *keyp;

	ASSERT(be16_to_cpu(block->bb_level) > 0);

	prevp = NULL;
	for (i = 1; i <= xfs_btree_get_numrecs(block); i++) {
		dmxr = mp->m_bmap_dmxr[0];
		keyp = XFS_BMBT_KEY_ADDR(mp, block, i);

		if (prevp) {
			ASSERT(be64_to_cpu(prevp->br_startoff) <
			       be64_to_cpu(keyp->br_startoff));
		}
		prevp = keyp;

		/*
		 * Compare the block numbers to see if there are dups.
		 */
		if (root)
			pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
		else
			pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);

		for (j = i + 1; j <= be16_to_cpu(block->bb_numrecs); j++) {
			if (root)
				thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
			else
				thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
			if (*thispa == *pp) {
				xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
					__func__, j, i,
					(unsigned long long)be64_to_cpu(*thispa));
				xfs_err(mp, "%s: ptrs are equal in node\n",
					__func__);
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
			}
		}
	}
}

/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, so for such large
 * bmapbt constructs that is going to catch most corruptions.
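 *
 * The walk below descends the leftmost path to the first leaf and then
 * follows the right sibling pointers across the leaf level, verifying the
 * record ordering both within each block and across block boundaries.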
 */
STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,	/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i = 0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
		return;
	}

	/* skip large extent count inodes */
	if (ip->i_d.di_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
					xfs_verify_fsbno(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;


		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}

	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	xfs_err(mp, "%s: CORRUPTED BTREE OR SOMETHING", __func__);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	return;
}

/*
 * Validate that the bmbt_irecs being returned from bmapi are valid
 * given the caller's original parameters.  Specifically check the
 * ranges of the returned irecs to ensure that they only extend beyond
 * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	int			i;	/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0; i < ret_nmap; i++) {
		ASSERT(mval[i].br_blockcount > 0);
		if (!(flags & XFS_BMAPI_ENTIRE)) {
			ASSERT(mval[i].br_startoff >= bno);
			ASSERT(mval[i].br_blockcount <= len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
			       bno + len);
		} else {
			ASSERT(mval[i].br_startoff < bno + len);
			ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
			       bno);
		}
		ASSERT(i == 0 ||
		       mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
		       mval[i].br_startoff);
		ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
		       mval[i].br_startblock != HOLESTARTBLOCK);
		ASSERT(mval[i].br_state == XFS_EXT_NORM ||
		       mval[i].br_state == XFS_EXT_UNWRITTEN);
	}
}

#else
#define xfs_bmap_check_leaf_extents(cur, ip, whichfork)		do { } while (0)
#define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)	do { } while (0)
#endif /* DEBUG */

/*
 * bmap free list manipulation functions
 */

/*
 * Add the extent to the list of extents to be freed at transaction end.
 * The list is maintained sorted (by block number).
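 *
 * The item is allocated from xfs_bmap_free_item_zone and queued as deferred
 * work; the blocks are not actually freed until the deferred ops are
 * finished towards the end of the transaction.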
 */
void
__xfs_bmap_add_free(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			bno,
	xfs_filblks_t			len,
	struct xfs_owner_info		*oinfo,
	bool				skip_discard)
{
	struct xfs_extent_free_item	*new;		/* new element */
#ifdef DEBUG
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= MAXEXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_bmap_free_item_zone != NULL);

	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
	new->xefi_startblock = bno;
	new->xefi_blockcount = (xfs_extlen_t)len;
	if (oinfo)
		new->xefi_oinfo = *oinfo;
	else
		xfs_rmap_skip_owner_update(&new->xefi_oinfo);
	new->xefi_skip_discard = skip_discard;
	trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
			XFS_FSB_TO_AGBNO(mp, bno), len);
	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
}

/*
 * Inode fork format manipulation functions
 */

/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	xfs_ifork_t		*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */
	struct xfs_owner_info	oinfo;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	rblock = ifp->if_broot;
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
			xfs_btree_check_lptr(cur, cbno, 1));
#endif
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}

/*
 * Convert an extents-format file into a btree-format file.
 * The new file will have a root block (in the inode) and a single child block.
 */
STATIC int					/* error */
xfs_bmap_extents_to_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
	struct xfs_defer_ops	*dfops,		/* blocks freed in xaction */
	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
	int			wasdel,		/* converting a delayed alloc */
	int			*logflagsp,	/* inode logging flags */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*ablock;	/* allocated (child) bt block */
	xfs_buf_t		*abp;		/* buffer for ablock */
	xfs_alloc_arg_t		args;		/* allocation arguments */
	xfs_bmbt_rec_t		*arp;		/* child record pointer */
	struct xfs_btree_block	*block;		/* btree root block */
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	int			error;		/* error return value */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_bmbt_key_t		*kp;		/* root block key pointer */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	rec;
	xfs_extnum_t		cnt = 0;

	mp = ip->i_mount;
	ASSERT(whichfork != XFS_COW_FORK);
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);

	/*
	 * Make space in the inode incore.
	 */
	xfs_iroot_realloc(ip, 1, whichfork);
	ifp->if_flags |= XFS_IFBROOT;

	/*
	 * Fill in the root.
	 */
	block = ifp->if_broot;
	xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
				 XFS_BTNUM_BMAP, 1, 1, ip->i_ino,
				 XFS_BTREE_LONG_PTRS);
	/*
	 * Need a cursor.  Can't allocate until bb_level is filled in.
	 */
	cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
	cur->bc_private.b.firstblock = *firstblock;
	cur->bc_private.b.dfops = dfops;
	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
	/*
	 * Convert to a btree with two levels, one record in root.
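	 *
	 * The allocation below asks for a single block (minlen == maxlen ==
	 * 1) and aims near the inode, or near the caller's firstblock hint
	 * when one is set, to keep the new btree block close to the data.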
	 */
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = mp;
	xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
	args.firstblock = *firstblock;
	if (*firstblock == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
	} else if (dfops->dop_low) {
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = *firstblock;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.fsbno = *firstblock;
	}
	args.minlen = args.maxlen = args.prod = 1;
	args.wasdel = wasdel;
	*logflagsp = 0;
	if ((error = xfs_alloc_vextent(&args))) {
		xfs_iroot_realloc(ip, -1, whichfork);
		ASSERT(ifp->if_broot == NULL);
		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return error;
	}

	if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
		xfs_iroot_realloc(ip, -1, whichfork);
		ASSERT(ifp->if_broot == NULL);
		XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
		return -ENOSPC;
	}
	/*
	 * Allocation can't fail, the space was reserved.
	 */
	ASSERT(*firstblock == NULLFSBLOCK ||
	       args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
	cur->bc_private.b.allocated++;
	ip->i_d.di_nblocks++;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
	/*
	 * Fill in the child block.
	 */
	abp->b_ops = &xfs_bmbt_buf_ops;
	ablock = XFS_BUF_TO_BLOCK(abp);
	xfs_btree_init_block_int(mp, ablock, abp->b_bn,
				XFS_BTNUM_BMAP, 0, 0, ip->i_ino,
				XFS_BTREE_LONG_PTRS);

	for_each_xfs_iext(ifp, &icur, &rec) {
		if (isnullstartblock(rec.br_startblock))
			continue;
		arp = XFS_BMBT_REC_ADDR(mp, ablock, 1 + cnt);
		xfs_bmbt_disk_set_all(arp, &rec);
		cnt++;
	}
	ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
	xfs_btree_set_numrecs(ablock, cnt);

	/*
	 * Fill in the root key and pointer.
	 */
	kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
	arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
	kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
	pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
						be16_to_cpu(block->bb_level)));
	*pp = cpu_to_be64(args.fsbno);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
	xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
	ASSERT(*curp == NULL);
	*curp = cur;
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
	return 0;
}

/*
 * Convert a local file to an extents file.
 * This code is out of bounds for data forks of regular files,
 * since the file data needs to get logged so things will stay consistent.
 * (The bmap-level manipulations are ok, though).
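 *
 * The _empty variant below handles the degenerate case of a zero-length
 * local fork: no block needs to be allocated, so it only resets the fork
 * offset and flips the fork format over to extents.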
 */
void
xfs_bmap_local_to_extents_empty(
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);

	ASSERT(whichfork != XFS_COW_FORK);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_bytes == 0);
	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);

	xfs_bmap_forkoff_reset(ip, whichfork);
	ifp->if_flags &= ~XFS_IFINLINE;
	ifp->if_flags |= XFS_IFEXTENTS;
	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
}


STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	struct xfs_bmbt_irec rec;
	struct xfs_iext_cursor icur;

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS)) == XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
	args.firstblock = *firstblock;
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (*firstblock == NULLFSBLOCK) {
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = *firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	*firstblock = args.fsbno;
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much to
	 * log here. Note that init_fn must also set the buffer log item type
	 * correctly.
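	 * (xfs_symlink_local_to_remote, for instance, tags its buffer with
	 * XFS_BLFT_SYMLINK_BUF before logging it.)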
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	ifp->if_u1.if_root = NULL;
	ifp->if_height = 0;

	rec.br_startoff = 0;
	rec.br_startblock = args.fsbno;
	rec.br_blockcount = 1;
	rec.br_state = XFS_EXT_NORM;
	xfs_iext_first(ifp, &icur);
	xfs_iext_insert(ip, &icur, &rec, 0);

	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle btree format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_btree(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* btree cursor */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* file system mount struct */
	int			stat;		/* newroot status */

	mp = ip->i_mount;
	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
		*flags |= XFS_ILOG_DBROOT;
	else {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.firstblock = *firstblock;
		error = xfs_bmbt_lookup_first(cur, &stat);
		if (error)
			goto error0;
		/* must be at least one entry */
		XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
		if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
			goto error0;
		if (stat == 0) {
			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
			return -ENOSPC;
		}
		*firstblock = cur->bc_private.b.firstblock;
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	}
	return 0;
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle extents format files.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_extents(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	int			error;		/* error return value */

	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
		return 0;
	cur = NULL;
	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
		flags, XFS_DATA_FORK);
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}

/*
 * Called from xfs_bmap_add_attrfork to handle local format files. Each
 * different data fork content type needs a different callout to do the
 * conversion. Some are basic and only require special block initialisation
 * callouts for the data formatting, others (directories) are so specialised they
 * handle everything themselves.
 *
 * XXX (dgc): investigate whether directory conversion can use the generic
 * formatting callout. It should be possible - it's just a very complex
 * formatter.
 */
STATIC int					/* error */
xfs_bmap_add_attrfork_local(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode pointer */
	xfs_fsblock_t		*firstblock,	/* first block allocated */
	struct xfs_defer_ops	*dfops,		/* blocks to free at commit */
	int			*flags)		/* inode logging flags */
{
	xfs_da_args_t		dargs;		/* args for dir/attr code */

	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
		return 0;

	if (S_ISDIR(VFS_I(ip)->i_mode)) {
		memset(&dargs, 0, sizeof(dargs));
		dargs.geo = ip->i_mount->m_dir_geo;
		dargs.dp = ip;
		dargs.firstblock = firstblock;
		dargs.dfops = dfops;
		dargs.total = dargs.geo->fsbcount;
		dargs.whichfork = XFS_DATA_FORK;
		dargs.trans = tp;
		return xfs_dir2_sf_to_block(&dargs);
	}

	if (S_ISLNK(VFS_I(ip)->i_mode))
		return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
						 flags, XFS_DATA_FORK,
						 xfs_symlink_local_to_remote);

	/* should only be called for types that support local format data */
	ASSERT(0);
	return -EFSCORRUPTED;
}

/*
 * Convert inode from non-attributed to attributed.
 * Must not be in a transaction, ip must not be locked.
 */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	struct xfs_defer_ops	dfops;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	blks = XFS_ADDAFORK_SPACE_RES(mp);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_anextents != 0) {
		error = -EFSCORRUPTED;
		goto trans_cancel;
	}
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
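		 * These carry a zero di_aformat, so normalise it to extents
		 * format here before hanging the new attr fork off the inode.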
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			version = 2;
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_defer_init(&dfops, &firstblock);
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&dfops, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto bmap_cancel;
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	error = xfs_defer_finish(&tp, &dfops);
	if (error)
		goto bmap_cancel;
	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

bmap_cancel:
	xfs_defer_cancel(&dfops);
trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Internal and external extent tree search functions.
 */

/*
 * Read in extents from a btree-format inode.
 */
int
xfs_iread_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			state = xfs_bmap_fork_to_state(whichfork);
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	xfs_extnum_t		nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
	struct xfs_btree_block	*block = ifp->if_broot;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	new;
	xfs_fsblock_t		bno;
	struct xfs_buf		*bp;
	xfs_extnum_t		i, j;
	int			level;
	__be64			*pp;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
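	 * (The incore root is sized by if_broot_bytes rather than the fixed
	 * on-disk block size, so the pointer array sits at a different
	 * offset than in a regular bmbt block.)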
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
			xfs_verify_fsbno(mp, bno), out_brelse);
		xfs_trans_brelse(tp, bp);
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;
	xfs_iext_first(ifp, &icur);

	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;

		num_recs = xfs_btree_get_numrecs(block);
		if (unlikely(i + num_recs > nextents)) {
			ASSERT(i + num_recs <= nextents);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			xfs_inode_verifier_error(ip, -EFSCORRUPTED,
					__func__, block, sizeof(*block),
					__this_address);
			error = -EFSCORRUPTED;
			goto out_brelse;
		}
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		for (j = 0; j < num_recs; j++, frp++, i++) {
			xfs_failaddr_t	fa;

			xfs_bmbt_disk_get_all(frp, &new);
			fa = xfs_bmap_validate_extent(ip, whichfork, &new);
			if (fa) {
				error = -EFSCORRUPTED;
				xfs_inode_verifier_error(ip, error,
						"xfs_iread_extents(2)",
						frp, sizeof(*frp), fa);
				goto out_brelse;
			}
			xfs_iext_insert(ip, &icur, &new, state);
			trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
			xfs_iext_next(ifp, &icur);
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			goto out;
		block = XFS_BUF_TO_BLOCK(bp);
	}

	if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
		error = -EFSCORRUPTED;
		goto out;
	}
	ASSERT(i == xfs_iext_count(ifp));

	ifp->if_flags |= XFS_IFEXTENTS;
	return 0;

out_brelse:
	xfs_trans_brelse(tp, bp);
out:
	xfs_iext_destroy(ifp);
	return error;
}

/*
 * Returns the relative block number of the first unused block(s) in the given
 * fork with at least "len" logically contiguous blocks free.  This is the
 * lowest-address hole if the fork has holes, else the first block past the end
 * of fork.  Return 0 if the fork is currently local (in-inode).
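 *
 * The scan below tracks the end of the previous extent in "max" and stops
 * at the first gap of at least "len" blocks before the following extent.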
 */
int						/* error */
xfs_bmap_first_unused(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_extlen_t		len,		/* size of hole to find */
	xfs_fileoff_t		*first_unused,	/* unused block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		lastaddr = 0;
	xfs_fileoff_t		lowest, max;
	int			error;

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
		*first_unused = 0;
		return 0;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	lowest = max = *first_unused;
	for_each_xfs_iext(ifp, &icur, &got) {
		/*
		 * See if the hole before this extent will work.
		 */
		if (got.br_startoff >= lowest + len &&
		    got.br_startoff - max >= len)
			break;
		lastaddr = got.br_startoff + got.br_blockcount;
		max = XFS_FILEOFF_MAX(lastaddr, lowest);
	}

	*first_unused = max;
	return 0;
}

/*
 * Returns the file-relative block number of the last block - 1 before
 * last_block (input value) in the file.
 * This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int						/* error */
xfs_bmap_last_before(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		*last_block,	/* last block */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	got;
	struct xfs_iext_cursor	icur;
	int			error;

	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
	case XFS_DINODE_FMT_LOCAL:
		*last_block = 0;
		return 0;
	case XFS_DINODE_FMT_BTREE:
	case XFS_DINODE_FMT_EXTENTS:
		break;
	default:
		return -EIO;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (!xfs_iext_lookup_extent_before(ip, ifp, last_block, &icur, &got))
		*last_block = 0;
	return 0;
}

int
xfs_bmap_last_extent(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*rec,
	int			*is_empty)
{
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_iext_cursor	icur;
	int			error;

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	xfs_iext_last(ifp, &icur);
	if (!xfs_iext_get_extent(ifp, &icur, rec))
		*is_empty = 1;
	else
		*is_empty = 0;
	return 0;
}

/*
 * Check the last inode extent to determine whether this allocation will result
 * in blocks being allocated at the end of the file.  When we allocate new data
 * blocks at the end of the file which do not start at the previous data block,
 * we will try to align the new blocks at stripe unit boundaries.
 *
 * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
 * at, or past the EOF.
 */
STATIC int
xfs_bmap_isaeof(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	bma->aeof = false;
	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
				     &is_empty);
	if (error)
		return error;

	if (is_empty) {
		bma->aeof = true;
		return 0;
	}

	/*
	 * Check if we are allocating at or past the last extent, or at least
	 * into the last delayed allocated extent.
	 */
	bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
		(bma->offset >= rec.br_startoff &&
		 isnullstartblock(rec.br_startblock));
	return 0;
}

/*
 * Returns the file-relative block number of the first block past eof in
 * the file.  This is not based on i_size, it is based on the extent records.
 * Returns 0 for local files, as they do not have extent records.
 */
int
xfs_bmap_last_offset(
	struct xfs_inode	*ip,
	xfs_fileoff_t		*last_block,
	int			whichfork)
{
	struct xfs_bmbt_irec	rec;
	int			is_empty;
	int			error;

	*last_block = 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
		return 0;

	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return -EIO;

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
	if (error || is_empty)
		return error;

	*last_block = rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Returns whether the selected fork of the inode has exactly one
 * block or not.  For the data fork we check this matches di_size,
 * implying the file's range is 0..bsize-1.
 */
int					/* 1=>1 block, 0=>otherwise */
xfs_bmap_one_block(
	xfs_inode_t	*ip,		/* incore inode */
	int		whichfork)	/* data or attr fork */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	int		rval;		/* return value */
	xfs_bmbt_irec_t	s;		/* internal version of extent */
	struct xfs_iext_cursor icur;

#ifndef DEBUG
	if (whichfork == XFS_DATA_FORK)
		return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
#endif	/* !DEBUG */
	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
		return 0;
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		return 0;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	xfs_iext_first(ifp, &icur);
	xfs_iext_get_extent(ifp, &icur, &s);
	rval = s.br_startoff == 0 && s.br_blockcount == 1;
	if (rval && whichfork == XFS_DATA_FORK)
		ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
	return rval;
}

/*
 * Extent tree manipulation functions used during allocation.
 */

/*
 * Convert a delayed allocation to a real allocation.
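 *
 * The conversion classifies where the new real extent falls within the
 * delayed extent (PREV) and whether it merges with the left and right
 * neighbours, then switches on the resulting FILLING/CONTIG state bits.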
 */
STATIC int				/* error */
xfs_bmap_add_extent_delay_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
					/* left is 0, right is 1, prev is 2 */
	int			rval = 0;	/* return value (logging flags) */
	int			state = xfs_bmap_fork_to_state(whichfork);
	xfs_filblks_t		da_new; /* new count del alloc blocks used */
	xfs_filblks_t		da_old; /* old count del alloc blocks used */
	xfs_filblks_t		temp = 0;	/* value for da_new calculations */
	int			tmp_rval;	/* partial logging flags */
	struct xfs_mount	*mp;
	xfs_extnum_t		*nextents;
	struct xfs_bmbt_irec	old;

	mp = bma->ip->i_mount;
	ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	ASSERT(whichfork != XFS_ATTR_FORK);
	nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
						&bma->ip->i_d.di_nextents);

	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(mp, xs_add_exlist);

#define	LEFT		r[0]
#define	RIGHT		r[1]
#define	PREV		r[2]

	/*
	 * Set up a bunch of variables to make the tests simpler.
	 */
	xfs_iext_get_extent(ifp, &bma->icur, &PREV);
	new_endoff = new->br_startoff + new->br_blockcount;
	ASSERT(isnullstartblock(PREV.br_startblock));
	ASSERT(PREV.br_startoff <= new->br_startoff);
	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);

	da_old = startblockval(PREV.br_startblock);
	da_new = 0;

	/*
	 * Set flags determining what part of the previous delayed allocation
	 * extent is being replaced by a real allocation.
	 */
	if (PREV.br_startoff == new->br_startoff)
		state |= BMAP_LEFT_FILLING;
	if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
		state |= BMAP_RIGHT_FILLING;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 */
	if (xfs_iext_peek_prev_extent(ifp, &bma->icur, &LEFT)) {
		state |= BMAP_LEFT_VALID;
		if (isnullstartblock(LEFT.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
	    LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
	    LEFT.br_state == new->br_state &&
	    LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	/*
	 * Check and set flags if this segment has a right neighbor.
	 * Don't set contiguous if the combined extent would be too large.
	 * Also check for all-three-contiguous being too large.
	 */
	if (xfs_iext_peek_next_extent(ifp, &bma->icur, &RIGHT)) {
		state |= BMAP_RIGHT_VALID;
		if (isnullstartblock(RIGHT.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new_endoff == RIGHT.br_startoff &&
	    new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
	    new->br_state == RIGHT.br_state &&
	    new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
	    ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING)) !=
		      (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
		       BMAP_RIGHT_FILLING) ||
	     LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
			<= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;

	error = 0;
	/*
	 * Switch out based on the FILLING and CONTIG state bits.
	 */
	switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
			 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
	     BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left and right neighbors are both contiguous with new.
		 */
		LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);
		(*nextents)--;

		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The left neighbor is contiguous, the right is not.
		 */
		old = LEFT;
		LEFT.br_blockcount += PREV.br_blockcount;

		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * The right neighbor is contiguous, the left is not.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_blockcount += RIGHT.br_blockcount;

		xfs_iext_next(ifp, &bma->icur);
		xfs_iext_remove(bma->ip, &bma->icur, state);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &PREV);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
		/*
		 * Filling in all of a previously delayed allocation extent.
		 * Neither the left nor right neighbors are contiguous with
		 * the new one.
		 */
		PREV.br_startblock = new->br_startblock;
		PREV.br_state = new->br_state;
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);

		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;

	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is contiguous.
		 */
		old = LEFT;
		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
				startblockval(PREV.br_startblock));

		LEFT.br_blockcount += new->br_blockcount;

		PREV.br_blockcount = temp;
		PREV.br_startoff += new->br_blockcount;
		PREV.br_startblock = nullstartblock(da_new);

		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
		xfs_iext_prev(ifp, &bma->icur);
		xfs_iext_update_extent(bma->ip, state, &bma->icur, &LEFT);

		if (bma->cur == NULL)
			rval = XFS_ILOG_DEXT;
		else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, &LEFT);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_FILLING:
		/*
		 * Filling in the first part of a previous delayed allocation.
		 * The left neighbor is not contiguous.
		 */
		xfs_iext_update_extent(bma->ip, state, &bma->icur, new);
		(*nextents)++;
		if (bma->cur == NULL)
			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
		else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}

		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
					bma->firstblock, bma->dfops,
					&bma->cur, 1, &tmp_rval, whichfork);
			rval |= tmp_rval;
			if (error)
				goto done;
		}

		temp = PREV.br_blockcount - new->br_blockcount;
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
			startblockval(PREV.br_startblock) -
			(bma->cur ? bma->cur->bc_private.b.allocated : 0));
bma->cur->bc_private.b.allocated : 0)); 1836 1837 PREV.br_startoff = new_endoff; 1838 PREV.br_blockcount = temp; 1839 PREV.br_startblock = nullstartblock(da_new); 1840 xfs_iext_next(ifp, &bma->icur); 1841 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); 1842 xfs_iext_prev(ifp, &bma->icur); 1843 break; 1844 1845 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 1846 /* 1847 * Filling in the last part of a previous delayed allocation. 1848 * The right neighbor is contiguous with the new allocation. 1849 */ 1850 old = RIGHT; 1851 RIGHT.br_startoff = new->br_startoff; 1852 RIGHT.br_startblock = new->br_startblock; 1853 RIGHT.br_blockcount += new->br_blockcount; 1854 1855 if (bma->cur == NULL) 1856 rval = XFS_ILOG_DEXT; 1857 else { 1858 rval = 0; 1859 error = xfs_bmbt_lookup_eq(bma->cur, &old, &i); 1860 if (error) 1861 goto done; 1862 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1863 error = xfs_bmbt_update(bma->cur, &RIGHT); 1864 if (error) 1865 goto done; 1866 } 1867 1868 temp = PREV.br_blockcount - new->br_blockcount; 1869 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1870 startblockval(PREV.br_startblock)); 1871 1872 PREV.br_blockcount = temp; 1873 PREV.br_startblock = nullstartblock(da_new); 1874 1875 xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV); 1876 xfs_iext_next(ifp, &bma->icur); 1877 xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT); 1878 break; 1879 1880 case BMAP_RIGHT_FILLING: 1881 /* 1882 * Filling in the last part of a previous delayed allocation. 1883 * The right neighbor is not contiguous. 1884 */ 1885 xfs_iext_update_extent(bma->ip, state, &bma->icur, new); 1886 (*nextents)++; 1887 if (bma->cur == NULL) 1888 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 1889 else { 1890 rval = XFS_ILOG_CORE; 1891 error = xfs_bmbt_lookup_eq(bma->cur, new, &i); 1892 if (error) 1893 goto done; 1894 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 1895 error = xfs_btree_insert(bma->cur, &i); 1896 if (error) 1897 goto done; 1898 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 1899 } 1900 1901 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 1902 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 1903 bma->firstblock, bma->dfops, &bma->cur, 1, 1904 &tmp_rval, whichfork); 1905 rval |= tmp_rval; 1906 if (error) 1907 goto done; 1908 } 1909 1910 temp = PREV.br_blockcount - new->br_blockcount; 1911 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), 1912 startblockval(PREV.br_startblock) - 1913 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 1914 1915 PREV.br_startblock = nullstartblock(da_new); 1916 PREV.br_blockcount = temp; 1917 xfs_iext_insert(bma->ip, &bma->icur, &PREV, state); 1918 xfs_iext_next(ifp, &bma->icur); 1919 break; 1920 1921 case 0: 1922 /* 1923 * Filling in the middle part of a previous delayed allocation. 1924 * Contiguity is impossible here. 1925 * This case is avoided almost all the time. 
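 * (Likely because delalloc conversions are issued from the leading
 * edge of a reservation as writeback walks the file forward, so a
 * conversion that leaves delayed allocation on both sides is rare;
 * explanatory note, not from the original source.)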
1926 		 *
1927 		 * We start with a delayed allocation:
1928 		 *
1929 		 * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
1930 		 * PREV @ idx
1931 		 *
1932 		 * and we are allocating:
1933 		 *                     +rrrrrrrrrrrrrrrrr+
1934 		 *                            new
1935 		 *
1936 		 * and we set it up for insertion as:
1937 		 * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
1938 		 *                            new
1939 		 *  PREV @ idx          LEFT              RIGHT
1940 		 *                      inserted at idx + 1
1941 		 */
1942 		old = PREV;
1943 
1944 		/* LEFT is the new middle */
1945 		LEFT = *new;
1946 
1947 		/* RIGHT is the new right */
1948 		RIGHT.br_state = PREV.br_state;
1949 		RIGHT.br_startoff = new_endoff;
1950 		RIGHT.br_blockcount =
1951 			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1952 		RIGHT.br_startblock =
1953 			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1954 					RIGHT.br_blockcount));
1955 
1956 		/* truncate PREV */
1957 		PREV.br_blockcount = new->br_startoff - PREV.br_startoff;
1958 		PREV.br_startblock =
1959 			nullstartblock(xfs_bmap_worst_indlen(bma->ip,
1960 					PREV.br_blockcount));
1961 		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
1962 
1963 		xfs_iext_next(ifp, &bma->icur);
1964 		xfs_iext_insert(bma->ip, &bma->icur, &RIGHT, state);
1965 		xfs_iext_insert(bma->ip, &bma->icur, &LEFT, state);
1966 		(*nextents)++;
1967 
1968 		if (bma->cur == NULL)
1969 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1970 		else {
1971 			rval = XFS_ILOG_CORE;
1972 			error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
1973 			if (error)
1974 				goto done;
1975 			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1976 			error = xfs_btree_insert(bma->cur, &i);
1977 			if (error)
1978 				goto done;
1979 			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1980 		}
1981 
1982 		if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1983 			error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1984 					bma->firstblock, bma->dfops, &bma->cur,
1985 					1, &tmp_rval, whichfork);
1986 			rval |= tmp_rval;
1987 			if (error)
1988 				goto done;
1989 		}
1990 
1991 		da_new = startblockval(PREV.br_startblock) +
1992 			 startblockval(RIGHT.br_startblock);
1993 		break;
1994 
1995 	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1996 	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
1997 	case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
1998 	case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1999 	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2000 	case BMAP_LEFT_CONTIG:
2001 	case BMAP_RIGHT_CONTIG:
2002 		/*
2003 		 * These cases are all impossible.
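		 * (Why: a neighbor can only be contiguous with the new extent
		 * if the new extent reaches that edge of PREV, so
		 * BMAP_LEFT_CONTIG implies BMAP_LEFT_FILLING and
		 * BMAP_RIGHT_CONTIG implies BMAP_RIGHT_FILLING; none of the
		 * combinations above satisfy that.)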
2004 */ 2005 ASSERT(0); 2006 } 2007 2008 /* add reverse mapping unless caller opted out */ 2009 if (!(bma->flags & XFS_BMAPI_NORMAP)) { 2010 error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, 2011 whichfork, new); 2012 if (error) 2013 goto done; 2014 } 2015 2016 /* convert to a btree if necessary */ 2017 if (xfs_bmap_needs_btree(bma->ip, whichfork)) { 2018 int tmp_logflags; /* partial log flag return val */ 2019 2020 ASSERT(bma->cur == NULL); 2021 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2022 bma->firstblock, bma->dfops, &bma->cur, 2023 da_old > 0, &tmp_logflags, whichfork); 2024 bma->logflags |= tmp_logflags; 2025 if (error) 2026 goto done; 2027 } 2028 2029 if (bma->cur) { 2030 da_new += bma->cur->bc_private.b.allocated; 2031 bma->cur->bc_private.b.allocated = 0; 2032 } 2033 2034 /* adjust for changes in reserved delayed indirect blocks */ 2035 if (da_new != da_old) { 2036 ASSERT(state == 0 || da_new < da_old); 2037 error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), 2038 false); 2039 } 2040 2041 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork); 2042 done: 2043 if (whichfork != XFS_COW_FORK) 2044 bma->logflags |= rval; 2045 return error; 2046 #undef LEFT 2047 #undef RIGHT 2048 #undef PREV 2049 } 2050 2051 /* 2052 * Convert an unwritten allocation to a real allocation or vice versa. 2053 */ 2054 STATIC int /* error */ 2055 xfs_bmap_add_extent_unwritten_real( 2056 struct xfs_trans *tp, 2057 xfs_inode_t *ip, /* incore inode pointer */ 2058 int whichfork, 2059 struct xfs_iext_cursor *icur, 2060 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 2061 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2062 xfs_fsblock_t *first, /* pointer to firstblock variable */ 2063 struct xfs_defer_ops *dfops, /* list of extents to be freed */ 2064 int *logflagsp) /* inode logging flags */ 2065 { 2066 xfs_btree_cur_t *cur; /* btree cursor */ 2067 int error; /* error return value */ 2068 int i; /* temp state */ 2069 xfs_ifork_t *ifp; /* inode fork pointer */ 2070 xfs_fileoff_t new_endoff; /* end offset of new entry */ 2071 xfs_bmbt_irec_t r[3]; /* neighbor extent entries */ 2072 /* left is 0, right is 1, prev is 2 */ 2073 int rval=0; /* return value (logging flags) */ 2074 int state = xfs_bmap_fork_to_state(whichfork); 2075 struct xfs_mount *mp = ip->i_mount; 2076 struct xfs_bmbt_irec old; 2077 2078 *logflagsp = 0; 2079 2080 cur = *curp; 2081 ifp = XFS_IFORK_PTR(ip, whichfork); 2082 2083 ASSERT(!isnullstartblock(new->br_startblock)); 2084 2085 XFS_STATS_INC(mp, xs_add_exlist); 2086 2087 #define LEFT r[0] 2088 #define RIGHT r[1] 2089 #define PREV r[2] 2090 2091 /* 2092 * Set up a bunch of variables to make the tests simpler. 2093 */ 2094 error = 0; 2095 xfs_iext_get_extent(ifp, icur, &PREV); 2096 ASSERT(new->br_state != PREV.br_state); 2097 new_endoff = new->br_startoff + new->br_blockcount; 2098 ASSERT(PREV.br_startoff <= new->br_startoff); 2099 ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff); 2100 2101 /* 2102 * Set flags determining what part of the previous oldext allocation 2103 * extent is being replaced by a newext allocation. 2104 */ 2105 if (PREV.br_startoff == new->br_startoff) 2106 state |= BMAP_LEFT_FILLING; 2107 if (PREV.br_startoff + PREV.br_blockcount == new_endoff) 2108 state |= BMAP_RIGHT_FILLING; 2109 2110 /* 2111 * Check and set flags if this segment has a left neighbor. 2112 * Don't set contiguous if the combined extent would be too large. 
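 *
 * Worked example with illustrative numbers: when converting
 * new = [10,15) at disk block 100 to written, a written LEFT = [0,10)
 * whose disk blocks end exactly at 100 passes all four tests below
 * and is merged, provided the combined length stays within MAXEXTLEN.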
2113 */ 2114 if (xfs_iext_peek_prev_extent(ifp, icur, &LEFT)) { 2115 state |= BMAP_LEFT_VALID; 2116 if (isnullstartblock(LEFT.br_startblock)) 2117 state |= BMAP_LEFT_DELAY; 2118 } 2119 2120 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2121 LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff && 2122 LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock && 2123 LEFT.br_state == new->br_state && 2124 LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2125 state |= BMAP_LEFT_CONTIG; 2126 2127 /* 2128 * Check and set flags if this segment has a right neighbor. 2129 * Don't set contiguous if the combined extent would be too large. 2130 * Also check for all-three-contiguous being too large. 2131 */ 2132 if (xfs_iext_peek_next_extent(ifp, icur, &RIGHT)) { 2133 state |= BMAP_RIGHT_VALID; 2134 if (isnullstartblock(RIGHT.br_startblock)) 2135 state |= BMAP_RIGHT_DELAY; 2136 } 2137 2138 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2139 new_endoff == RIGHT.br_startoff && 2140 new->br_startblock + new->br_blockcount == RIGHT.br_startblock && 2141 new->br_state == RIGHT.br_state && 2142 new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN && 2143 ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2144 BMAP_RIGHT_FILLING)) != 2145 (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING | 2146 BMAP_RIGHT_FILLING) || 2147 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount 2148 <= MAXEXTLEN)) 2149 state |= BMAP_RIGHT_CONTIG; 2150 2151 /* 2152 * Switch out based on the FILLING and CONTIG state bits. 2153 */ 2154 switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2155 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) { 2156 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | 2157 BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2158 /* 2159 * Setting all of a previous oldext extent to newext. 2160 * The left and right neighbors are both contiguous with new. 2161 */ 2162 LEFT.br_blockcount += PREV.br_blockcount + RIGHT.br_blockcount; 2163 2164 xfs_iext_remove(ip, icur, state); 2165 xfs_iext_remove(ip, icur, state); 2166 xfs_iext_prev(ifp, icur); 2167 xfs_iext_update_extent(ip, state, icur, &LEFT); 2168 XFS_IFORK_NEXT_SET(ip, whichfork, 2169 XFS_IFORK_NEXTENTS(ip, whichfork) - 2); 2170 if (cur == NULL) 2171 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2172 else { 2173 rval = XFS_ILOG_CORE; 2174 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2175 if (error) 2176 goto done; 2177 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2178 if ((error = xfs_btree_delete(cur, &i))) 2179 goto done; 2180 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2181 if ((error = xfs_btree_decrement(cur, 0, &i))) 2182 goto done; 2183 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2184 if ((error = xfs_btree_delete(cur, &i))) 2185 goto done; 2186 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2187 if ((error = xfs_btree_decrement(cur, 0, &i))) 2188 goto done; 2189 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2190 error = xfs_bmbt_update(cur, &LEFT); 2191 if (error) 2192 goto done; 2193 } 2194 break; 2195 2196 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2197 /* 2198 * Setting all of a previous oldext extent to newext. 2199 * The left neighbor is contiguous, the right is not. 
2200 */ 2201 LEFT.br_blockcount += PREV.br_blockcount; 2202 2203 xfs_iext_remove(ip, icur, state); 2204 xfs_iext_prev(ifp, icur); 2205 xfs_iext_update_extent(ip, state, icur, &LEFT); 2206 XFS_IFORK_NEXT_SET(ip, whichfork, 2207 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2208 if (cur == NULL) 2209 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2210 else { 2211 rval = XFS_ILOG_CORE; 2212 error = xfs_bmbt_lookup_eq(cur, &PREV, &i); 2213 if (error) 2214 goto done; 2215 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2216 if ((error = xfs_btree_delete(cur, &i))) 2217 goto done; 2218 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2219 if ((error = xfs_btree_decrement(cur, 0, &i))) 2220 goto done; 2221 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2222 error = xfs_bmbt_update(cur, &LEFT); 2223 if (error) 2224 goto done; 2225 } 2226 break; 2227 2228 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2229 /* 2230 * Setting all of a previous oldext extent to newext. 2231 * The right neighbor is contiguous, the left is not. 2232 */ 2233 PREV.br_blockcount += RIGHT.br_blockcount; 2234 PREV.br_state = new->br_state; 2235 2236 xfs_iext_next(ifp, icur); 2237 xfs_iext_remove(ip, icur, state); 2238 xfs_iext_prev(ifp, icur); 2239 xfs_iext_update_extent(ip, state, icur, &PREV); 2240 2241 XFS_IFORK_NEXT_SET(ip, whichfork, 2242 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2243 if (cur == NULL) 2244 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2245 else { 2246 rval = XFS_ILOG_CORE; 2247 error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i); 2248 if (error) 2249 goto done; 2250 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2251 if ((error = xfs_btree_delete(cur, &i))) 2252 goto done; 2253 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2254 if ((error = xfs_btree_decrement(cur, 0, &i))) 2255 goto done; 2256 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2257 error = xfs_bmbt_update(cur, &PREV); 2258 if (error) 2259 goto done; 2260 } 2261 break; 2262 2263 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 2264 /* 2265 * Setting all of a previous oldext extent to newext. 2266 * Neither the left nor right neighbors are contiguous with 2267 * the new one. 2268 */ 2269 PREV.br_state = new->br_state; 2270 xfs_iext_update_extent(ip, state, icur, &PREV); 2271 2272 if (cur == NULL) 2273 rval = XFS_ILOG_DEXT; 2274 else { 2275 rval = 0; 2276 error = xfs_bmbt_lookup_eq(cur, new, &i); 2277 if (error) 2278 goto done; 2279 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2280 error = xfs_bmbt_update(cur, &PREV); 2281 if (error) 2282 goto done; 2283 } 2284 break; 2285 2286 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG: 2287 /* 2288 * Setting the first part of a previous oldext extent to newext. 2289 * The left neighbor is contiguous. 
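 *
 * (Illustrative sketch, not from the original source: converting the
 * first N blocks of PREV moves PREV's start forward by N, in both
 * file offset and disk block, and grows LEFT by N; the two records
 * are rewritten in place, so no btree insert or delete is required.)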
2290 */ 2291 LEFT.br_blockcount += new->br_blockcount; 2292 2293 old = PREV; 2294 PREV.br_startoff += new->br_blockcount; 2295 PREV.br_startblock += new->br_blockcount; 2296 PREV.br_blockcount -= new->br_blockcount; 2297 2298 xfs_iext_update_extent(ip, state, icur, &PREV); 2299 xfs_iext_prev(ifp, icur); 2300 xfs_iext_update_extent(ip, state, icur, &LEFT); 2301 2302 if (cur == NULL) 2303 rval = XFS_ILOG_DEXT; 2304 else { 2305 rval = 0; 2306 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2307 if (error) 2308 goto done; 2309 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2310 error = xfs_bmbt_update(cur, &PREV); 2311 if (error) 2312 goto done; 2313 error = xfs_btree_decrement(cur, 0, &i); 2314 if (error) 2315 goto done; 2316 error = xfs_bmbt_update(cur, &LEFT); 2317 if (error) 2318 goto done; 2319 } 2320 break; 2321 2322 case BMAP_LEFT_FILLING: 2323 /* 2324 * Setting the first part of a previous oldext extent to newext. 2325 * The left neighbor is not contiguous. 2326 */ 2327 old = PREV; 2328 PREV.br_startoff += new->br_blockcount; 2329 PREV.br_startblock += new->br_blockcount; 2330 PREV.br_blockcount -= new->br_blockcount; 2331 2332 xfs_iext_update_extent(ip, state, icur, &PREV); 2333 xfs_iext_insert(ip, icur, new, state); 2334 XFS_IFORK_NEXT_SET(ip, whichfork, 2335 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2336 if (cur == NULL) 2337 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2338 else { 2339 rval = XFS_ILOG_CORE; 2340 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2341 if (error) 2342 goto done; 2343 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2344 error = xfs_bmbt_update(cur, &PREV); 2345 if (error) 2346 goto done; 2347 cur->bc_rec.b = *new; 2348 if ((error = xfs_btree_insert(cur, &i))) 2349 goto done; 2350 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2351 } 2352 break; 2353 2354 case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG: 2355 /* 2356 * Setting the last part of a previous oldext extent to newext. 2357 * The right neighbor is contiguous with the new allocation. 2358 */ 2359 old = PREV; 2360 PREV.br_blockcount -= new->br_blockcount; 2361 2362 RIGHT.br_startoff = new->br_startoff; 2363 RIGHT.br_startblock = new->br_startblock; 2364 RIGHT.br_blockcount += new->br_blockcount; 2365 2366 xfs_iext_update_extent(ip, state, icur, &PREV); 2367 xfs_iext_next(ifp, icur); 2368 xfs_iext_update_extent(ip, state, icur, &RIGHT); 2369 2370 if (cur == NULL) 2371 rval = XFS_ILOG_DEXT; 2372 else { 2373 rval = 0; 2374 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2375 if (error) 2376 goto done; 2377 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2378 error = xfs_bmbt_update(cur, &PREV); 2379 if (error) 2380 goto done; 2381 error = xfs_btree_increment(cur, 0, &i); 2382 if (error) 2383 goto done; 2384 error = xfs_bmbt_update(cur, &RIGHT); 2385 if (error) 2386 goto done; 2387 } 2388 break; 2389 2390 case BMAP_RIGHT_FILLING: 2391 /* 2392 * Setting the last part of a previous oldext extent to newext. 2393 * The right neighbor is not contiguous. 
2394 */ 2395 old = PREV; 2396 PREV.br_blockcount -= new->br_blockcount; 2397 2398 xfs_iext_update_extent(ip, state, icur, &PREV); 2399 xfs_iext_next(ifp, icur); 2400 xfs_iext_insert(ip, icur, new, state); 2401 2402 XFS_IFORK_NEXT_SET(ip, whichfork, 2403 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2404 if (cur == NULL) 2405 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2406 else { 2407 rval = XFS_ILOG_CORE; 2408 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2409 if (error) 2410 goto done; 2411 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2412 error = xfs_bmbt_update(cur, &PREV); 2413 if (error) 2414 goto done; 2415 error = xfs_bmbt_lookup_eq(cur, new, &i); 2416 if (error) 2417 goto done; 2418 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2419 if ((error = xfs_btree_insert(cur, &i))) 2420 goto done; 2421 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2422 } 2423 break; 2424 2425 case 0: 2426 /* 2427 * Setting the middle part of a previous oldext extent to 2428 * newext. Contiguity is impossible here. 2429 * One extent becomes three extents. 2430 */ 2431 old = PREV; 2432 PREV.br_blockcount = new->br_startoff - PREV.br_startoff; 2433 2434 r[0] = *new; 2435 r[1].br_startoff = new_endoff; 2436 r[1].br_blockcount = 2437 old.br_startoff + old.br_blockcount - new_endoff; 2438 r[1].br_startblock = new->br_startblock + new->br_blockcount; 2439 r[1].br_state = PREV.br_state; 2440 2441 xfs_iext_update_extent(ip, state, icur, &PREV); 2442 xfs_iext_next(ifp, icur); 2443 xfs_iext_insert(ip, icur, &r[1], state); 2444 xfs_iext_insert(ip, icur, &r[0], state); 2445 2446 XFS_IFORK_NEXT_SET(ip, whichfork, 2447 XFS_IFORK_NEXTENTS(ip, whichfork) + 2); 2448 if (cur == NULL) 2449 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2450 else { 2451 rval = XFS_ILOG_CORE; 2452 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2453 if (error) 2454 goto done; 2455 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2456 /* new right extent - oldext */ 2457 error = xfs_bmbt_update(cur, &r[1]); 2458 if (error) 2459 goto done; 2460 /* new left extent - oldext */ 2461 cur->bc_rec.b = PREV; 2462 if ((error = xfs_btree_insert(cur, &i))) 2463 goto done; 2464 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2465 /* 2466 * Reset the cursor to the position of the new extent 2467 * we are about to insert as we can't trust it after 2468 * the previous insert. 2469 */ 2470 error = xfs_bmbt_lookup_eq(cur, new, &i); 2471 if (error) 2472 goto done; 2473 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2474 /* new middle extent - newext */ 2475 if ((error = xfs_btree_insert(cur, &i))) 2476 goto done; 2477 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2478 } 2479 break; 2480 2481 case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2482 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2483 case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG: 2484 case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG: 2485 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2486 case BMAP_LEFT_CONTIG: 2487 case BMAP_RIGHT_CONTIG: 2488 /* 2489 * These cases are all impossible. 
2490 */ 2491 ASSERT(0); 2492 } 2493 2494 /* update reverse mappings */ 2495 error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new); 2496 if (error) 2497 goto done; 2498 2499 /* convert to a btree if necessary */ 2500 if (xfs_bmap_needs_btree(ip, whichfork)) { 2501 int tmp_logflags; /* partial log flag return val */ 2502 2503 ASSERT(cur == NULL); 2504 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, 2505 0, &tmp_logflags, whichfork); 2506 *logflagsp |= tmp_logflags; 2507 if (error) 2508 goto done; 2509 } 2510 2511 /* clear out the allocated field, done with it now in any case. */ 2512 if (cur) { 2513 cur->bc_private.b.allocated = 0; 2514 *curp = cur; 2515 } 2516 2517 xfs_bmap_check_leaf_extents(*curp, ip, whichfork); 2518 done: 2519 *logflagsp |= rval; 2520 return error; 2521 #undef LEFT 2522 #undef RIGHT 2523 #undef PREV 2524 } 2525 2526 /* 2527 * Convert a hole to a delayed allocation. 2528 */ 2529 STATIC void 2530 xfs_bmap_add_extent_hole_delay( 2531 xfs_inode_t *ip, /* incore inode pointer */ 2532 int whichfork, 2533 struct xfs_iext_cursor *icur, 2534 xfs_bmbt_irec_t *new) /* new data to add to file extents */ 2535 { 2536 xfs_ifork_t *ifp; /* inode fork pointer */ 2537 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2538 xfs_filblks_t newlen=0; /* new indirect size */ 2539 xfs_filblks_t oldlen=0; /* old indirect size */ 2540 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2541 int state = xfs_bmap_fork_to_state(whichfork); 2542 xfs_filblks_t temp; /* temp for indirect calculations */ 2543 2544 ifp = XFS_IFORK_PTR(ip, whichfork); 2545 ASSERT(isnullstartblock(new->br_startblock)); 2546 2547 /* 2548 * Check and set flags if this segment has a left neighbor 2549 */ 2550 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2551 state |= BMAP_LEFT_VALID; 2552 if (isnullstartblock(left.br_startblock)) 2553 state |= BMAP_LEFT_DELAY; 2554 } 2555 2556 /* 2557 * Check and set flags if the current (right) segment exists. 2558 * If it doesn't exist, we're converting the hole at end-of-file. 2559 */ 2560 if (xfs_iext_get_extent(ifp, icur, &right)) { 2561 state |= BMAP_RIGHT_VALID; 2562 if (isnullstartblock(right.br_startblock)) 2563 state |= BMAP_RIGHT_DELAY; 2564 } 2565 2566 /* 2567 * Set contiguity flags on the left and right neighbors. 2568 * Don't let extents get too large, even if the pieces are contiguous. 2569 */ 2570 if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) && 2571 left.br_startoff + left.br_blockcount == new->br_startoff && 2572 left.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2573 state |= BMAP_LEFT_CONTIG; 2574 2575 if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) && 2576 new->br_startoff + new->br_blockcount == right.br_startoff && 2577 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 2578 (!(state & BMAP_LEFT_CONTIG) || 2579 (left.br_blockcount + new->br_blockcount + 2580 right.br_blockcount <= MAXEXTLEN))) 2581 state |= BMAP_RIGHT_CONTIG; 2582 2583 /* 2584 * Switch out based on the contiguity flags. 2585 */ 2586 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2587 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2588 /* 2589 * New allocation is contiguous with delayed allocations 2590 * on the left and on the right. 2591 * Merge all three into a single extent record. 
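 *
 * Example with illustrative numbers: merging a 5-block left, 3-block
 * new and 4-block right yields one 12-block delalloc extent. The
 * three indirect-block reservations are pooled and clamped to the
 * worst-case need of the merged extent, so newlen never exceeds
 * oldlen and any surplus is returned to fdblocks at the end of this
 * function.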
2592 */ 2593 temp = left.br_blockcount + new->br_blockcount + 2594 right.br_blockcount; 2595 2596 oldlen = startblockval(left.br_startblock) + 2597 startblockval(new->br_startblock) + 2598 startblockval(right.br_startblock); 2599 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2600 oldlen); 2601 left.br_startblock = nullstartblock(newlen); 2602 left.br_blockcount = temp; 2603 2604 xfs_iext_remove(ip, icur, state); 2605 xfs_iext_prev(ifp, icur); 2606 xfs_iext_update_extent(ip, state, icur, &left); 2607 break; 2608 2609 case BMAP_LEFT_CONTIG: 2610 /* 2611 * New allocation is contiguous with a delayed allocation 2612 * on the left. 2613 * Merge the new allocation with the left neighbor. 2614 */ 2615 temp = left.br_blockcount + new->br_blockcount; 2616 2617 oldlen = startblockval(left.br_startblock) + 2618 startblockval(new->br_startblock); 2619 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2620 oldlen); 2621 left.br_blockcount = temp; 2622 left.br_startblock = nullstartblock(newlen); 2623 2624 xfs_iext_prev(ifp, icur); 2625 xfs_iext_update_extent(ip, state, icur, &left); 2626 break; 2627 2628 case BMAP_RIGHT_CONTIG: 2629 /* 2630 * New allocation is contiguous with a delayed allocation 2631 * on the right. 2632 * Merge the new allocation with the right neighbor. 2633 */ 2634 temp = new->br_blockcount + right.br_blockcount; 2635 oldlen = startblockval(new->br_startblock) + 2636 startblockval(right.br_startblock); 2637 newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), 2638 oldlen); 2639 right.br_startoff = new->br_startoff; 2640 right.br_startblock = nullstartblock(newlen); 2641 right.br_blockcount = temp; 2642 xfs_iext_update_extent(ip, state, icur, &right); 2643 break; 2644 2645 case 0: 2646 /* 2647 * New allocation is not contiguous with another 2648 * delayed allocation. 2649 * Insert a new entry. 2650 */ 2651 oldlen = newlen = 0; 2652 xfs_iext_insert(ip, icur, new, state); 2653 break; 2654 } 2655 if (oldlen != newlen) { 2656 ASSERT(oldlen > newlen); 2657 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen), 2658 false); 2659 /* 2660 * Nothing to do for disk quota accounting here. 2661 */ 2662 } 2663 } 2664 2665 /* 2666 * Convert a hole to a real allocation. 2667 */ 2668 STATIC int /* error */ 2669 xfs_bmap_add_extent_hole_real( 2670 struct xfs_trans *tp, 2671 struct xfs_inode *ip, 2672 int whichfork, 2673 struct xfs_iext_cursor *icur, 2674 struct xfs_btree_cur **curp, 2675 struct xfs_bmbt_irec *new, 2676 xfs_fsblock_t *first, 2677 struct xfs_defer_ops *dfops, 2678 int *logflagsp, 2679 int flags) 2680 { 2681 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 2682 struct xfs_mount *mp = ip->i_mount; 2683 struct xfs_btree_cur *cur = *curp; 2684 int error; /* error return value */ 2685 int i; /* temp state */ 2686 xfs_bmbt_irec_t left; /* left neighbor extent entry */ 2687 xfs_bmbt_irec_t right; /* right neighbor extent entry */ 2688 int rval=0; /* return value (logging flags) */ 2689 int state = xfs_bmap_fork_to_state(whichfork); 2690 struct xfs_bmbt_irec old; 2691 2692 ASSERT(!isnullstartblock(new->br_startblock)); 2693 ASSERT(!cur || !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 2694 2695 XFS_STATS_INC(mp, xs_add_exlist); 2696 2697 /* 2698 * Check and set flags if this segment has a left neighbor. 
2699 */ 2700 if (xfs_iext_peek_prev_extent(ifp, icur, &left)) { 2701 state |= BMAP_LEFT_VALID; 2702 if (isnullstartblock(left.br_startblock)) 2703 state |= BMAP_LEFT_DELAY; 2704 } 2705 2706 /* 2707 * Check and set flags if this segment has a current value. 2708 * Not true if we're inserting into the "hole" at eof. 2709 */ 2710 if (xfs_iext_get_extent(ifp, icur, &right)) { 2711 state |= BMAP_RIGHT_VALID; 2712 if (isnullstartblock(right.br_startblock)) 2713 state |= BMAP_RIGHT_DELAY; 2714 } 2715 2716 /* 2717 * We're inserting a real allocation between "left" and "right". 2718 * Set the contiguity flags. Don't let extents get too large. 2719 */ 2720 if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) && 2721 left.br_startoff + left.br_blockcount == new->br_startoff && 2722 left.br_startblock + left.br_blockcount == new->br_startblock && 2723 left.br_state == new->br_state && 2724 left.br_blockcount + new->br_blockcount <= MAXEXTLEN) 2725 state |= BMAP_LEFT_CONTIG; 2726 2727 if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) && 2728 new->br_startoff + new->br_blockcount == right.br_startoff && 2729 new->br_startblock + new->br_blockcount == right.br_startblock && 2730 new->br_state == right.br_state && 2731 new->br_blockcount + right.br_blockcount <= MAXEXTLEN && 2732 (!(state & BMAP_LEFT_CONTIG) || 2733 left.br_blockcount + new->br_blockcount + 2734 right.br_blockcount <= MAXEXTLEN)) 2735 state |= BMAP_RIGHT_CONTIG; 2736 2737 error = 0; 2738 /* 2739 * Select which case we're in here, and implement it. 2740 */ 2741 switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { 2742 case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: 2743 /* 2744 * New allocation is contiguous with real allocations on the 2745 * left and on the right. 2746 * Merge all three into a single extent record. 2747 */ 2748 left.br_blockcount += new->br_blockcount + right.br_blockcount; 2749 2750 xfs_iext_remove(ip, icur, state); 2751 xfs_iext_prev(ifp, icur); 2752 xfs_iext_update_extent(ip, state, icur, &left); 2753 2754 XFS_IFORK_NEXT_SET(ip, whichfork, 2755 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 2756 if (cur == NULL) { 2757 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2758 } else { 2759 rval = XFS_ILOG_CORE; 2760 error = xfs_bmbt_lookup_eq(cur, &right, &i); 2761 if (error) 2762 goto done; 2763 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2764 error = xfs_btree_delete(cur, &i); 2765 if (error) 2766 goto done; 2767 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2768 error = xfs_btree_decrement(cur, 0, &i); 2769 if (error) 2770 goto done; 2771 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2772 error = xfs_bmbt_update(cur, &left); 2773 if (error) 2774 goto done; 2775 } 2776 break; 2777 2778 case BMAP_LEFT_CONTIG: 2779 /* 2780 * New allocation is contiguous with a real allocation 2781 * on the left. 2782 * Merge the new allocation with the left neighbor. 2783 */ 2784 old = left; 2785 left.br_blockcount += new->br_blockcount; 2786 2787 xfs_iext_prev(ifp, icur); 2788 xfs_iext_update_extent(ip, state, icur, &left); 2789 2790 if (cur == NULL) { 2791 rval = xfs_ilog_fext(whichfork); 2792 } else { 2793 rval = 0; 2794 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2795 if (error) 2796 goto done; 2797 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2798 error = xfs_bmbt_update(cur, &left); 2799 if (error) 2800 goto done; 2801 } 2802 break; 2803 2804 case BMAP_RIGHT_CONTIG: 2805 /* 2806 * New allocation is contiguous with a real allocation 2807 * on the right. 2808 * Merge the new allocation with the right neighbor. 
2809 */ 2810 old = right; 2811 2812 right.br_startoff = new->br_startoff; 2813 right.br_startblock = new->br_startblock; 2814 right.br_blockcount += new->br_blockcount; 2815 xfs_iext_update_extent(ip, state, icur, &right); 2816 2817 if (cur == NULL) { 2818 rval = xfs_ilog_fext(whichfork); 2819 } else { 2820 rval = 0; 2821 error = xfs_bmbt_lookup_eq(cur, &old, &i); 2822 if (error) 2823 goto done; 2824 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2825 error = xfs_bmbt_update(cur, &right); 2826 if (error) 2827 goto done; 2828 } 2829 break; 2830 2831 case 0: 2832 /* 2833 * New allocation is not contiguous with another 2834 * real allocation. 2835 * Insert a new entry. 2836 */ 2837 xfs_iext_insert(ip, icur, new, state); 2838 XFS_IFORK_NEXT_SET(ip, whichfork, 2839 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 2840 if (cur == NULL) { 2841 rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork); 2842 } else { 2843 rval = XFS_ILOG_CORE; 2844 error = xfs_bmbt_lookup_eq(cur, new, &i); 2845 if (error) 2846 goto done; 2847 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done); 2848 error = xfs_btree_insert(cur, &i); 2849 if (error) 2850 goto done; 2851 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2852 } 2853 break; 2854 } 2855 2856 /* add reverse mapping unless caller opted out */ 2857 if (!(flags & XFS_BMAPI_NORMAP)) { 2858 error = xfs_rmap_map_extent(mp, dfops, ip, whichfork, new); 2859 if (error) 2860 goto done; 2861 } 2862 2863 /* convert to a btree if necessary */ 2864 if (xfs_bmap_needs_btree(ip, whichfork)) { 2865 int tmp_logflags; /* partial log flag return val */ 2866 2867 ASSERT(cur == NULL); 2868 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, curp, 2869 0, &tmp_logflags, whichfork); 2870 *logflagsp |= tmp_logflags; 2871 cur = *curp; 2872 if (error) 2873 goto done; 2874 } 2875 2876 /* clear out the allocated field, done with it now in any case. */ 2877 if (cur) 2878 cur->bc_private.b.allocated = 0; 2879 2880 xfs_bmap_check_leaf_extents(cur, ip, whichfork); 2881 done: 2882 *logflagsp |= rval; 2883 return error; 2884 } 2885 2886 /* 2887 * Functions used in the extent read, allocate and remove paths 2888 */ 2889 2890 /* 2891 * Adjust the size of the new extent based on di_extsize and rt extsize. 2892 */ 2893 int 2894 xfs_bmap_extsize_align( 2895 xfs_mount_t *mp, 2896 xfs_bmbt_irec_t *gotp, /* next extent pointer */ 2897 xfs_bmbt_irec_t *prevp, /* previous extent pointer */ 2898 xfs_extlen_t extsz, /* align to this extent size */ 2899 int rt, /* is this a realtime inode? */ 2900 int eof, /* is extent at end-of-file? */ 2901 int delay, /* creating delalloc extent? */ 2902 int convert, /* overwriting unwritten extent? */ 2903 xfs_fileoff_t *offp, /* in/out: aligned offset */ 2904 xfs_extlen_t *lenp) /* in/out: aligned length */ 2905 { 2906 xfs_fileoff_t orig_off; /* original offset */ 2907 xfs_extlen_t orig_alen; /* original length */ 2908 xfs_fileoff_t orig_end; /* original off+len */ 2909 xfs_fileoff_t nexto; /* next file offset */ 2910 xfs_fileoff_t prevo; /* previous file offset */ 2911 xfs_fileoff_t align_off; /* temp for offset */ 2912 xfs_extlen_t align_alen; /* temp for length */ 2913 xfs_extlen_t temp; /* temp for calculations */ 2914 2915 if (convert) 2916 return 0; 2917 2918 orig_off = align_off = *offp; 2919 orig_alen = align_alen = *lenp; 2920 orig_end = orig_off + orig_alen; 2921 2922 /* 2923 * If this request overlaps an existing extent, then don't 2924 * attempt to perform any additional alignment. 
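 *
 * (Worked example with illustrative numbers: with a 16-block extent
 * size hint, a request for [21,29) that falls inside the existing
 * mapping at gotp is returned unchanged by the check below, while
 * the same request against a hole is widened by the alignment code
 * that follows to the aligned range [16,32).)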
2925 */ 2926 if (!delay && !eof && 2927 (orig_off >= gotp->br_startoff) && 2928 (orig_end <= gotp->br_startoff + gotp->br_blockcount)) { 2929 return 0; 2930 } 2931 2932 /* 2933 * If the file offset is unaligned vs. the extent size 2934 * we need to align it. This will be possible unless 2935 * the file was previously written with a kernel that didn't 2936 * perform this alignment, or if a truncate shot us in the 2937 * foot. 2938 */ 2939 temp = do_mod(orig_off, extsz); 2940 if (temp) { 2941 align_alen += temp; 2942 align_off -= temp; 2943 } 2944 2945 /* Same adjustment for the end of the requested area. */ 2946 temp = (align_alen % extsz); 2947 if (temp) 2948 align_alen += extsz - temp; 2949 2950 /* 2951 * For large extent hint sizes, the aligned extent might be larger than 2952 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls 2953 * the length back under MAXEXTLEN. The outer allocation loops handle 2954 * short allocation just fine, so it is safe to do this. We only want to 2955 * do it when we are forced to, though, because it means more allocation 2956 * operations are required. 2957 */ 2958 while (align_alen > MAXEXTLEN) 2959 align_alen -= extsz; 2960 ASSERT(align_alen <= MAXEXTLEN); 2961 2962 /* 2963 * If the previous block overlaps with this proposed allocation 2964 * then move the start forward without adjusting the length. 2965 */ 2966 if (prevp->br_startoff != NULLFILEOFF) { 2967 if (prevp->br_startblock == HOLESTARTBLOCK) 2968 prevo = prevp->br_startoff; 2969 else 2970 prevo = prevp->br_startoff + prevp->br_blockcount; 2971 } else 2972 prevo = 0; 2973 if (align_off != orig_off && align_off < prevo) 2974 align_off = prevo; 2975 /* 2976 * If the next block overlaps with this proposed allocation 2977 * then move the start back without adjusting the length, 2978 * but not before offset 0. 2979 * This may of course make the start overlap previous block, 2980 * and if we hit the offset 0 limit then the next block 2981 * can still overlap too. 2982 */ 2983 if (!eof && gotp->br_startoff != NULLFILEOFF) { 2984 if ((delay && gotp->br_startblock == HOLESTARTBLOCK) || 2985 (!delay && gotp->br_startblock == DELAYSTARTBLOCK)) 2986 nexto = gotp->br_startoff + gotp->br_blockcount; 2987 else 2988 nexto = gotp->br_startoff; 2989 } else 2990 nexto = NULLFILEOFF; 2991 if (!eof && 2992 align_off + align_alen != orig_end && 2993 align_off + align_alen > nexto) 2994 align_off = nexto > align_alen ? nexto - align_alen : 0; 2995 /* 2996 * If we're now overlapping the next or previous extent that 2997 * means we can't fit an extsz piece in this hole. Just move 2998 * the start forward to the first valid spot and set 2999 * the length so we hit the end. 3000 */ 3001 if (align_off != orig_off && align_off < prevo) 3002 align_off = prevo; 3003 if (align_off + align_alen != orig_end && 3004 align_off + align_alen > nexto && 3005 nexto != NULLFILEOFF) { 3006 ASSERT(nexto > prevo); 3007 align_alen = nexto - align_off; 3008 } 3009 3010 /* 3011 * If realtime, and the result isn't a multiple of the realtime 3012 * extent size we need to remove blocks until it is. 3013 */ 3014 if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) { 3015 /* 3016 * We're not covering the original request, or 3017 * we won't be able to once we fix the length. 3018 */ 3019 if (orig_off < align_off || 3020 orig_end > align_off + align_alen || 3021 align_alen - temp < orig_alen) 3022 return -EINVAL; 3023 /* 3024 * Try to fix it by moving the start up. 
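 * (Illustrative numbers: with sb_rextsize = 4 and align_alen = 13,
 * temp is 1; when the shifted start still covers the original
 * request, advancing align_off by one block and shrinking align_alen
 * to 12 restores a whole multiple of the realtime extent size.)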
3025 */ 3026 if (align_off + temp <= orig_off) { 3027 align_alen -= temp; 3028 align_off += temp; 3029 } 3030 /* 3031 * Try to fix it by moving the end in. 3032 */ 3033 else if (align_off + align_alen - temp >= orig_end) 3034 align_alen -= temp; 3035 /* 3036 * Set the start to the minimum then trim the length. 3037 */ 3038 else { 3039 align_alen -= orig_off - align_off; 3040 align_off = orig_off; 3041 align_alen -= align_alen % mp->m_sb.sb_rextsize; 3042 } 3043 /* 3044 * Result doesn't cover the request, fail it. 3045 */ 3046 if (orig_off < align_off || orig_end > align_off + align_alen) 3047 return -EINVAL; 3048 } else { 3049 ASSERT(orig_off >= align_off); 3050 /* see MAXEXTLEN handling above */ 3051 ASSERT(orig_end <= align_off + align_alen || 3052 align_alen + extsz > MAXEXTLEN); 3053 } 3054 3055 #ifdef DEBUG 3056 if (!eof && gotp->br_startoff != NULLFILEOFF) 3057 ASSERT(align_off + align_alen <= gotp->br_startoff); 3058 if (prevp->br_startoff != NULLFILEOFF) 3059 ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount); 3060 #endif 3061 3062 *lenp = align_alen; 3063 *offp = align_off; 3064 return 0; 3065 } 3066 3067 #define XFS_ALLOC_GAP_UNITS 4 3068 3069 void 3070 xfs_bmap_adjacent( 3071 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 3072 { 3073 xfs_fsblock_t adjust; /* adjustment to block numbers */ 3074 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 3075 xfs_mount_t *mp; /* mount point structure */ 3076 int nullfb; /* true if ap->firstblock isn't set */ 3077 int rt; /* true if inode is realtime */ 3078 3079 #define ISVALID(x,y) \ 3080 (rt ? \ 3081 (x) < mp->m_sb.sb_rblocks : \ 3082 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \ 3083 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \ 3084 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) 3085 3086 mp = ap->ip->i_mount; 3087 nullfb = *ap->firstblock == NULLFSBLOCK; 3088 rt = XFS_IS_REALTIME_INODE(ap->ip) && 3089 xfs_alloc_is_userdata(ap->datatype); 3090 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); 3091 /* 3092 * If allocating at eof, and there's a previous real block, 3093 * try to use its last block as our starting point. 3094 */ 3095 if (ap->eof && ap->prev.br_startoff != NULLFILEOFF && 3096 !isnullstartblock(ap->prev.br_startblock) && 3097 ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount, 3098 ap->prev.br_startblock)) { 3099 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount; 3100 /* 3101 * Adjust for the gap between prevp and us. 3102 */ 3103 adjust = ap->offset - 3104 (ap->prev.br_startoff + ap->prev.br_blockcount); 3105 if (adjust && 3106 ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) 3107 ap->blkno += adjust; 3108 } 3109 /* 3110 * If not at eof, then compare the two neighbor blocks. 3111 * Figure out whether either one gives us a good starting point, 3112 * and pick the better one. 3113 */ 3114 else if (!ap->eof) { 3115 xfs_fsblock_t gotbno; /* right side block number */ 3116 xfs_fsblock_t gotdiff=0; /* right side difference */ 3117 xfs_fsblock_t prevbno; /* left side block number */ 3118 xfs_fsblock_t prevdiff=0; /* left side difference */ 3119 3120 /* 3121 * If there's a previous (left) block, select a requested 3122 * start block based on it. 3123 */ 3124 if (ap->prev.br_startoff != NULLFILEOFF && 3125 !isnullstartblock(ap->prev.br_startblock) && 3126 (prevbno = ap->prev.br_startblock + 3127 ap->prev.br_blockcount) && 3128 ISVALID(prevbno, ap->prev.br_startblock)) { 3129 /* 3130 * Calculate gap to end of previous block. 
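 * (adjust is the number of file blocks between the end of the
 * previous mapping and the offset being allocated; it is zero when
 * the allocation immediately follows prev.)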
3131 */ 3132 adjust = prevdiff = ap->offset - 3133 (ap->prev.br_startoff + 3134 ap->prev.br_blockcount); 3135 /* 3136 * Figure the startblock based on the previous block's 3137 * end and the gap size. 3138 * Heuristic! 3139 * If the gap is large relative to the piece we're 3140 * allocating, or using it gives us an invalid block 3141 * number, then just use the end of the previous block. 3142 */ 3143 if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length && 3144 ISVALID(prevbno + prevdiff, 3145 ap->prev.br_startblock)) 3146 prevbno += adjust; 3147 else 3148 prevdiff += adjust; 3149 /* 3150 * If the firstblock forbids it, can't use it, 3151 * must use default. 3152 */ 3153 if (!rt && !nullfb && 3154 XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) 3155 prevbno = NULLFSBLOCK; 3156 } 3157 /* 3158 * No previous block or can't follow it, just default. 3159 */ 3160 else 3161 prevbno = NULLFSBLOCK; 3162 /* 3163 * If there's a following (right) block, select a requested 3164 * start block based on it. 3165 */ 3166 if (!isnullstartblock(ap->got.br_startblock)) { 3167 /* 3168 * Calculate gap to start of next block. 3169 */ 3170 adjust = gotdiff = ap->got.br_startoff - ap->offset; 3171 /* 3172 * Figure the startblock based on the next block's 3173 * start and the gap size. 3174 */ 3175 gotbno = ap->got.br_startblock; 3176 /* 3177 * Heuristic! 3178 * If the gap is large relative to the piece we're 3179 * allocating, or using it gives us an invalid block 3180 * number, then just use the start of the next block 3181 * offset by our length. 3182 */ 3183 if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length && 3184 ISVALID(gotbno - gotdiff, gotbno)) 3185 gotbno -= adjust; 3186 else if (ISVALID(gotbno - ap->length, gotbno)) { 3187 gotbno -= ap->length; 3188 gotdiff += adjust - ap->length; 3189 } else 3190 gotdiff += adjust; 3191 /* 3192 * If the firstblock forbids it, can't use it, 3193 * must use default. 3194 */ 3195 if (!rt && !nullfb && 3196 XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) 3197 gotbno = NULLFSBLOCK; 3198 } 3199 /* 3200 * No next block, just default. 3201 */ 3202 else 3203 gotbno = NULLFSBLOCK; 3204 /* 3205 * If both valid, pick the better one, else the only good 3206 * one, else ap->blkno is already set (to 0 or the inode block). 3207 */ 3208 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) 3209 ap->blkno = prevdiff <= gotdiff ? 
prevbno : gotbno;
3210 		else if (prevbno != NULLFSBLOCK)
3211 			ap->blkno = prevbno;
3212 		else if (gotbno != NULLFSBLOCK)
3213 			ap->blkno = gotbno;
3214 	}
3215 #undef ISVALID
3216 }
3217 
3218 static int
3219 xfs_bmap_longest_free_extent(
3220 	struct xfs_trans	*tp,
3221 	xfs_agnumber_t		ag,
3222 	xfs_extlen_t		*blen,
3223 	int			*notinit)
3224 {
3225 	struct xfs_mount	*mp = tp->t_mountp;
3226 	struct xfs_perag	*pag;
3227 	xfs_extlen_t		longest;
3228 	int			error = 0;
3229 
3230 	pag = xfs_perag_get(mp, ag);
3231 	if (!pag->pagf_init) {
3232 		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3233 		if (error)
3234 			goto out;
3235 
3236 		if (!pag->pagf_init) {
3237 			*notinit = 1;
3238 			goto out;
3239 		}
3240 	}
3241 
3242 	longest = xfs_alloc_longest_free_extent(pag,
3243 				xfs_alloc_min_freelist(mp, pag),
3244 				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3245 	if (*blen < longest)
3246 		*blen = longest;
3247 
3248 out:
3249 	xfs_perag_put(pag);
3250 	return error;
3251 }
3252 
3253 static void
3254 xfs_bmap_select_minlen(
3255 	struct xfs_bmalloca	*ap,
3256 	struct xfs_alloc_arg	*args,
3257 	xfs_extlen_t		*blen,
3258 	int			notinit)
3259 {
3260 	if (notinit || *blen < ap->minlen) {
3261 		/*
3262 		 * Since we did a BUF_TRYLOCK above, it is possible that
3263 		 * there is space for this request.
3264 		 */
3265 		args->minlen = ap->minlen;
3266 	} else if (*blen < args->maxlen) {
3267 		/*
3268 		 * If the best seen length is less than the request length,
3269 		 * use the best as the minimum.
3270 		 */
3271 		args->minlen = *blen;
3272 	} else {
3273 		/*
3274 		 * Otherwise we've seen an extent as big as maxlen, use that
3275 		 * as the minimum.
3276 		 */
3277 		args->minlen = args->maxlen;
3278 	}
3279 }
3280 
3281 STATIC int
3282 xfs_bmap_btalloc_nullfb(
3283 	struct xfs_bmalloca	*ap,
3284 	struct xfs_alloc_arg	*args,
3285 	xfs_extlen_t		*blen)
3286 {
3287 	struct xfs_mount	*mp = ap->ip->i_mount;
3288 	xfs_agnumber_t		ag, startag;
3289 	int			notinit = 0;
3290 	int			error;
3291 
3292 	args->type = XFS_ALLOCTYPE_START_BNO;
3293 	args->total = ap->total;
3294 
3295 	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3296 	if (startag == NULLAGNUMBER)
3297 		startag = ag = 0;
3298 
3299 	while (*blen < args->maxlen) {
3300 		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3301 						     &notinit);
3302 		if (error)
3303 			return error;
3304 
3305 		if (++ag == mp->m_sb.sb_agcount)
3306 			ag = 0;
3307 		if (ag == startag)
3308 			break;
3309 	}
3310 
3311 	xfs_bmap_select_minlen(ap, args, blen, notinit);
3312 	return 0;
3313 }
3314 
3315 STATIC int
3316 xfs_bmap_btalloc_filestreams(
3317 	struct xfs_bmalloca	*ap,
3318 	struct xfs_alloc_arg	*args,
3319 	xfs_extlen_t		*blen)
3320 {
3321 	struct xfs_mount	*mp = ap->ip->i_mount;
3322 	xfs_agnumber_t		ag;
3323 	int			notinit = 0;
3324 	int			error;
3325 
3326 	args->type = XFS_ALLOCTYPE_NEAR_BNO;
3327 	args->total = ap->total;
3328 
3329 	ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3330 	if (ag == NULLAGNUMBER)
3331 		ag = 0;
3332 
3333 	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3334 	if (error)
3335 		return error;
3336 
3337 	if (*blen < args->maxlen) {
3338 		error = xfs_filestream_new_ag(ap, &ag);
3339 		if (error)
3340 			return error;
3341 
3342 		error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3343 						     &notinit);
3344 		if (error)
3345 			return error;
3346 
3347 	}
3348 
3349 	xfs_bmap_select_minlen(ap, args, blen, notinit);
3350 
3351 	/*
3352 	 * Set the failure fallback case to look in the selected AG as stream
3353 	 * may have moved.
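	 * (XFS_AGB_TO_FSB(mp, ag, 0) is the first block of the chosen AG,
	 * so the NEAR_BNO allocation set up above will search the AG that
	 * the filestreams allocator picked.)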
3354 */ 3355 ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); 3356 return 0; 3357 } 3358 3359 /* Update all inode and quota accounting for the allocation we just did. */ 3360 static void 3361 xfs_bmap_btalloc_accounting( 3362 struct xfs_bmalloca *ap, 3363 struct xfs_alloc_arg *args) 3364 { 3365 if (ap->flags & XFS_BMAPI_COWFORK) { 3366 /* 3367 * COW fork blocks are in-core only and thus are treated as 3368 * in-core quota reservation (like delalloc blocks) even when 3369 * converted to real blocks. The quota reservation is not 3370 * accounted to disk until blocks are remapped to the data 3371 * fork. So if these blocks were previously delalloc, we 3372 * already have quota reservation and there's nothing to do 3373 * yet. 3374 */ 3375 if (ap->wasdel) 3376 return; 3377 3378 /* 3379 * Otherwise, we've allocated blocks in a hole. The transaction 3380 * has acquired in-core quota reservation for this extent. 3381 * Rather than account these as real blocks, however, we reduce 3382 * the transaction quota reservation based on the allocation. 3383 * This essentially transfers the transaction quota reservation 3384 * to that of a delalloc extent. 3385 */ 3386 ap->ip->i_delayed_blks += args->len; 3387 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, 3388 -(long)args->len); 3389 return; 3390 } 3391 3392 /* data/attr fork only */ 3393 ap->ip->i_d.di_nblocks += args->len; 3394 xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); 3395 if (ap->wasdel) 3396 ap->ip->i_delayed_blks -= args->len; 3397 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 3398 ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, 3399 args->len); 3400 } 3401 3402 STATIC int 3403 xfs_bmap_btalloc( 3404 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 3405 { 3406 xfs_mount_t *mp; /* mount point structure */ 3407 xfs_alloctype_t atype = 0; /* type for allocation routines */ 3408 xfs_extlen_t align = 0; /* minimum allocation alignment */ 3409 xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ 3410 xfs_agnumber_t ag; 3411 xfs_alloc_arg_t args; 3412 xfs_fileoff_t orig_offset; 3413 xfs_extlen_t orig_length; 3414 xfs_extlen_t blen; 3415 xfs_extlen_t nextminlen = 0; 3416 int nullfb; /* true if ap->firstblock isn't set */ 3417 int isaligned; 3418 int tryagain; 3419 int error; 3420 int stripe_align; 3421 3422 ASSERT(ap->length); 3423 orig_offset = ap->offset; 3424 orig_length = ap->length; 3425 3426 mp = ap->ip->i_mount; 3427 3428 /* stripe alignment for allocation is determined by mount parameters */ 3429 stripe_align = 0; 3430 if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) 3431 stripe_align = mp->m_swidth; 3432 else if (mp->m_dalign) 3433 stripe_align = mp->m_dalign; 3434 3435 if (ap->flags & XFS_BMAPI_COWFORK) 3436 align = xfs_get_cowextsz_hint(ap->ip); 3437 else if (xfs_alloc_is_userdata(ap->datatype)) 3438 align = xfs_get_extsz_hint(ap->ip); 3439 if (align) { 3440 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 3441 align, 0, ap->eof, 0, ap->conv, 3442 &ap->offset, &ap->length); 3443 ASSERT(!error); 3444 ASSERT(ap->length); 3445 } 3446 3447 3448 nullfb = *ap->firstblock == NULLFSBLOCK; 3449 fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); 3450 if (nullfb) { 3451 if (xfs_alloc_is_userdata(ap->datatype) && 3452 xfs_inode_is_filestream(ap->ip)) { 3453 ag = xfs_filestream_lookup_ag(ap->ip); 3454 ag = (ag != NULLAGNUMBER) ? 
ag : 0; 3455 ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); 3456 } else { 3457 ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); 3458 } 3459 } else 3460 ap->blkno = *ap->firstblock; 3461 3462 xfs_bmap_adjacent(ap); 3463 3464 /* 3465 * If allowed, use ap->blkno; otherwise must use firstblock since 3466 * it's in the right allocation group. 3467 */ 3468 if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) 3469 ; 3470 else 3471 ap->blkno = *ap->firstblock; 3472 /* 3473 * Normal allocation, done through xfs_alloc_vextent. 3474 */ 3475 tryagain = isaligned = 0; 3476 memset(&args, 0, sizeof(args)); 3477 args.tp = ap->tp; 3478 args.mp = mp; 3479 args.fsbno = ap->blkno; 3480 xfs_rmap_skip_owner_update(&args.oinfo); 3481 3482 /* Trim the allocation back to the maximum an AG can fit. */ 3483 args.maxlen = MIN(ap->length, mp->m_ag_max_usable); 3484 args.firstblock = *ap->firstblock; 3485 blen = 0; 3486 if (nullfb) { 3487 /* 3488 * Search for an allocation group with a single extent large 3489 * enough for the request. If one isn't found, then adjust 3490 * the minimum allocation size to the largest space found. 3491 */ 3492 if (xfs_alloc_is_userdata(ap->datatype) && 3493 xfs_inode_is_filestream(ap->ip)) 3494 error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); 3495 else 3496 error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); 3497 if (error) 3498 return error; 3499 } else if (ap->dfops->dop_low) { 3500 if (xfs_inode_is_filestream(ap->ip)) 3501 args.type = XFS_ALLOCTYPE_FIRST_AG; 3502 else 3503 args.type = XFS_ALLOCTYPE_START_BNO; 3504 args.total = args.minlen = ap->minlen; 3505 } else { 3506 args.type = XFS_ALLOCTYPE_NEAR_BNO; 3507 args.total = ap->total; 3508 args.minlen = ap->minlen; 3509 } 3510 /* apply extent size hints if obtained earlier */ 3511 if (align) { 3512 args.prod = align; 3513 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) 3514 args.mod = (xfs_extlen_t)(args.prod - args.mod); 3515 } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) { 3516 args.prod = 1; 3517 args.mod = 0; 3518 } else { 3519 args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog; 3520 if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod)))) 3521 args.mod = (xfs_extlen_t)(args.prod - args.mod); 3522 } 3523 /* 3524 * If we are not low on available data blocks, and the 3525 * underlying logical volume manager is a stripe, and 3526 * the file offset is zero then try to allocate data 3527 * blocks on stripe unit boundary. 3528 * NOTE: ap->aeof is only set if the allocation length 3529 * is >= the stripe unit and the allocation offset is 3530 * at the end of file. 3531 */ 3532 if (!ap->dfops->dop_low && ap->aeof) { 3533 if (!ap->offset) { 3534 args.alignment = stripe_align; 3535 atype = args.type; 3536 isaligned = 1; 3537 /* 3538 * Adjust for alignment 3539 */ 3540 if (blen > args.alignment && blen <= args.maxlen) 3541 args.minlen = blen - args.alignment; 3542 args.minalignslop = 0; 3543 } else { 3544 /* 3545 * First try an exact bno allocation. 3546 * If it fails then do a near or start bno 3547 * allocation with alignment turned on. 3548 */ 3549 atype = args.type; 3550 tryagain = 1; 3551 args.type = XFS_ALLOCTYPE_THIS_BNO; 3552 args.alignment = 1; 3553 /* 3554 * Compute the minlen+alignment for the 3555 * next case. Set slop so that the value 3556 * of minlen+alignment+slop doesn't go up 3557 * between the calls. 
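 *
 * Worked example with illustrative numbers: with stripe_align = 16
 * and args.minlen = nextminlen = 8, minalignslop becomes
 * 8 + 16 - 8 - 1 = 15, so the exact-bno attempt only succeeds when
 * enough free space remains for the aligned retry below to satisfy
 * minlen 8 at alignment 16 without raising the overall requirement.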
3558 */ 3559 if (blen > stripe_align && blen <= args.maxlen) 3560 nextminlen = blen - stripe_align; 3561 else 3562 nextminlen = args.minlen; 3563 if (nextminlen + stripe_align > args.minlen + 1) 3564 args.minalignslop = 3565 nextminlen + stripe_align - 3566 args.minlen - 1; 3567 else 3568 args.minalignslop = 0; 3569 } 3570 } else { 3571 args.alignment = 1; 3572 args.minalignslop = 0; 3573 } 3574 args.minleft = ap->minleft; 3575 args.wasdel = ap->wasdel; 3576 args.resv = XFS_AG_RESV_NONE; 3577 args.datatype = ap->datatype; 3578 if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) 3579 args.ip = ap->ip; 3580 3581 error = xfs_alloc_vextent(&args); 3582 if (error) 3583 return error; 3584 3585 if (tryagain && args.fsbno == NULLFSBLOCK) { 3586 /* 3587 * Exact allocation failed. Now try with alignment 3588 * turned on. 3589 */ 3590 args.type = atype; 3591 args.fsbno = ap->blkno; 3592 args.alignment = stripe_align; 3593 args.minlen = nextminlen; 3594 args.minalignslop = 0; 3595 isaligned = 1; 3596 if ((error = xfs_alloc_vextent(&args))) 3597 return error; 3598 } 3599 if (isaligned && args.fsbno == NULLFSBLOCK) { 3600 /* 3601 * allocation failed, so turn off alignment and 3602 * try again. 3603 */ 3604 args.type = atype; 3605 args.fsbno = ap->blkno; 3606 args.alignment = 0; 3607 if ((error = xfs_alloc_vextent(&args))) 3608 return error; 3609 } 3610 if (args.fsbno == NULLFSBLOCK && nullfb && 3611 args.minlen > ap->minlen) { 3612 args.minlen = ap->minlen; 3613 args.type = XFS_ALLOCTYPE_START_BNO; 3614 args.fsbno = ap->blkno; 3615 if ((error = xfs_alloc_vextent(&args))) 3616 return error; 3617 } 3618 if (args.fsbno == NULLFSBLOCK && nullfb) { 3619 args.fsbno = 0; 3620 args.type = XFS_ALLOCTYPE_FIRST_AG; 3621 args.total = ap->minlen; 3622 if ((error = xfs_alloc_vextent(&args))) 3623 return error; 3624 ap->dfops->dop_low = true; 3625 } 3626 if (args.fsbno != NULLFSBLOCK) { 3627 /* 3628 * check the allocation happened at the same or higher AG than 3629 * the first block that was allocated. 3630 */ 3631 ASSERT(*ap->firstblock == NULLFSBLOCK || 3632 XFS_FSB_TO_AGNO(mp, *ap->firstblock) <= 3633 XFS_FSB_TO_AGNO(mp, args.fsbno)); 3634 3635 ap->blkno = args.fsbno; 3636 if (*ap->firstblock == NULLFSBLOCK) 3637 *ap->firstblock = args.fsbno; 3638 ASSERT(nullfb || fb_agno <= args.agno); 3639 ap->length = args.len; 3640 /* 3641 * If the extent size hint is active, we tried to round the 3642 * caller's allocation request offset down to extsz and the 3643 * length up to another extsz boundary. If we found a free 3644 * extent we mapped it in starting at this new offset. If the 3645 * newly mapped space isn't long enough to cover any of the 3646 * range of offsets that was originally requested, move the 3647 * mapping up so that we can fill as much of the caller's 3648 * original request as possible. Free space is apparently 3649 * very fragmented so we're unlikely to be able to satisfy the 3650 * hints anyway. 3651 */ 3652 if (ap->length <= orig_length) 3653 ap->offset = orig_offset; 3654 else if (ap->offset + ap->length < orig_offset + orig_length) 3655 ap->offset = orig_offset + orig_length - ap->length; 3656 xfs_bmap_btalloc_accounting(ap, &args); 3657 } else { 3658 ap->blkno = NULLFSBLOCK; 3659 ap->length = 0; 3660 } 3661 return 0; 3662 } 3663 3664 /* 3665 * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. 3666 * It figures out where to ask the underlying allocator to put the new extent. 
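 * Realtime file data is placed by xfs_bmap_rtalloc on the realtime
 * device; everything else, including attribute fork blocks of
 * realtime inodes, goes through the AG-based xfs_bmap_btalloc.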
3667 */ 3668 STATIC int 3669 xfs_bmap_alloc( 3670 struct xfs_bmalloca *ap) /* bmap alloc argument struct */ 3671 { 3672 if (XFS_IS_REALTIME_INODE(ap->ip) && 3673 xfs_alloc_is_userdata(ap->datatype)) 3674 return xfs_bmap_rtalloc(ap); 3675 return xfs_bmap_btalloc(ap); 3676 } 3677 3678 /* Trim extent to fit a logical block range. */ 3679 void 3680 xfs_trim_extent( 3681 struct xfs_bmbt_irec *irec, 3682 xfs_fileoff_t bno, 3683 xfs_filblks_t len) 3684 { 3685 xfs_fileoff_t distance; 3686 xfs_fileoff_t end = bno + len; 3687 3688 if (irec->br_startoff + irec->br_blockcount <= bno || 3689 irec->br_startoff >= end) { 3690 irec->br_blockcount = 0; 3691 return; 3692 } 3693 3694 if (irec->br_startoff < bno) { 3695 distance = bno - irec->br_startoff; 3696 if (isnullstartblock(irec->br_startblock)) 3697 irec->br_startblock = DELAYSTARTBLOCK; 3698 if (irec->br_startblock != DELAYSTARTBLOCK && 3699 irec->br_startblock != HOLESTARTBLOCK) 3700 irec->br_startblock += distance; 3701 irec->br_startoff += distance; 3702 irec->br_blockcount -= distance; 3703 } 3704 3705 if (end < irec->br_startoff + irec->br_blockcount) { 3706 distance = irec->br_startoff + irec->br_blockcount - end; 3707 irec->br_blockcount -= distance; 3708 } 3709 } 3710 3711 /* trim extent to within eof */ 3712 void 3713 xfs_trim_extent_eof( 3714 struct xfs_bmbt_irec *irec, 3715 struct xfs_inode *ip) 3716 3717 { 3718 xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, 3719 i_size_read(VFS_I(ip)))); 3720 } 3721 3722 /* 3723 * Trim the returned map to the required bounds 3724 */ 3725 STATIC void 3726 xfs_bmapi_trim_map( 3727 struct xfs_bmbt_irec *mval, 3728 struct xfs_bmbt_irec *got, 3729 xfs_fileoff_t *bno, 3730 xfs_filblks_t len, 3731 xfs_fileoff_t obno, 3732 xfs_fileoff_t end, 3733 int n, 3734 int flags) 3735 { 3736 if ((flags & XFS_BMAPI_ENTIRE) || 3737 got->br_startoff + got->br_blockcount <= obno) { 3738 *mval = *got; 3739 if (isnullstartblock(got->br_startblock)) 3740 mval->br_startblock = DELAYSTARTBLOCK; 3741 return; 3742 } 3743 3744 if (obno > *bno) 3745 *bno = obno; 3746 ASSERT((*bno >= obno) || (n == 0)); 3747 ASSERT(*bno < end); 3748 mval->br_startoff = *bno; 3749 if (isnullstartblock(got->br_startblock)) 3750 mval->br_startblock = DELAYSTARTBLOCK; 3751 else 3752 mval->br_startblock = got->br_startblock + 3753 (*bno - got->br_startoff); 3754 /* 3755 * Return the minimum of what we got and what we asked for for 3756 * the length. We can use the len variable here because it is 3757 * modified below and we could have been there before coming 3758 * here if the first part of the allocation didn't overlap what 3759 * was asked for. 
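 *
 * Illustrative example (hypothetical values):
 *
 *	got  = { .br_startoff = 10, .br_startblock = 100,
 *		 .br_blockcount = 20 };		file blocks [10, 30)
 *	*bno = 15, end = 25
 *	mval = { .br_startoff = 15, .br_startblock = 105,
 *		 .br_blockcount = min(25 - 15, 20 - 5) = 10 };
 *
 * i.e. the returned map is exactly the overlap of the found
 * extent with the requested range.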
3760 */ 3761 mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno, 3762 got->br_blockcount - (*bno - got->br_startoff)); 3763 mval->br_state = got->br_state; 3764 ASSERT(mval->br_blockcount <= len); 3765 return; 3766 } 3767 3768 /* 3769 * Update and validate the extent map to return 3770 */ 3771 STATIC void 3772 xfs_bmapi_update_map( 3773 struct xfs_bmbt_irec **map, 3774 xfs_fileoff_t *bno, 3775 xfs_filblks_t *len, 3776 xfs_fileoff_t obno, 3777 xfs_fileoff_t end, 3778 int *n, 3779 int flags) 3780 { 3781 xfs_bmbt_irec_t *mval = *map; 3782 3783 ASSERT((flags & XFS_BMAPI_ENTIRE) || 3784 ((mval->br_startoff + mval->br_blockcount) <= end)); 3785 ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) || 3786 (mval->br_startoff < obno)); 3787 3788 *bno = mval->br_startoff + mval->br_blockcount; 3789 *len = end - *bno; 3790 if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) { 3791 /* update previous map with new information */ 3792 ASSERT(mval->br_startblock == mval[-1].br_startblock); 3793 ASSERT(mval->br_blockcount > mval[-1].br_blockcount); 3794 ASSERT(mval->br_state == mval[-1].br_state); 3795 mval[-1].br_blockcount = mval->br_blockcount; 3796 mval[-1].br_state = mval->br_state; 3797 } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK && 3798 mval[-1].br_startblock != DELAYSTARTBLOCK && 3799 mval[-1].br_startblock != HOLESTARTBLOCK && 3800 mval->br_startblock == mval[-1].br_startblock + 3801 mval[-1].br_blockcount && 3802 ((flags & XFS_BMAPI_IGSTATE) || 3803 mval[-1].br_state == mval->br_state)) { 3804 ASSERT(mval->br_startoff == 3805 mval[-1].br_startoff + mval[-1].br_blockcount); 3806 mval[-1].br_blockcount += mval->br_blockcount; 3807 } else if (*n > 0 && 3808 mval->br_startblock == DELAYSTARTBLOCK && 3809 mval[-1].br_startblock == DELAYSTARTBLOCK && 3810 mval->br_startoff == 3811 mval[-1].br_startoff + mval[-1].br_blockcount) { 3812 mval[-1].br_blockcount += mval->br_blockcount; 3813 mval[-1].br_state = mval->br_state; 3814 } else if (!((*n == 0) && 3815 ((mval->br_startoff + mval->br_blockcount) <= 3816 obno))) { 3817 mval++; 3818 (*n)++; 3819 } 3820 *map = mval; 3821 } 3822 3823 /* 3824 * Map file blocks to filesystem blocks without allocation. 3825 */ 3826 int 3827 xfs_bmapi_read( 3828 struct xfs_inode *ip, 3829 xfs_fileoff_t bno, 3830 xfs_filblks_t len, 3831 struct xfs_bmbt_irec *mval, 3832 int *nmap, 3833 int flags) 3834 { 3835 struct xfs_mount *mp = ip->i_mount; 3836 struct xfs_ifork *ifp; 3837 struct xfs_bmbt_irec got; 3838 xfs_fileoff_t obno; 3839 xfs_fileoff_t end; 3840 struct xfs_iext_cursor icur; 3841 int error; 3842 bool eof = false; 3843 int n = 0; 3844 int whichfork = xfs_bmapi_whichfork(flags); 3845 3846 ASSERT(*nmap >= 1); 3847 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE| 3848 XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK))); 3849 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); 3850 3851 if (unlikely(XFS_TEST_ERROR( 3852 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 3853 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 3854 mp, XFS_ERRTAG_BMAPIFORMAT))) { 3855 XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp); 3856 return -EFSCORRUPTED; 3857 } 3858 3859 if (XFS_FORCED_SHUTDOWN(mp)) 3860 return -EIO; 3861 3862 XFS_STATS_INC(mp, xs_blk_mapr); 3863 3864 ifp = XFS_IFORK_PTR(ip, whichfork); 3865 3866 /* No CoW fork? Return a hole. 
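 * (An inode that has never needed copy-on-write has no CoW fork
 * at all, so a CoW fork query simply sees a single hole mapping
 * covering the whole requested range, as set up below.)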
*/ 3867 if (whichfork == XFS_COW_FORK && !ifp) { 3868 mval->br_startoff = bno; 3869 mval->br_startblock = HOLESTARTBLOCK; 3870 mval->br_blockcount = len; 3871 mval->br_state = XFS_EXT_NORM; 3872 *nmap = 1; 3873 return 0; 3874 } 3875 3876 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 3877 error = xfs_iread_extents(NULL, ip, whichfork); 3878 if (error) 3879 return error; 3880 } 3881 3882 if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) 3883 eof = true; 3884 end = bno + len; 3885 obno = bno; 3886 3887 while (bno < end && n < *nmap) { 3888 /* Reading past eof, act as though there's a hole up to end. */ 3889 if (eof) 3890 got.br_startoff = end; 3891 if (got.br_startoff > bno) { 3892 /* Reading in a hole. */ 3893 mval->br_startoff = bno; 3894 mval->br_startblock = HOLESTARTBLOCK; 3895 mval->br_blockcount = 3896 XFS_FILBLKS_MIN(len, got.br_startoff - bno); 3897 mval->br_state = XFS_EXT_NORM; 3898 bno += mval->br_blockcount; 3899 len -= mval->br_blockcount; 3900 mval++; 3901 n++; 3902 continue; 3903 } 3904 3905 /* set up the extent map to return. */ 3906 xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags); 3907 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); 3908 3909 /* If we're done, stop now. */ 3910 if (bno >= end || n >= *nmap) 3911 break; 3912 3913 /* Else go on to the next record. */ 3914 if (!xfs_iext_next_extent(ifp, &icur, &got)) 3915 eof = true; 3916 } 3917 *nmap = n; 3918 return 0; 3919 } 3920 3921 /* 3922 * Add a delayed allocation extent to an inode. Blocks are reserved from the 3923 * global pool and the extent inserted into the inode in-core extent tree. 3924 * 3925 * On entry, got refers to the first extent beyond the offset of the extent to 3926 * allocate or eof is specified if no such extent exists. On return, got refers 3927 * to the extent record that was inserted into the inode fork. 3928 * 3929 * Note that the allocated extent may have been merged with contiguous extents 3930 * during insertion into the inode fork. Thus, got does not reflect the current 3931 * state of the inode fork on return. If necessary, the caller can use icur to 3932 * look up the updated record in the inode fork. 3933 */ 3934 int 3935 xfs_bmapi_reserve_delalloc( 3936 struct xfs_inode *ip, 3937 int whichfork, 3938 xfs_fileoff_t off, 3939 xfs_filblks_t len, 3940 xfs_filblks_t prealloc, 3941 struct xfs_bmbt_irec *got, 3942 struct xfs_iext_cursor *icur, 3943 int eof) 3944 { 3945 struct xfs_mount *mp = ip->i_mount; 3946 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3947 xfs_extlen_t alen; 3948 xfs_extlen_t indlen; 3949 int error; 3950 xfs_fileoff_t aoff = off; 3951 3952 /* 3953 * Cap the alloc length. Keep track of prealloc so we know whether to 3954 * tag the inode before we return. 3955 */ 3956 alen = XFS_FILBLKS_MIN(len + prealloc, MAXEXTLEN); 3957 if (!eof) 3958 alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff); 3959 if (prealloc && alen >= len) 3960 prealloc = alen - len; 3961 3962 /* Figure out the extent size, adjust alen */ 3963 if (whichfork == XFS_COW_FORK) { 3964 struct xfs_bmbt_irec prev; 3965 xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip); 3966 3967 if (!xfs_iext_peek_prev_extent(ifp, icur, &prev)) 3968 prev.br_startoff = NULLFILEOFF; 3969 3970 error = xfs_bmap_extsize_align(mp, got, &prev, extsz, 0, eof, 3971 1, 0, &aoff, &alen); 3972 ASSERT(!error); 3973 } 3974 3975 /* 3976 * Make a transaction-less quota reservation for delayed allocation 3977 * blocks. This number gets adjusted later.
We return if we haven't 3978 * allocated blocks already inside this loop. 3979 */ 3980 error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0, 3981 XFS_QMOPT_RES_REGBLKS); 3982 if (error) 3983 return error; 3984 3985 /* 3986 * Split changing sb for alen and indlen since they could be coming 3987 * from different places. 3988 */ 3989 indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen); 3990 ASSERT(indlen > 0); 3991 3992 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false); 3993 if (error) 3994 goto out_unreserve_quota; 3995 3996 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false); 3997 if (error) 3998 goto out_unreserve_blocks; 3999 4000 4001 ip->i_delayed_blks += alen; 4002 4003 got->br_startoff = aoff; 4004 got->br_startblock = nullstartblock(indlen); 4005 got->br_blockcount = alen; 4006 got->br_state = XFS_EXT_NORM; 4007 4008 xfs_bmap_add_extent_hole_delay(ip, whichfork, icur, got); 4009 4010 /* 4011 * Tag the inode if blocks were preallocated. Note that COW fork 4012 * preallocation can occur at the start or end of the extent, even when 4013 * prealloc == 0, so we must also check the aligned offset and length. 4014 */ 4015 if (whichfork == XFS_DATA_FORK && prealloc) 4016 xfs_inode_set_eofblocks_tag(ip); 4017 if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) 4018 xfs_inode_set_cowblocks_tag(ip); 4019 4020 return 0; 4021 4022 out_unreserve_blocks: 4023 xfs_mod_fdblocks(mp, alen, false); 4024 out_unreserve_quota: 4025 if (XFS_IS_QUOTA_ON(mp)) 4026 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, 4027 XFS_QMOPT_RES_REGBLKS); 4028 return error; 4029 } 4030 4031 static int 4032 xfs_bmapi_allocate( 4033 struct xfs_bmalloca *bma) 4034 { 4035 struct xfs_mount *mp = bma->ip->i_mount; 4036 int whichfork = xfs_bmapi_whichfork(bma->flags); 4037 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4038 int tmp_logflags = 0; 4039 int error; 4040 4041 ASSERT(bma->length > 0); 4042 4043 /* 4044 * For the wasdelay case, we could also just allocate the stuff asked 4045 * for in this bmap call but that wouldn't be as good. 4046 */ 4047 if (bma->wasdel) { 4048 bma->length = (xfs_extlen_t)bma->got.br_blockcount; 4049 bma->offset = bma->got.br_startoff; 4050 xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev); 4051 } else { 4052 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN); 4053 if (!bma->eof) 4054 bma->length = XFS_FILBLKS_MIN(bma->length, 4055 bma->got.br_startoff - bma->offset); 4056 } 4057 4058 /* 4059 * Set the data type being allocated. For the data fork, the first data 4060 * in the file is treated differently to all other allocations. For the 4061 * attribute fork, we only need to ensure the allocated range is not on 4062 * the busy list. 4063 */ 4064 if (!(bma->flags & XFS_BMAPI_METADATA)) { 4065 bma->datatype = XFS_ALLOC_NOBUSY; 4066 if (whichfork == XFS_DATA_FORK) { 4067 if (bma->offset == 0) 4068 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; 4069 else 4070 bma->datatype |= XFS_ALLOC_USERDATA; 4071 } 4072 if (bma->flags & XFS_BMAPI_ZERO) 4073 bma->datatype |= XFS_ALLOC_USERDATA_ZERO; 4074 } 4075 4076 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4077 4078 /* 4079 * Only want to do the alignment at the eof if it is userdata and 4080 * allocation length is larger than a stripe unit. 
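 * For example (hypothetical numbers): with a stripe unit of
 * mp->m_dalign = 16 blocks, a 32 block userdata allocation in
 * the data fork is checked by xfs_bmap_isaeof() below, while a
 * 4 block allocation, or any metadata or attr fork allocation,
 * skips the check.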
4081 */ 4082 if (mp->m_dalign && bma->length >= mp->m_dalign && 4083 !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) { 4084 error = xfs_bmap_isaeof(bma, whichfork); 4085 if (error) 4086 return error; 4087 } 4088 4089 error = xfs_bmap_alloc(bma); 4090 if (error) 4091 return error; 4092 4093 if (bma->cur) 4094 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4095 if (bma->blkno == NULLFSBLOCK) 4096 return 0; 4097 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { 4098 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork); 4099 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4100 bma->cur->bc_private.b.dfops = bma->dfops; 4101 } 4102 /* 4103 * Bump the number of extents we've allocated 4104 * in this call. 4105 */ 4106 bma->nallocs++; 4107 4108 if (bma->cur) 4109 bma->cur->bc_private.b.flags = 4110 bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0; 4111 4112 bma->got.br_startoff = bma->offset; 4113 bma->got.br_startblock = bma->blkno; 4114 bma->got.br_blockcount = bma->length; 4115 bma->got.br_state = XFS_EXT_NORM; 4116 4117 /* 4118 * In the data fork, a wasdelay extent has been initialized, so 4119 * shouldn't be flagged as unwritten. 4120 * 4121 * For the cow fork, however, we convert delalloc reservations 4122 * (extents allocated for speculative preallocation) to 4123 * allocated unwritten extents, and only convert the unwritten 4124 * extents to real extents when we're about to write the data. 4125 */ 4126 if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && 4127 (bma->flags & XFS_BMAPI_PREALLOC) && 4128 xfs_sb_version_hasextflgbit(&mp->m_sb)) 4129 bma->got.br_state = XFS_EXT_UNWRITTEN; 4130 4131 if (bma->wasdel) 4132 error = xfs_bmap_add_extent_delay_real(bma, whichfork); 4133 else 4134 error = xfs_bmap_add_extent_hole_real(bma->tp, bma->ip, 4135 whichfork, &bma->icur, &bma->cur, &bma->got, 4136 bma->firstblock, bma->dfops, &bma->logflags, 4137 bma->flags); 4138 4139 bma->logflags |= tmp_logflags; 4140 if (error) 4141 return error; 4142 4143 /* 4144 * Update our extent pointer, given that xfs_bmap_add_extent_delay_real 4145 * or xfs_bmap_add_extent_hole_real might have merged it into one of 4146 * the neighbouring ones. 4147 */ 4148 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4149 4150 ASSERT(bma->got.br_startoff <= bma->offset); 4151 ASSERT(bma->got.br_startoff + bma->got.br_blockcount >= 4152 bma->offset + bma->length); 4153 ASSERT(bma->got.br_state == XFS_EXT_NORM || 4154 bma->got.br_state == XFS_EXT_UNWRITTEN); 4155 return 0; 4156 } 4157 4158 STATIC int 4159 xfs_bmapi_convert_unwritten( 4160 struct xfs_bmalloca *bma, 4161 struct xfs_bmbt_irec *mval, 4162 xfs_filblks_t len, 4163 int flags) 4164 { 4165 int whichfork = xfs_bmapi_whichfork(flags); 4166 struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork); 4167 int tmp_logflags = 0; 4168 int error; 4169 4170 /* check if we need to do unwritten->real conversion */ 4171 if (mval->br_state == XFS_EXT_UNWRITTEN && 4172 (flags & XFS_BMAPI_PREALLOC)) 4173 return 0; 4174 4175 /* check if we need to do real->unwritten conversion */ 4176 if (mval->br_state == XFS_EXT_NORM && 4177 (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) != 4178 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) 4179 return 0; 4180 4181 /* 4182 * Modify (by adding) the state flag, if writing. 
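 * Given the early returns above, only two cases get this far:
 * an unwritten extent that is being written (XFS_BMAPI_PREALLOC
 * clear, so it becomes XFS_EXT_NORM), or a written extent being
 * converted back to unwritten because both XFS_BMAPI_PREALLOC
 * and XFS_BMAPI_CONVERT were supplied.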
4183 */ 4184 ASSERT(mval->br_blockcount <= len); 4185 if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) { 4186 bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp, 4187 bma->ip, whichfork); 4188 bma->cur->bc_private.b.firstblock = *bma->firstblock; 4189 bma->cur->bc_private.b.dfops = bma->dfops; 4190 } 4191 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4192 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4193 4194 /* 4195 * Before insertion into the bmbt, zero the range being converted 4196 * if required. 4197 */ 4198 if (flags & XFS_BMAPI_ZERO) { 4199 error = xfs_zero_extent(bma->ip, mval->br_startblock, 4200 mval->br_blockcount); 4201 if (error) 4202 return error; 4203 } 4204 4205 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork, 4206 &bma->icur, &bma->cur, mval, bma->firstblock, 4207 bma->dfops, &tmp_logflags); 4208 /* 4209 * Log the inode core unconditionally in the unwritten extent conversion 4210 * path because the conversion might not have done so (e.g., if the 4211 * extent count hasn't changed). We need to make sure the inode is dirty 4212 * in the transaction for the sake of fsync(), even if nothing has 4213 * changed, because fsync() will not force the log for this transaction 4214 * unless it sees the inode pinned. 4215 * 4216 * Note: If we're only converting cow fork extents, there aren't 4217 * any on-disk updates to make, so we don't need to log anything. 4218 */ 4219 if (whichfork != XFS_COW_FORK) 4220 bma->logflags |= tmp_logflags | XFS_ILOG_CORE; 4221 if (error) 4222 return error; 4223 4224 /* 4225 * Update our extent pointer, given that 4226 * xfs_bmap_add_extent_unwritten_real might have merged it into one 4227 * of the neighbouring ones. 4228 */ 4229 xfs_iext_get_extent(ifp, &bma->icur, &bma->got); 4230 4231 /* 4232 * We may have combined previously unwritten space with written space, 4233 * so generate another request. 4234 */ 4235 if (mval->br_blockcount < len) 4236 return -EAGAIN; 4237 return 0; 4238 } 4239 4240 /* 4241 * Map file blocks to filesystem blocks, and allocate blocks or convert the 4242 * extent state if necessary. Detailed behaviour is controlled by the flags 4243 * parameter. Only allocates blocks from a single allocation group, to avoid 4244 * locking problems. 4245 * 4246 * The returned value in "firstblock" from the first call in a transaction 4247 * must be remembered and presented to subsequent calls in "firstblock". 4248 * An upper bound for the number of blocks to be allocated is supplied to 4249 * the first call in "total"; if no allocation group has that many free 4250 * blocks then the call will fail (return NULLFSBLOCK in "firstblock"). 4251 */ 4252 int 4253 xfs_bmapi_write( 4254 struct xfs_trans *tp, /* transaction pointer */ 4255 struct xfs_inode *ip, /* incore inode */ 4256 xfs_fileoff_t bno, /* starting file offs. mapped */ 4257 xfs_filblks_t len, /* length to map in file */ 4258 int flags, /* XFS_BMAPI_... */ 4259 xfs_fsblock_t *firstblock, /* first allocated block 4260 controls a.g.
for allocs */ 4261 xfs_extlen_t total, /* total blocks needed */ 4262 struct xfs_bmbt_irec *mval, /* output: map values */ 4263 int *nmap, /* i/o: mval size/count */ 4264 struct xfs_defer_ops *dfops) /* i/o: list extents to free */ 4265 { 4266 struct xfs_mount *mp = ip->i_mount; 4267 struct xfs_ifork *ifp; 4268 struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ 4269 xfs_fileoff_t end; /* end of mapped file region */ 4270 bool eof = false; /* after the end of extents */ 4271 int error; /* error return */ 4272 int n; /* current extent index */ 4273 xfs_fileoff_t obno; /* old block number (offset) */ 4274 int whichfork; /* data or attr fork */ 4275 4276 #ifdef DEBUG 4277 xfs_fileoff_t orig_bno; /* original block number value */ 4278 int orig_flags; /* original flags arg value */ 4279 xfs_filblks_t orig_len; /* original value of len arg */ 4280 struct xfs_bmbt_irec *orig_mval; /* original value of mval */ 4281 int orig_nmap; /* original value of *nmap */ 4282 4283 orig_bno = bno; 4284 orig_len = len; 4285 orig_flags = flags; 4286 orig_mval = mval; 4287 orig_nmap = *nmap; 4288 #endif 4289 whichfork = xfs_bmapi_whichfork(flags); 4290 4291 ASSERT(*nmap >= 1); 4292 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); 4293 ASSERT(!(flags & XFS_BMAPI_IGSTATE)); 4294 ASSERT(tp != NULL || 4295 (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) == 4296 (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)); 4297 ASSERT(len > 0); 4298 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4299 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4300 ASSERT(!(flags & XFS_BMAPI_REMAP)); 4301 4302 /* zeroing is currently only supported for data extents, not metadata */ 4303 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != 4304 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)); 4305 /* 4306 * we can allocate unwritten extents or pre-zero allocated blocks, 4307 * but it makes no sense to do both at once. This would result in 4308 * zeroing the unwritten extent twice while it still remained an 4309 * unwritten extent.... 4310 */ 4311 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) != 4312 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)); 4313 4314 if (unlikely(XFS_TEST_ERROR( 4315 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 4316 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4317 mp, XFS_ERRTAG_BMAPIFORMAT))) { 4318 XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); 4319 return -EFSCORRUPTED; 4320 } 4321 4322 if (XFS_FORCED_SHUTDOWN(mp)) 4323 return -EIO; 4324 4325 ifp = XFS_IFORK_PTR(ip, whichfork); 4326 4327 XFS_STATS_INC(mp, xs_blk_mapw); 4328 4329 if (*firstblock == NULLFSBLOCK) { 4330 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) 4331 bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; 4332 else 4333 bma.minleft = 1; 4334 } else { 4335 bma.minleft = 0; 4336 } 4337 4338 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4339 error = xfs_iread_extents(tp, ip, whichfork); 4340 if (error) 4341 goto error0; 4342 } 4343 4344 n = 0; 4345 end = bno + len; 4346 obno = bno; 4347 4348 if (!xfs_iext_lookup_extent(ip, ifp, bno, &bma.icur, &bma.got)) 4349 eof = true; 4350 if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev)) 4351 bma.prev.br_startoff = NULLFILEOFF; 4352 bma.tp = tp; 4353 bma.ip = ip; 4354 bma.total = total; 4355 bma.datatype = 0; 4356 bma.dfops = dfops; 4357 bma.firstblock = firstblock; 4358 4359 while (bno < end && n < *nmap) { 4360 bool need_alloc = false, wasdelay = false; 4361 4362 /* in hole or beyond EOF?
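 * If so, the normal write path needs to allocate; the exception
 * is an XFS_BMAPI_DELALLOC conversion of the COW fork, where a
 * racing thread may already have converted the range, as handled
 * below.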
*/ 4363 if (eof || bma.got.br_startoff > bno) { 4364 /* 4365 * CoW fork conversions should /never/ hit EOF or 4366 * holes. There should always be something for us 4367 * to work on. 4368 */ 4369 ASSERT(!((flags & XFS_BMAPI_CONVERT) && 4370 (flags & XFS_BMAPI_COWFORK))); 4371 4372 if (flags & XFS_BMAPI_DELALLOC) { 4373 /* 4374 * For the COW fork we can reasonably get a 4375 * request to convert an extent that races 4376 * with other threads that have already 4377 * converted part of it, as the conversion of 4378 * COW to regular blocks is not protected by 4379 * the IOLOCK. 4380 */ 4381 ASSERT(flags & XFS_BMAPI_COWFORK); 4382 if (!(flags & XFS_BMAPI_COWFORK)) { 4383 error = -EIO; 4384 goto error0; 4385 } 4386 4387 if (eof || bno >= end) 4388 break; 4389 } else { 4390 need_alloc = true; 4391 } 4392 } else if (isnullstartblock(bma.got.br_startblock)) { 4393 wasdelay = true; 4394 } 4395 4396 /* 4397 * First, deal with the hole before the allocated space 4398 * that we found, if any. 4399 */ 4400 if ((need_alloc || wasdelay) && 4401 !(flags & XFS_BMAPI_CONVERT_ONLY)) { 4402 bma.eof = eof; 4403 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4404 bma.wasdel = wasdelay; 4405 bma.offset = bno; 4406 bma.flags = flags; 4407 4408 /* 4409 * There's a 32/64 bit type mismatch between the 4410 * allocation length request (which can be 64 bits in 4411 * length) and the bma length request, which is 4412 * xfs_extlen_t and therefore 32 bits. Hence we have to 4413 * check for 32-bit overflows and handle them here. 4414 */ 4415 if (len > (xfs_filblks_t)MAXEXTLEN) 4416 bma.length = MAXEXTLEN; 4417 else 4418 bma.length = len; 4419 4420 ASSERT(len > 0); 4421 ASSERT(bma.length > 0); 4422 error = xfs_bmapi_allocate(&bma); 4423 if (error) 4424 goto error0; 4425 if (bma.blkno == NULLFSBLOCK) 4426 break; 4427 4428 /* 4429 * If this is a CoW allocation, record the data in 4430 * the refcount btree for orphan recovery. 4431 */ 4432 if (whichfork == XFS_COW_FORK) { 4433 error = xfs_refcount_alloc_cow_extent(mp, dfops, 4434 bma.blkno, bma.length); 4435 if (error) 4436 goto error0; 4437 } 4438 } 4439 4440 /* Deal with the allocated space we found. */ 4441 xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno, 4442 end, n, flags); 4443 4444 /* Execute unwritten extent conversion if necessary */ 4445 error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags); 4446 if (error == -EAGAIN) 4447 continue; 4448 if (error) 4449 goto error0; 4450 4451 /* update the extent map to return */ 4452 xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags); 4453 4454 /* 4455 * If we're done, stop now. Stop when we've allocated 4456 * XFS_BMAP_MAX_NMAP extents no matter what. Otherwise 4457 * the transaction may get too big. 4458 */ 4459 if (bno >= end || n >= *nmap || bma.nallocs >= *nmap) 4460 break; 4461 4462 /* Else go on to the next record. */ 4463 bma.prev = bma.got; 4464 if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got)) 4465 eof = true; 4466 } 4467 *nmap = n; 4468 4469 /* 4470 * Transform from btree to extents, give it cur. 4471 */ 4472 if (xfs_bmap_wants_extents(ip, whichfork)) { 4473 int tmp_logflags = 0; 4474 4475 ASSERT(bma.cur); 4476 error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, 4477 &tmp_logflags, whichfork); 4478 bma.logflags |= tmp_logflags; 4479 if (error) 4480 goto error0; 4481 } 4482 4483 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE || 4484 XFS_IFORK_NEXTENTS(ip, whichfork) > 4485 XFS_IFORK_MAXEXT(ip, whichfork)); 4486 error = 0; 4487 error0: 4488 /* 4489 * Log everything.
Do this after conversion, there's no point in 4490 * logging the extent records if we've converted to btree format. 4491 */ 4492 if ((bma.logflags & xfs_ilog_fext(whichfork)) && 4493 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 4494 bma.logflags &= ~xfs_ilog_fext(whichfork); 4495 else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) && 4496 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) 4497 bma.logflags &= ~xfs_ilog_fbroot(whichfork); 4498 /* 4499 * Log whatever the flags say, even if error. Otherwise we might miss 4500 * detecting a case where the data is changed, there's an error, 4501 * and it's not logged so we don't shutdown when we should. 4502 */ 4503 if (bma.logflags) 4504 xfs_trans_log_inode(tp, ip, bma.logflags); 4505 4506 if (bma.cur) { 4507 if (!error) { 4508 ASSERT(*firstblock == NULLFSBLOCK || 4509 XFS_FSB_TO_AGNO(mp, *firstblock) <= 4510 XFS_FSB_TO_AGNO(mp, 4511 bma.cur->bc_private.b.firstblock)); 4512 *firstblock = bma.cur->bc_private.b.firstblock; 4513 } 4514 xfs_btree_del_cursor(bma.cur, 4515 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 4516 } 4517 if (!error) 4518 xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval, 4519 orig_nmap, *nmap); 4520 return error; 4521 } 4522 4523 int 4524 xfs_bmapi_remap( 4525 struct xfs_trans *tp, 4526 struct xfs_inode *ip, 4527 xfs_fileoff_t bno, 4528 xfs_filblks_t len, 4529 xfs_fsblock_t startblock, 4530 struct xfs_defer_ops *dfops, 4531 int flags) 4532 { 4533 struct xfs_mount *mp = ip->i_mount; 4534 struct xfs_ifork *ifp; 4535 struct xfs_btree_cur *cur = NULL; 4536 xfs_fsblock_t firstblock = NULLFSBLOCK; 4537 struct xfs_bmbt_irec got; 4538 struct xfs_iext_cursor icur; 4539 int whichfork = xfs_bmapi_whichfork(flags); 4540 int logflags = 0, error; 4541 4542 ifp = XFS_IFORK_PTR(ip, whichfork); 4543 ASSERT(len > 0); 4544 ASSERT(len <= (xfs_filblks_t)MAXEXTLEN); 4545 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4546 ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC | 4547 XFS_BMAPI_NORMAP))); 4548 ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) != 4549 (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)); 4550 4551 if (unlikely(XFS_TEST_ERROR( 4552 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 4553 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4554 mp, XFS_ERRTAG_BMAPIFORMAT))) { 4555 XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp); 4556 return -EFSCORRUPTED; 4557 } 4558 4559 if (XFS_FORCED_SHUTDOWN(mp)) 4560 return -EIO; 4561 4562 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4563 error = xfs_iread_extents(tp, ip, whichfork); 4564 if (error) 4565 return error; 4566 } 4567 4568 if (xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) { 4569 /* make sure we only reflink into a hole. 
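 * Illustrative check (hypothetical numbers): remapping len = 4
 * blocks at bno = 100 is only valid if the extent found by the
 * lookup starts at file offset 104 or beyond, which is what the
 * two asserts below verify.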
*/ 4570 ASSERT(got.br_startoff > bno); 4571 ASSERT(got.br_startoff - bno >= len); 4572 } 4573 4574 ip->i_d.di_nblocks += len; 4575 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 4576 4577 if (ifp->if_flags & XFS_IFBROOT) { 4578 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 4579 cur->bc_private.b.firstblock = firstblock; 4580 cur->bc_private.b.dfops = dfops; 4581 cur->bc_private.b.flags = 0; 4582 } 4583 4584 got.br_startoff = bno; 4585 got.br_startblock = startblock; 4586 got.br_blockcount = len; 4587 if (flags & XFS_BMAPI_PREALLOC) 4588 got.br_state = XFS_EXT_UNWRITTEN; 4589 else 4590 got.br_state = XFS_EXT_NORM; 4591 4592 error = xfs_bmap_add_extent_hole_real(tp, ip, whichfork, &icur, 4593 &cur, &got, &firstblock, dfops, &logflags, flags); 4594 if (error) 4595 goto error0; 4596 4597 if (xfs_bmap_wants_extents(ip, whichfork)) { 4598 int tmp_logflags = 0; 4599 4600 error = xfs_bmap_btree_to_extents(tp, ip, cur, 4601 &tmp_logflags, whichfork); 4602 logflags |= tmp_logflags; 4603 } 4604 4605 error0: 4606 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) 4607 logflags &= ~XFS_ILOG_DEXT; 4608 else if (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) 4609 logflags &= ~XFS_ILOG_DBROOT; 4610 4611 if (logflags) 4612 xfs_trans_log_inode(tp, ip, logflags); 4613 if (cur) { 4614 xfs_btree_del_cursor(cur, 4615 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 4616 } 4617 return error; 4618 } 4619 4620 /* 4621 * When a delalloc extent is split (e.g., due to a hole punch), the original 4622 * indlen reservation must be shared across the two new extents that are left 4623 * behind. 4624 * 4625 * Given the original reservation and the worst case indlen for the two new 4626 * extents (as calculated by xfs_bmap_worst_indlen()), split the original 4627 * reservation fairly across the two new extents. If necessary, steal available 4628 * blocks from a deleted extent to make up a reservation deficiency (e.g., if 4629 * ores == 1). The number of stolen blocks is returned. The availability and 4630 * subsequent accounting of stolen blocks is the responsibility of the caller. 4631 */ 4632 static xfs_filblks_t 4633 xfs_bmap_split_indlen( 4634 xfs_filblks_t ores, /* original res. */ 4635 xfs_filblks_t *indlen1, /* ext1 worst indlen */ 4636 xfs_filblks_t *indlen2, /* ext2 worst indlen */ 4637 xfs_filblks_t avail) /* stealable blocks */ 4638 { 4639 xfs_filblks_t len1 = *indlen1; 4640 xfs_filblks_t len2 = *indlen2; 4641 xfs_filblks_t nres = len1 + len2; /* new total res. */ 4642 xfs_filblks_t stolen = 0; 4643 xfs_filblks_t resfactor; 4644 4645 /* 4646 * Steal as many blocks as we can to try and satisfy the worst case 4647 * indlen for both new extents. 4648 */ 4649 if (ores < nres && avail) 4650 stolen = XFS_FILBLKS_MIN(nres - ores, avail); 4651 ores += stolen; 4652 4653 /* nothing else to do if we've satisfied the new reservation */ 4654 if (ores >= nres) 4655 return stolen; 4656 4657 /* 4658 * We can't meet the total required reservation for the two extents. 4659 * Calculate the percent of the overall shortage between both extents 4660 * and apply this percentage to each of the requested indlen values. 4661 * This distributes the shortage fairly and reduces the chances that one 4662 * of the two extents is left with nothing when extents are repeatedly 4663 * split. 
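 *
 * Worked example (hypothetical numbers): with ores = 10,
 * *indlen1 = 8 and *indlen2 = 6 (so nres = 14) and no blocks
 * available to steal, resfactor = 1000 / 14 = 71, giving
 * len1 = 8 * 71 / 100 = 5 and len2 = 6 * 71 / 100 = 4. The one
 * remaining block is handed out by the loop below, for a final
 * split of 6 and 4 that consumes exactly the original 10 block
 * reservation.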
4664 */ 4665 resfactor = (ores * 100); 4666 do_div(resfactor, nres); 4667 len1 *= resfactor; 4668 do_div(len1, 100); 4669 len2 *= resfactor; 4670 do_div(len2, 100); 4671 ASSERT(len1 + len2 <= ores); 4672 ASSERT(len1 < *indlen1 && len2 < *indlen2); 4673 4674 /* 4675 * Hand out the remainder to each extent. If one of the two reservations 4676 * is zero, we want to make sure that one gets a block first. The loop 4677 * below starts with len1, so hand len2 a block right off the bat if it 4678 * is zero. 4679 */ 4680 ores -= (len1 + len2); 4681 ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores); 4682 if (ores && !len2 && *indlen2) { 4683 len2++; 4684 ores--; 4685 } 4686 while (ores) { 4687 if (len1 < *indlen1) { 4688 len1++; 4689 ores--; 4690 } 4691 if (!ores) 4692 break; 4693 if (len2 < *indlen2) { 4694 len2++; 4695 ores--; 4696 } 4697 } 4698 4699 *indlen1 = len1; 4700 *indlen2 = len2; 4701 4702 return stolen; 4703 } 4704 4705 int 4706 xfs_bmap_del_extent_delay( 4707 struct xfs_inode *ip, 4708 int whichfork, 4709 struct xfs_iext_cursor *icur, 4710 struct xfs_bmbt_irec *got, 4711 struct xfs_bmbt_irec *del) 4712 { 4713 struct xfs_mount *mp = ip->i_mount; 4714 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 4715 struct xfs_bmbt_irec new; 4716 int64_t da_old, da_new, da_diff = 0; 4717 xfs_fileoff_t del_endoff, got_endoff; 4718 xfs_filblks_t got_indlen, new_indlen, stolen; 4719 int state = xfs_bmap_fork_to_state(whichfork); 4720 int error = 0; 4721 bool isrt; 4722 4723 XFS_STATS_INC(mp, xs_del_exlist); 4724 4725 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 4726 del_endoff = del->br_startoff + del->br_blockcount; 4727 got_endoff = got->br_startoff + got->br_blockcount; 4728 da_old = startblockval(got->br_startblock); 4729 da_new = 0; 4730 4731 ASSERT(del->br_blockcount > 0); 4732 ASSERT(got->br_startoff <= del->br_startoff); 4733 ASSERT(got_endoff >= del_endoff); 4734 4735 if (isrt) { 4736 uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); 4737 4738 do_div(rtexts, mp->m_sb.sb_rextsize); 4739 xfs_mod_frextents(mp, rtexts); 4740 } 4741 4742 /* 4743 * Update the inode delalloc counter now and wait to update the 4744 * sb counters as we might have to borrow some blocks for the 4745 * indirect block accounting. 4746 */ 4747 error = xfs_trans_reserve_quota_nblks(NULL, ip, 4748 -((long)del->br_blockcount), 0, 4749 isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); 4750 if (error) 4751 return error; 4752 ip->i_delayed_blks -= del->br_blockcount; 4753 4754 if (got->br_startoff == del->br_startoff) 4755 state |= BMAP_LEFT_FILLING; 4756 if (got_endoff == del_endoff) 4757 state |= BMAP_RIGHT_FILLING; 4758 4759 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 4760 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 4761 /* 4762 * Matches the whole extent. Delete the entry. 4763 */ 4764 xfs_iext_remove(ip, icur, state); 4765 xfs_iext_prev(ifp, icur); 4766 break; 4767 case BMAP_LEFT_FILLING: 4768 /* 4769 * Deleting the first part of the extent. 4770 */ 4771 got->br_startoff = del_endoff; 4772 got->br_blockcount -= del->br_blockcount; 4773 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 4774 got->br_blockcount), da_old); 4775 got->br_startblock = nullstartblock((int)da_new); 4776 xfs_iext_update_extent(ip, state, icur, got); 4777 break; 4778 case BMAP_RIGHT_FILLING: 4779 /* 4780 * Deleting the last part of the extent. 
4781 */ 4782 got->br_blockcount = got->br_blockcount - del->br_blockcount; 4783 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, 4784 got->br_blockcount), da_old); 4785 got->br_startblock = nullstartblock((int)da_new); 4786 xfs_iext_update_extent(ip, state, icur, got); 4787 break; 4788 case 0: 4789 /* 4790 * Deleting the middle of the extent. 4791 * 4792 * Distribute the original indlen reservation across the two new 4793 * extents. Steal blocks from the deleted extent if necessary. 4794 * Stealing blocks simply fudges the fdblocks accounting below. 4795 * Warn if either of the new indlen reservations is zero as this 4796 * can lead to delalloc problems. 4797 */ 4798 got->br_blockcount = del->br_startoff - got->br_startoff; 4799 got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); 4800 4801 new.br_blockcount = got_endoff - del_endoff; 4802 new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount); 4803 4804 WARN_ON_ONCE(!got_indlen || !new_indlen); 4805 stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen, 4806 del->br_blockcount); 4807 4808 got->br_startblock = nullstartblock((int)got_indlen); 4809 4810 new.br_startoff = del_endoff; 4811 new.br_state = got->br_state; 4812 new.br_startblock = nullstartblock((int)new_indlen); 4813 4814 xfs_iext_update_extent(ip, state, icur, got); 4815 xfs_iext_next(ifp, icur); 4816 xfs_iext_insert(ip, icur, &new, state); 4817 4818 da_new = got_indlen + new_indlen - stolen; 4819 del->br_blockcount -= stolen; 4820 break; 4821 } 4822 4823 ASSERT(da_old >= da_new); 4824 da_diff = da_old - da_new; 4825 if (!isrt) 4826 da_diff += del->br_blockcount; 4827 if (da_diff) 4828 xfs_mod_fdblocks(mp, da_diff, false); 4829 return error; 4830 } 4831 4832 void 4833 xfs_bmap_del_extent_cow( 4834 struct xfs_inode *ip, 4835 struct xfs_iext_cursor *icur, 4836 struct xfs_bmbt_irec *got, 4837 struct xfs_bmbt_irec *del) 4838 { 4839 struct xfs_mount *mp = ip->i_mount; 4840 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); 4841 struct xfs_bmbt_irec new; 4842 xfs_fileoff_t del_endoff, got_endoff; 4843 int state = BMAP_COWFORK; 4844 4845 XFS_STATS_INC(mp, xs_del_exlist); 4846 4847 del_endoff = del->br_startoff + del->br_blockcount; 4848 got_endoff = got->br_startoff + got->br_blockcount; 4849 4850 ASSERT(del->br_blockcount > 0); 4851 ASSERT(got->br_startoff <= del->br_startoff); 4852 ASSERT(got_endoff >= del_endoff); 4853 ASSERT(!isnullstartblock(got->br_startblock)); 4854 4855 if (got->br_startoff == del->br_startoff) 4856 state |= BMAP_LEFT_FILLING; 4857 if (got_endoff == del_endoff) 4858 state |= BMAP_RIGHT_FILLING; 4859 4860 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 4861 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 4862 /* 4863 * Matches the whole extent. Delete the entry. 4864 */ 4865 xfs_iext_remove(ip, icur, state); 4866 xfs_iext_prev(ifp, icur); 4867 break; 4868 case BMAP_LEFT_FILLING: 4869 /* 4870 * Deleting the first part of the extent. 4871 */ 4872 got->br_startoff = del_endoff; 4873 got->br_blockcount -= del->br_blockcount; 4874 got->br_startblock = del->br_startblock + del->br_blockcount; 4875 xfs_iext_update_extent(ip, state, icur, got); 4876 break; 4877 case BMAP_RIGHT_FILLING: 4878 /* 4879 * Deleting the last part of the extent. 4880 */ 4881 got->br_blockcount -= del->br_blockcount; 4882 xfs_iext_update_extent(ip, state, icur, got); 4883 break; 4884 case 0: 4885 /* 4886 * Deleting the middle of the extent. 
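 * E.g. (hypothetical numbers): punching del = [14, 18) out of
 * got = [10, 30) mapped at disk block 100 leaves got as [10, 14)
 * still at block 100 and inserts new = [18, 30) at disk block
 * 108, so the surviving tail keeps its original on-disk blocks.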
4887 */ 4888 got->br_blockcount = del->br_startoff - got->br_startoff; 4889 4890 new.br_startoff = del_endoff; 4891 new.br_blockcount = got_endoff - del_endoff; 4892 new.br_state = got->br_state; 4893 new.br_startblock = del->br_startblock + del->br_blockcount; 4894 4895 xfs_iext_update_extent(ip, state, icur, got); 4896 xfs_iext_next(ifp, icur); 4897 xfs_iext_insert(ip, icur, &new, state); 4898 break; 4899 } 4900 ip->i_delayed_blks -= del->br_blockcount; 4901 } 4902 4903 /* 4904 * Called by xfs_bmapi to update file extent records and the btree 4905 * after removing space. 4906 */ 4907 STATIC int /* error */ 4908 xfs_bmap_del_extent_real( 4909 xfs_inode_t *ip, /* incore inode pointer */ 4910 xfs_trans_t *tp, /* current transaction pointer */ 4911 struct xfs_iext_cursor *icur, 4912 struct xfs_defer_ops *dfops, /* list of extents to be freed */ 4913 xfs_btree_cur_t *cur, /* if null, not a btree */ 4914 xfs_bmbt_irec_t *del, /* data to remove from extents */ 4915 int *logflagsp, /* inode logging flags */ 4916 int whichfork, /* data or attr fork */ 4917 int bflags) /* bmapi flags */ 4918 { 4919 xfs_fsblock_t del_endblock=0; /* first block past del */ 4920 xfs_fileoff_t del_endoff; /* first offset past del */ 4921 int do_fx; /* free extent at end of routine */ 4922 int error; /* error return value */ 4923 int flags = 0;/* inode logging flags */ 4924 struct xfs_bmbt_irec got; /* current extent entry */ 4925 xfs_fileoff_t got_endoff; /* first offset past got */ 4926 int i; /* temp state */ 4927 xfs_ifork_t *ifp; /* inode fork pointer */ 4928 xfs_mount_t *mp; /* mount structure */ 4929 xfs_filblks_t nblks; /* quota/sb block count */ 4930 xfs_bmbt_irec_t new; /* new record to be inserted */ 4931 /* REFERENCED */ 4932 uint qfield; /* quota field to update */ 4933 int state = xfs_bmap_fork_to_state(whichfork); 4934 struct xfs_bmbt_irec old; 4935 4936 mp = ip->i_mount; 4937 XFS_STATS_INC(mp, xs_del_exlist); 4938 4939 ifp = XFS_IFORK_PTR(ip, whichfork); 4940 ASSERT(del->br_blockcount > 0); 4941 xfs_iext_get_extent(ifp, icur, &got); 4942 ASSERT(got.br_startoff <= del->br_startoff); 4943 del_endoff = del->br_startoff + del->br_blockcount; 4944 got_endoff = got.br_startoff + got.br_blockcount; 4945 ASSERT(got_endoff >= del_endoff); 4946 ASSERT(!isnullstartblock(got.br_startblock)); 4947 qfield = 0; 4948 error = 0; 4949 4950 /* 4951 * If it's the case where the directory code is running with no block 4952 * reservation, and the deleted block is in the middle of its extent, 4953 * and the resulting insert of an extent would cause transformation to 4954 * btree format, then reject it. The calling code will then swap blocks 4955 * around instead. We have to do this now, rather than waiting for the 4956 * conversion to btree format, since the transaction will be dirty then. 
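 * That rejection is the -ENOSPC return just below: deleting a
 * range from the middle of an extent adds a record, and with a
 * zero block reservation the resulting extents-to-btree
 * conversion would have no blocks to allocate from.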
4957 */ 4958 if (tp->t_blk_res == 0 && 4959 XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && 4960 XFS_IFORK_NEXTENTS(ip, whichfork) >= 4961 XFS_IFORK_MAXEXT(ip, whichfork) && 4962 del->br_startoff > got.br_startoff && del_endoff < got_endoff) 4963 return -ENOSPC; 4964 4965 flags = XFS_ILOG_CORE; 4966 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { 4967 xfs_fsblock_t bno; 4968 xfs_filblks_t len; 4969 4970 ASSERT(do_mod(del->br_blockcount, mp->m_sb.sb_rextsize) == 0); 4971 ASSERT(do_mod(del->br_startblock, mp->m_sb.sb_rextsize) == 0); 4972 bno = del->br_startblock; 4973 len = del->br_blockcount; 4974 do_div(bno, mp->m_sb.sb_rextsize); 4975 do_div(len, mp->m_sb.sb_rextsize); 4976 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len); 4977 if (error) 4978 goto done; 4979 do_fx = 0; 4980 nblks = len * mp->m_sb.sb_rextsize; 4981 qfield = XFS_TRANS_DQ_RTBCOUNT; 4982 } else { 4983 do_fx = 1; 4984 nblks = del->br_blockcount; 4985 qfield = XFS_TRANS_DQ_BCOUNT; 4986 } 4987 4988 del_endblock = del->br_startblock + del->br_blockcount; 4989 if (cur) { 4990 error = xfs_bmbt_lookup_eq(cur, &got, &i); 4991 if (error) 4992 goto done; 4993 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 4994 } 4995 4996 if (got.br_startoff == del->br_startoff) 4997 state |= BMAP_LEFT_FILLING; 4998 if (got_endoff == del_endoff) 4999 state |= BMAP_RIGHT_FILLING; 5000 5001 switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING)) { 5002 case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING: 5003 /* 5004 * Matches the whole extent. Delete the entry. 5005 */ 5006 xfs_iext_remove(ip, icur, state); 5007 xfs_iext_prev(ifp, icur); 5008 XFS_IFORK_NEXT_SET(ip, whichfork, 5009 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5010 flags |= XFS_ILOG_CORE; 5011 if (!cur) { 5012 flags |= xfs_ilog_fext(whichfork); 5013 break; 5014 } 5015 if ((error = xfs_btree_delete(cur, &i))) 5016 goto done; 5017 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 5018 break; 5019 case BMAP_LEFT_FILLING: 5020 /* 5021 * Deleting the first part of the extent. 5022 */ 5023 got.br_startoff = del_endoff; 5024 got.br_startblock = del_endblock; 5025 got.br_blockcount -= del->br_blockcount; 5026 xfs_iext_update_extent(ip, state, icur, &got); 5027 if (!cur) { 5028 flags |= xfs_ilog_fext(whichfork); 5029 break; 5030 } 5031 error = xfs_bmbt_update(cur, &got); 5032 if (error) 5033 goto done; 5034 break; 5035 case BMAP_RIGHT_FILLING: 5036 /* 5037 * Deleting the last part of the extent. 5038 */ 5039 got.br_blockcount -= del->br_blockcount; 5040 xfs_iext_update_extent(ip, state, icur, &got); 5041 if (!cur) { 5042 flags |= xfs_ilog_fext(whichfork); 5043 break; 5044 } 5045 error = xfs_bmbt_update(cur, &got); 5046 if (error) 5047 goto done; 5048 break; 5049 case 0: 5050 /* 5051 * Deleting the middle of the extent. 5052 */ 5053 old = got; 5054 5055 got.br_blockcount = del->br_startoff - got.br_startoff; 5056 xfs_iext_update_extent(ip, state, icur, &got); 5057 5058 new.br_startoff = del_endoff; 5059 new.br_blockcount = got_endoff - del_endoff; 5060 new.br_state = got.br_state; 5061 new.br_startblock = del_endblock; 5062 5063 flags |= XFS_ILOG_CORE; 5064 if (cur) { 5065 error = xfs_bmbt_update(cur, &got); 5066 if (error) 5067 goto done; 5068 error = xfs_btree_increment(cur, 0, &i); 5069 if (error) 5070 goto done; 5071 cur->bc_rec.b = new; 5072 error = xfs_btree_insert(cur, &i); 5073 if (error && error != -ENOSPC) 5074 goto done; 5075 /* 5076 * If we get ENOSPC back from the btree insert, it tried a 5077 * split, and we have a zero block reservation.
Fix up 5078 * our state and return the error. 5079 */ 5080 if (error == -ENOSPC) { 5081 /* 5082 * Reset the cursor, don't trust it after any 5083 * insert operation. 5084 */ 5085 error = xfs_bmbt_lookup_eq(cur, &got, &i); 5086 if (error) 5087 goto done; 5088 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 5089 /* 5090 * Update the btree record back 5091 * to the original value. 5092 */ 5093 error = xfs_bmbt_update(cur, &old); 5094 if (error) 5095 goto done; 5096 /* 5097 * Reset the extent record back 5098 * to the original value. 5099 */ 5100 xfs_iext_update_extent(ip, state, icur, &old); 5101 flags = 0; 5102 error = -ENOSPC; 5103 goto done; 5104 } 5105 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 5106 } else 5107 flags |= xfs_ilog_fext(whichfork); 5108 XFS_IFORK_NEXT_SET(ip, whichfork, 5109 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5110 xfs_iext_next(ifp, icur); 5111 xfs_iext_insert(ip, icur, &new, state); 5112 break; 5113 } 5114 5115 /* remove reverse mapping */ 5116 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del); 5117 if (error) 5118 goto done; 5119 5120 /* 5121 * If we need to, add to list of extents to delete. 5122 */ 5123 if (do_fx && !(bflags & XFS_BMAPI_REMAP)) { 5124 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { 5125 error = xfs_refcount_decrease_extent(mp, dfops, del); 5126 if (error) 5127 goto done; 5128 } else { 5129 __xfs_bmap_add_free(mp, dfops, del->br_startblock, 5130 del->br_blockcount, NULL, 5131 (bflags & XFS_BMAPI_NODISCARD) || 5132 del->br_state == XFS_EXT_UNWRITTEN); 5133 } 5134 } 5135 5136 /* 5137 * Adjust inode # blocks in the file. 5138 */ 5139 if (nblks) 5140 ip->i_d.di_nblocks -= nblks; 5141 /* 5142 * Adjust quota data. 5143 */ 5144 if (qfield && !(bflags & XFS_BMAPI_REMAP)) 5145 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); 5146 5147 done: 5148 *logflagsp = flags; 5149 return error; 5150 } 5151 5152 /* 5153 * Unmap (remove) blocks from a file. 5154 * If nexts is nonzero then the number of extents to remove is limited to 5155 * that value. If not all extents in the block range can be removed then 5156 * *done is set. 5157 */ 5158 int /* error */ 5159 __xfs_bunmapi( 5160 xfs_trans_t *tp, /* transaction pointer */ 5161 struct xfs_inode *ip, /* incore inode */ 5162 xfs_fileoff_t start, /* first file offset deleted */ 5163 xfs_filblks_t *rlen, /* i/o: amount remaining */ 5164 int flags, /* misc flags */ 5165 xfs_extnum_t nexts, /* number of extents max */ 5166 xfs_fsblock_t *firstblock, /* first allocated block 5167 controls a.g. 
for allocs */ 5168 struct xfs_defer_ops *dfops) /* i/o: deferred updates */ 5169 { 5170 xfs_btree_cur_t *cur; /* bmap btree cursor */ 5171 xfs_bmbt_irec_t del; /* extent being deleted */ 5172 int error; /* error return value */ 5173 xfs_extnum_t extno; /* extent number in list */ 5174 xfs_bmbt_irec_t got; /* current extent record */ 5175 xfs_ifork_t *ifp; /* inode fork pointer */ 5176 int isrt; /* freeing in rt area */ 5177 int logflags; /* transaction logging flags */ 5178 xfs_extlen_t mod; /* rt extent offset */ 5179 xfs_mount_t *mp; /* mount structure */ 5180 int tmp_logflags; /* partial logging flags */ 5181 int wasdel; /* was a delayed alloc extent */ 5182 int whichfork; /* data or attribute fork */ 5183 xfs_fsblock_t sum; 5184 xfs_filblks_t len = *rlen; /* length to unmap in file */ 5185 xfs_fileoff_t max_len; 5186 xfs_agnumber_t prev_agno = NULLAGNUMBER, agno; 5187 xfs_fileoff_t end; 5188 struct xfs_iext_cursor icur; 5189 bool done = false; 5190 5191 trace_xfs_bunmap(ip, start, len, flags, _RET_IP_); 5192 5193 whichfork = xfs_bmapi_whichfork(flags); 5194 ASSERT(whichfork != XFS_COW_FORK); 5195 ifp = XFS_IFORK_PTR(ip, whichfork); 5196 if (unlikely( 5197 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5198 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) { 5199 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW, 5200 ip->i_mount); 5201 return -EFSCORRUPTED; 5202 } 5203 mp = ip->i_mount; 5204 if (XFS_FORCED_SHUTDOWN(mp)) 5205 return -EIO; 5206 5207 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5208 ASSERT(len > 0); 5209 ASSERT(nexts >= 0); 5210 5211 /* 5212 * Guesstimate how many blocks we can unmap without running the risk of 5213 * blowing out the transaction with a mix of EFIs and reflink 5214 * adjustments. 5215 */ 5216 if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) 5217 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); 5218 else 5219 max_len = len; 5220 5221 if (!(ifp->if_flags & XFS_IFEXTENTS) && 5222 (error = xfs_iread_extents(tp, ip, whichfork))) 5223 return error; 5224 if (xfs_iext_count(ifp) == 0) { 5225 *rlen = 0; 5226 return 0; 5227 } 5228 XFS_STATS_INC(mp, xs_blk_unmap); 5229 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 5230 end = start + len; 5231 5232 if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { 5233 *rlen = 0; 5234 return 0; 5235 } 5236 end--; 5237 5238 logflags = 0; 5239 if (ifp->if_flags & XFS_IFBROOT) { 5240 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE); 5241 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5242 cur->bc_private.b.firstblock = *firstblock; 5243 cur->bc_private.b.dfops = dfops; 5244 cur->bc_private.b.flags = 0; 5245 } else 5246 cur = NULL; 5247 5248 if (isrt) { 5249 /* 5250 * Synchronize by locking the bitmap inode. 5251 */ 5252 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP); 5253 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL); 5254 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM); 5255 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL); 5256 } 5257 5258 extno = 0; 5259 while (end != (xfs_fileoff_t)-1 && end >= start && 5260 (nexts == 0 || extno < nexts) && max_len > 0) { 5261 /* 5262 * Is the found extent after a hole in which end lives? 5263 * Just back up to the previous extent, if so. 5264 */ 5265 if (got.br_startoff > end && 5266 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5267 done = true; 5268 break; 5269 } 5270 /* 5271 * Is the last block of this extent before the range 5272 * we're supposed to delete? If so, we're done. 
5273 */ 5274 end = XFS_FILEOFF_MIN(end, 5275 got.br_startoff + got.br_blockcount - 1); 5276 if (end < start) 5277 break; 5278 /* 5279 * Then deal with the (possibly delayed) allocated space 5280 * we found. 5281 */ 5282 del = got; 5283 wasdel = isnullstartblock(del.br_startblock); 5284 5285 /* 5286 * Make sure we don't touch multiple AGF headers out of order 5287 * in a single transaction, as that could cause AB-BA deadlocks. 5288 */ 5289 if (!wasdel) { 5290 agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); 5291 if (prev_agno != NULLAGNUMBER && prev_agno > agno) 5292 break; 5293 prev_agno = agno; 5294 } 5295 if (got.br_startoff < start) { 5296 del.br_startoff = start; 5297 del.br_blockcount -= start - got.br_startoff; 5298 if (!wasdel) 5299 del.br_startblock += start - got.br_startoff; 5300 } 5301 if (del.br_startoff + del.br_blockcount > end + 1) 5302 del.br_blockcount = end + 1 - del.br_startoff; 5303 5304 /* How much can we safely unmap? */ 5305 if (max_len < del.br_blockcount) { 5306 del.br_startoff += del.br_blockcount - max_len; 5307 if (!wasdel) 5308 del.br_startblock += del.br_blockcount - max_len; 5309 del.br_blockcount = max_len; 5310 } 5311 5312 sum = del.br_startblock + del.br_blockcount; 5313 if (isrt && 5314 (mod = do_mod(sum, mp->m_sb.sb_rextsize))) { 5315 /* 5316 * Realtime extent not lined up at the end. 5317 * The extent could have been split into written 5318 * and unwritten pieces, or we could just be 5319 * unmapping part of it. But we can't really 5320 * get rid of part of a realtime extent. 5321 */ 5322 if (del.br_state == XFS_EXT_UNWRITTEN || 5323 !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 5324 /* 5325 * This piece is unwritten, or we're not 5326 * using unwritten extents. Skip over it. 5327 */ 5328 ASSERT(end >= mod); 5329 end -= mod > del.br_blockcount ? 5330 del.br_blockcount : mod; 5331 if (end < got.br_startoff && 5332 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5333 done = true; 5334 break; 5335 } 5336 continue; 5337 } 5338 /* 5339 * It's written, turn it unwritten. 5340 * This is better than zeroing it. 5341 */ 5342 ASSERT(del.br_state == XFS_EXT_NORM); 5343 ASSERT(tp->t_blk_res > 0); 5344 /* 5345 * If this spans a realtime extent boundary, 5346 * chop it back to the start of the one we end at. 5347 */ 5348 if (del.br_blockcount > mod) { 5349 del.br_startoff += del.br_blockcount - mod; 5350 del.br_startblock += del.br_blockcount - mod; 5351 del.br_blockcount = mod; 5352 } 5353 del.br_state = XFS_EXT_UNWRITTEN; 5354 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5355 whichfork, &icur, &cur, &del, 5356 firstblock, dfops, &logflags); 5357 if (error) 5358 goto error0; 5359 goto nodelete; 5360 } 5361 if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) { 5362 /* 5363 * Realtime extent is lined up at the end but not 5364 * at the front. We'll get rid of full extents if 5365 * we can. 5366 */ 5367 mod = mp->m_sb.sb_rextsize - mod; 5368 if (del.br_blockcount > mod) { 5369 del.br_blockcount -= mod; 5370 del.br_startoff += mod; 5371 del.br_startblock += mod; 5372 } else if ((del.br_startoff == start && 5373 (del.br_state == XFS_EXT_UNWRITTEN || 5374 tp->t_blk_res == 0)) || 5375 !xfs_sb_version_hasextflgbit(&mp->m_sb)) { 5376 /* 5377 * Can't make it unwritten. There isn't 5378 * a full extent here so just skip it. 
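 * (Here mod is the distance from del.br_startblock up to the
 * next realtime extent boundary, e.g. mod = 4 - (10 % 4) = 2 for
 * sb_rextsize = 4 and br_startblock = 10; if the deletion cannot
 * even reach that boundary it contains no complete rt extent.)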
5379 */ 5380 ASSERT(end >= del.br_blockcount); 5381 end -= del.br_blockcount; 5382 if (got.br_startoff > end && 5383 !xfs_iext_prev_extent(ifp, &icur, &got)) { 5384 done = true; 5385 break; 5386 } 5387 continue; 5388 } else if (del.br_state == XFS_EXT_UNWRITTEN) { 5389 struct xfs_bmbt_irec prev; 5390 5391 /* 5392 * This one is already unwritten. 5393 * It must have a written left neighbor. 5394 * Unwrite the killed part of that one and 5395 * try again. 5396 */ 5397 if (!xfs_iext_prev_extent(ifp, &icur, &prev)) 5398 ASSERT(0); 5399 ASSERT(prev.br_state == XFS_EXT_NORM); 5400 ASSERT(!isnullstartblock(prev.br_startblock)); 5401 ASSERT(del.br_startblock == 5402 prev.br_startblock + prev.br_blockcount); 5403 if (prev.br_startoff < start) { 5404 mod = start - prev.br_startoff; 5405 prev.br_blockcount -= mod; 5406 prev.br_startblock += mod; 5407 prev.br_startoff = start; 5408 } 5409 prev.br_state = XFS_EXT_UNWRITTEN; 5410 error = xfs_bmap_add_extent_unwritten_real(tp, 5411 ip, whichfork, &icur, &cur, 5412 &prev, firstblock, dfops, 5413 &logflags); 5414 if (error) 5415 goto error0; 5416 goto nodelete; 5417 } else { 5418 ASSERT(del.br_state == XFS_EXT_NORM); 5419 del.br_state = XFS_EXT_UNWRITTEN; 5420 error = xfs_bmap_add_extent_unwritten_real(tp, 5421 ip, whichfork, &icur, &cur, 5422 &del, firstblock, dfops, 5423 &logflags); 5424 if (error) 5425 goto error0; 5426 goto nodelete; 5427 } 5428 } 5429 5430 if (wasdel) { 5431 error = xfs_bmap_del_extent_delay(ip, whichfork, &icur, 5432 &got, &del); 5433 } else { 5434 error = xfs_bmap_del_extent_real(ip, tp, &icur, dfops, 5435 cur, &del, &tmp_logflags, whichfork, 5436 flags); 5437 logflags |= tmp_logflags; 5438 } 5439 5440 if (error) 5441 goto error0; 5442 5443 max_len -= del.br_blockcount; 5444 end = del.br_startoff - 1; 5445 nodelete: 5446 /* 5447 * If not done go on to the next (previous) record. 5448 */ 5449 if (end != (xfs_fileoff_t)-1 && end >= start) { 5450 if (!xfs_iext_get_extent(ifp, &icur, &got) || 5451 (got.br_startoff > end && 5452 !xfs_iext_prev_extent(ifp, &icur, &got))) { 5453 done = true; 5454 break; 5455 } 5456 extno++; 5457 } 5458 } 5459 if (done || end == (xfs_fileoff_t)-1 || end < start) 5460 *rlen = 0; 5461 else 5462 *rlen = end - start + 1; 5463 5464 /* 5465 * Convert to a btree if necessary. 5466 */ 5467 if (xfs_bmap_needs_btree(ip, whichfork)) { 5468 ASSERT(cur == NULL); 5469 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, 5470 &cur, 0, &tmp_logflags, whichfork); 5471 logflags |= tmp_logflags; 5472 if (error) 5473 goto error0; 5474 } 5475 /* 5476 * transform from btree to extents, give it cur 5477 */ 5478 else if (xfs_bmap_wants_extents(ip, whichfork)) { 5479 ASSERT(cur != NULL); 5480 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags, 5481 whichfork); 5482 logflags |= tmp_logflags; 5483 if (error) 5484 goto error0; 5485 } 5486 /* 5487 * transform from extents to local? 5488 */ 5489 error = 0; 5490 error0: 5491 /* 5492 * Log everything. Do this after conversion, there's no point in 5493 * logging the extent records if we've converted to btree format. 5494 */ 5495 if ((logflags & xfs_ilog_fext(whichfork)) && 5496 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS) 5497 logflags &= ~xfs_ilog_fext(whichfork); 5498 else if ((logflags & xfs_ilog_fbroot(whichfork)) && 5499 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) 5500 logflags &= ~xfs_ilog_fbroot(whichfork); 5501 /* 5502 * Log inode even in the error case, if the transaction 5503 * is dirty we'll need to shut down the filesystem. 
5504 */ 5505 if (logflags) 5506 xfs_trans_log_inode(tp, ip, logflags); 5507 if (cur) { 5508 if (!error) { 5509 *firstblock = cur->bc_private.b.firstblock; 5510 cur->bc_private.b.allocated = 0; 5511 } 5512 xfs_btree_del_cursor(cur, 5513 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5514 } 5515 return error; 5516 } 5517 5518 /* Unmap a range of a file. */ 5519 int 5520 xfs_bunmapi( 5521 xfs_trans_t *tp, 5522 struct xfs_inode *ip, 5523 xfs_fileoff_t bno, 5524 xfs_filblks_t len, 5525 int flags, 5526 xfs_extnum_t nexts, 5527 xfs_fsblock_t *firstblock, 5528 struct xfs_defer_ops *dfops, 5529 int *done) 5530 { 5531 int error; 5532 5533 error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock, 5534 dfops); 5535 *done = (len == 0); 5536 return error; 5537 } 5538 5539 /* 5540 * Determine whether an extent shift can be accomplished by a merge with the 5541 * extent that precedes the target hole of the shift. 5542 */ 5543 STATIC bool 5544 xfs_bmse_can_merge( 5545 struct xfs_bmbt_irec *left, /* preceding extent */ 5546 struct xfs_bmbt_irec *got, /* current extent to shift */ 5547 xfs_fileoff_t shift) /* shift fsb */ 5548 { 5549 xfs_fileoff_t startoff; 5550 5551 startoff = got->br_startoff - shift; 5552 5553 /* 5554 * The extent, once shifted, must be adjacent in-file and on-disk with 5555 * the preceding extent. 5556 */ 5557 if ((left->br_startoff + left->br_blockcount != startoff) || 5558 (left->br_startblock + left->br_blockcount != got->br_startblock) || 5559 (left->br_state != got->br_state) || 5560 (left->br_blockcount + got->br_blockcount > MAXEXTLEN)) 5561 return false; 5562 5563 return true; 5564 } 5565 5566 /* 5567 * A bmap extent shift adjusts the file offset of an extent to fill a preceding 5568 * hole in the file. If an extent shift would result in the extent being fully 5569 * adjacent to the extent that currently precedes the hole, we can merge with 5570 * the preceding extent rather than do the shift. 5571 * 5572 * This function assumes the caller has verified a shift-by-merge is possible 5573 * with the provided extents via xfs_bmse_can_merge(). 5574 */ 5575 STATIC int 5576 xfs_bmse_merge( 5577 struct xfs_inode *ip, 5578 int whichfork, 5579 xfs_fileoff_t shift, /* shift fsb */ 5580 struct xfs_iext_cursor *icur, 5581 struct xfs_bmbt_irec *got, /* extent to shift */ 5582 struct xfs_bmbt_irec *left, /* preceding extent */ 5583 struct xfs_btree_cur *cur, 5584 int *logflags, /* output */ 5585 struct xfs_defer_ops *dfops) 5586 { 5587 struct xfs_bmbt_irec new; 5588 xfs_filblks_t blockcount; 5589 int error, i; 5590 struct xfs_mount *mp = ip->i_mount; 5591 5592 blockcount = left->br_blockcount + got->br_blockcount; 5593 5594 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 5595 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 5596 ASSERT(xfs_bmse_can_merge(left, got, shift)); 5597 5598 new = *left; 5599 new.br_blockcount = blockcount; 5600 5601 /* 5602 * Update the on-disk extent count, the btree if necessary and log the 5603 * inode. 
5604 */ 5605 XFS_IFORK_NEXT_SET(ip, whichfork, 5606 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5607 *logflags |= XFS_ILOG_CORE; 5608 if (!cur) { 5609 *logflags |= XFS_ILOG_DEXT; 5610 goto done; 5611 } 5612 5613 /* lookup and remove the extent to merge */ 5614 error = xfs_bmbt_lookup_eq(cur, got, &i); 5615 if (error) 5616 return error; 5617 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5618 5619 error = xfs_btree_delete(cur, &i); 5620 if (error) 5621 return error; 5622 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5623 5624 /* lookup and update size of the previous extent */ 5625 error = xfs_bmbt_lookup_eq(cur, left, &i); 5626 if (error) 5627 return error; 5628 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5629 5630 error = xfs_bmbt_update(cur, &new); 5631 if (error) 5632 return error; 5633 5634 done: 5635 xfs_iext_remove(ip, icur, 0); 5636 xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur); 5637 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur, 5638 &new); 5639 5640 /* update reverse mapping. rmap functions merge the rmaps for us */ 5641 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, got); 5642 if (error) 5643 return error; 5644 memcpy(&new, got, sizeof(new)); 5645 new.br_startoff = left->br_startoff + left->br_blockcount; 5646 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &new); 5647 } 5648 5649 static int 5650 xfs_bmap_shift_update_extent( 5651 struct xfs_inode *ip, 5652 int whichfork, 5653 struct xfs_iext_cursor *icur, 5654 struct xfs_bmbt_irec *got, 5655 struct xfs_btree_cur *cur, 5656 int *logflags, 5657 struct xfs_defer_ops *dfops, 5658 xfs_fileoff_t startoff) 5659 { 5660 struct xfs_mount *mp = ip->i_mount; 5661 struct xfs_bmbt_irec prev = *got; 5662 int error, i; 5663 5664 *logflags |= XFS_ILOG_CORE; 5665 5666 got->br_startoff = startoff; 5667 5668 if (cur) { 5669 error = xfs_bmbt_lookup_eq(cur, &prev, &i); 5670 if (error) 5671 return error; 5672 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 5673 5674 error = xfs_bmbt_update(cur, got); 5675 if (error) 5676 return error; 5677 } else { 5678 *logflags |= XFS_ILOG_DEXT; 5679 } 5680 5681 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), icur, 5682 got); 5683 5684 /* update reverse mapping */ 5685 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &prev); 5686 if (error) 5687 return error; 5688 return xfs_rmap_map_extent(mp, dfops, ip, whichfork, got); 5689 } 5690 5691 int 5692 xfs_bmap_collapse_extents( 5693 struct xfs_trans *tp, 5694 struct xfs_inode *ip, 5695 xfs_fileoff_t *next_fsb, 5696 xfs_fileoff_t offset_shift_fsb, 5697 bool *done, 5698 xfs_fsblock_t *firstblock, 5699 struct xfs_defer_ops *dfops) 5700 { 5701 int whichfork = XFS_DATA_FORK; 5702 struct xfs_mount *mp = ip->i_mount; 5703 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 5704 struct xfs_btree_cur *cur = NULL; 5705 struct xfs_bmbt_irec got, prev; 5706 struct xfs_iext_cursor icur; 5707 xfs_fileoff_t new_startoff; 5708 int error = 0; 5709 int logflags = 0; 5710 5711 if (unlikely(XFS_TEST_ERROR( 5712 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 5713 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 5714 mp, XFS_ERRTAG_BMAPIFORMAT))) { 5715 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 5716 return -EFSCORRUPTED; 5717 } 5718 5719 if (XFS_FORCED_SHUTDOWN(mp)) 5720 return -EIO; 5721 5722 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL)); 5723 5724 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 5725 error = xfs_iread_extents(tp, ip, whichfork); 5726 if (error) 5727 return error; 5728 } 5729 5730 if (ifp->if_flags & 
int
xfs_bmap_collapse_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done,
	xfs_fsblock_t		*firstblock,
	struct xfs_defer_ops	*dfops)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, prev;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
	}

	if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}
	XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
				del_cursor);

	new_startoff = got.br_startoff - offset_shift_fsb;
	if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
		if (new_startoff < prev.br_startoff + prev.br_blockcount) {
			error = -EINVAL;
			goto del_cursor;
		}

		if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
			error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
					&icur, &got, &prev, cur, &logflags,
					dfops);
			if (error)
				goto del_cursor;
			goto done;
		}
	} else {
		if (got.br_startoff < offset_shift_fsb) {
			error = -EINVAL;
			goto del_cursor;
		}
	}

	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
			&logflags, dfops, new_startoff);
	if (error)
		goto del_cursor;

done:
	if (!xfs_iext_next_extent(ifp, &icur, &got)) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
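/*
 * Shift extents right by offset_shift_fsb, one extent per call, walking
 * backwards from *next_fsb (or from the last extent in the file when
 * *next_fsb is NULLFSBLOCK) towards stop_fsb.  As above, the caller loops
 * until *done is set.
 */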
int
xfs_bmap_insert_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	bool			*done,
	xfs_fileoff_t		stop_fsb,
	xfs_fsblock_t		*firstblock,
	struct xfs_defer_ops	*dfops)
{
	int			whichfork = XFS_DATA_FORK;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_cur	*cur = NULL;
	struct xfs_bmbt_irec	got, next;
	struct xfs_iext_cursor	icur;
	xfs_fileoff_t		new_startoff;
	int			error = 0;
	int			logflags = 0;

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT))) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL));

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
	}

	if (*next_fsb == NULLFSBLOCK) {
		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &got) ||
		    stop_fsb > got.br_startoff) {
			*done = true;
			goto del_cursor;
		}
	} else {
		if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) {
			*done = true;
			goto del_cursor;
		}
	}
	XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
				del_cursor);

	if (stop_fsb >= got.br_startoff + got.br_blockcount) {
		error = -EIO;
		goto del_cursor;
	}

	new_startoff = got.br_startoff + offset_shift_fsb;
	if (xfs_iext_peek_next_extent(ifp, &icur, &next)) {
		if (new_startoff + got.br_blockcount > next.br_startoff) {
			error = -EINVAL;
			goto del_cursor;
		}

		/*
		 * Unlike a left shift (which involves a hole punch), a right
		 * shift does not modify extent neighbors in any way. We should
		 * never find mergeable extents in this scenario. Check anyway
		 * and warn if we encounter two extents that could be one.
		 */
		if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
			WARN_ON_ONCE(1);
	}

	error = xfs_bmap_shift_update_extent(ip, whichfork, &icur, &got, cur,
			&logflags, dfops, new_startoff);
	if (error)
		goto del_cursor;

	if (!xfs_iext_prev_extent(ifp, &icur, &got) ||
	    stop_fsb >= got.br_startoff + got.br_blockcount) {
		*done = true;
		goto del_cursor;
	}

	*next_fsb = got.br_startoff;
del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
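/*
 * For example (illustrative values): asking xfs_bmap_insert_extents() to
 * shift got = {startoff 100, blockcount 8} right by 4 while the next
 * extent starts at offset 110 fails with -EINVAL, because 104 + 8 > 110
 * and the shifted extent would overlap its right neighbour.
 */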
5941 */ 5942 if (!xfs_iext_lookup_extent(ip, ifp, split_fsb, &icur, &got) || 5943 got.br_startoff >= split_fsb) 5944 return 0; 5945 5946 gotblkcnt = split_fsb - got.br_startoff; 5947 new.br_startoff = split_fsb; 5948 new.br_startblock = got.br_startblock + gotblkcnt; 5949 new.br_blockcount = got.br_blockcount - gotblkcnt; 5950 new.br_state = got.br_state; 5951 5952 if (ifp->if_flags & XFS_IFBROOT) { 5953 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5954 cur->bc_private.b.firstblock = *firstfsb; 5955 cur->bc_private.b.dfops = dfops; 5956 cur->bc_private.b.flags = 0; 5957 error = xfs_bmbt_lookup_eq(cur, &got, &i); 5958 if (error) 5959 goto del_cursor; 5960 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); 5961 } 5962 5963 got.br_blockcount = gotblkcnt; 5964 xfs_iext_update_extent(ip, xfs_bmap_fork_to_state(whichfork), &icur, 5965 &got); 5966 5967 logflags = XFS_ILOG_CORE; 5968 if (cur) { 5969 error = xfs_bmbt_update(cur, &got); 5970 if (error) 5971 goto del_cursor; 5972 } else 5973 logflags |= XFS_ILOG_DEXT; 5974 5975 /* Add new extent */ 5976 xfs_iext_next(ifp, &icur); 5977 xfs_iext_insert(ip, &icur, &new, 0); 5978 XFS_IFORK_NEXT_SET(ip, whichfork, 5979 XFS_IFORK_NEXTENTS(ip, whichfork) + 1); 5980 5981 if (cur) { 5982 error = xfs_bmbt_lookup_eq(cur, &new, &i); 5983 if (error) 5984 goto del_cursor; 5985 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor); 5986 error = xfs_btree_insert(cur, &i); 5987 if (error) 5988 goto del_cursor; 5989 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor); 5990 } 5991 5992 /* 5993 * Convert to a btree if necessary. 5994 */ 5995 if (xfs_bmap_needs_btree(ip, whichfork)) { 5996 int tmp_logflags; /* partial log flag return val */ 5997 5998 ASSERT(cur == NULL); 5999 error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops, 6000 &cur, 0, &tmp_logflags, whichfork); 6001 logflags |= tmp_logflags; 6002 } 6003 6004 del_cursor: 6005 if (cur) { 6006 cur->bc_private.b.allocated = 0; 6007 xfs_btree_del_cursor(cur, 6008 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 6009 } 6010 6011 if (logflags) 6012 xfs_trans_log_inode(tp, ip, logflags); 6013 return error; 6014 } 6015 6016 int 6017 xfs_bmap_split_extent( 6018 struct xfs_inode *ip, 6019 xfs_fileoff_t split_fsb) 6020 { 6021 struct xfs_mount *mp = ip->i_mount; 6022 struct xfs_trans *tp; 6023 struct xfs_defer_ops dfops; 6024 xfs_fsblock_t firstfsb; 6025 int error; 6026 6027 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 6028 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp); 6029 if (error) 6030 return error; 6031 6032 xfs_ilock(ip, XFS_ILOCK_EXCL); 6033 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 6034 6035 xfs_defer_init(&dfops, &firstfsb); 6036 6037 error = xfs_bmap_split_extent_at(tp, ip, split_fsb, 6038 &firstfsb, &dfops); 6039 if (error) 6040 goto out; 6041 6042 error = xfs_defer_finish(&tp, &dfops); 6043 if (error) 6044 goto out; 6045 6046 return xfs_trans_commit(tp); 6047 6048 out: 6049 xfs_defer_cancel(&dfops); 6050 xfs_trans_cancel(tp); 6051 return error; 6052 } 6053 6054 /* Deferred mapping is only for real extents in the data fork. */ 6055 static bool 6056 xfs_bmap_is_update_needed( 6057 struct xfs_bmbt_irec *bmap) 6058 { 6059 return bmap->br_startblock != HOLESTARTBLOCK && 6060 bmap->br_startblock != DELAYSTARTBLOCK; 6061 } 6062 6063 /* Record a bmap intent. 
/* Record a bmap intent. */
static int
__xfs_bmap_add(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	enum xfs_bmap_intent_type	type,
	struct xfs_inode		*ip,
	int				whichfork,
	struct xfs_bmbt_irec		*bmap)
{
	int				error;
	struct xfs_bmap_intent		*bi;

	trace_xfs_bmap_defer(mp,
			XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
			type,
			XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
			ip->i_ino, whichfork,
			bmap->br_startoff,
			bmap->br_blockcount,
			bmap->br_state);

	bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS);
	INIT_LIST_HEAD(&bi->bi_list);
	bi->bi_type = type;
	bi->bi_owner = ip;
	bi->bi_whichfork = whichfork;
	bi->bi_bmap = *bmap;

	error = xfs_defer_ijoin(dfops, bi->bi_owner);
	if (error) {
		kmem_free(bi);
		return error;
	}

	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
	return 0;
}

/* Map an extent into a file. */
int
xfs_bmap_map_extent(
	struct xfs_mount	*mp,
	struct xfs_defer_ops	*dfops,
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*PREV)
{
	if (!xfs_bmap_is_update_needed(PREV))
		return 0;

	return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip,
			XFS_DATA_FORK, PREV);
}

/* Unmap an extent out of a file. */
int
xfs_bmap_unmap_extent(
	struct xfs_mount	*mp,
	struct xfs_defer_ops	*dfops,
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*PREV)
{
	if (!xfs_bmap_is_update_needed(PREV))
		return 0;

	return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip,
			XFS_DATA_FORK, PREV);
}

/*
 * Process one of the deferred bmap operations. We pass back the
 * btree cursor to maintain our lock on the bmapbt between calls.
 */
int
xfs_bmap_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_ops		*dfops,
	struct xfs_inode		*ip,
	enum xfs_bmap_intent_type	type,
	int				whichfork,
	xfs_fileoff_t			startoff,
	xfs_fsblock_t			startblock,
	xfs_filblks_t			*blockcount,
	xfs_exntst_t			state)
{
	xfs_fsblock_t			firstfsb;
	int				error = 0;

	/*
	 * firstfsb is tied to the transaction lifetime and is used to
	 * ensure correct AG locking order and schedule work item
	 * continuations.  XFS_BUI_MAX_FAST_EXTENTS (== 1) restricts us
	 * to only making one bmap call per transaction, so it should
	 * be safe to have it as a local variable here.
	 */
	firstfsb = NULLFSBLOCK;

	trace_xfs_bmap_deferred(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
			XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			ip->i_ino, whichfork, startoff, *blockcount, state);

	if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK))
		return -EFSCORRUPTED;

	if (XFS_TEST_ERROR(false, tp->t_mountp,
			XFS_ERRTAG_BMAP_FINISH_ONE))
		return -EIO;

	switch (type) {
	case XFS_BMAP_MAP:
		error = xfs_bmapi_remap(tp, ip, startoff, *blockcount,
				startblock, dfops, 0);
		*blockcount = 0;
		break;
	case XFS_BMAP_UNMAP:
		error = __xfs_bunmapi(tp, ip, startoff, blockcount,
				XFS_BMAPI_REMAP, 1, &firstfsb, dfops);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}

	return error;
}
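/*
 * Note that for an XFS_BMAP_UNMAP intent, __xfs_bunmapi() above may unmap
 * only part of the requested range; the unprocessed length is passed back
 * through *blockcount so that the caller can requeue the remainder for a
 * later transaction.
 */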
/* Check that an inode's extent does not have invalid flags or bad ranges. */
xfs_failaddr_t
xfs_bmap_validate_extent(
	struct xfs_inode	*ip,
	int			whichfork,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fsblock_t		endfsb;
	bool			isrt;

	isrt = XFS_IS_REALTIME_INODE(ip);
	endfsb = irec->br_startblock + irec->br_blockcount - 1;
	if (isrt) {
		if (!xfs_verify_rtbno(mp, irec->br_startblock))
			return __this_address;
		if (!xfs_verify_rtbno(mp, endfsb))
			return __this_address;
	} else {
		if (!xfs_verify_fsbno(mp, irec->br_startblock))
			return __this_address;
		if (!xfs_verify_fsbno(mp, endfsb))
			return __this_address;
		if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
		    XFS_FSB_TO_AGNO(mp, endfsb))
			return __this_address;
	}
	if (irec->br_state != XFS_EXT_NORM) {
		if (whichfork != XFS_DATA_FORK)
			return __this_address;
		if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
			return __this_address;
	}
	return NULL;
}