1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_defer.h" 14 #include "xfs_btree.h" 15 #include "xfs_bmap.h" 16 #include "xfs_refcount_btree.h" 17 #include "xfs_alloc.h" 18 #include "xfs_errortag.h" 19 #include "xfs_error.h" 20 #include "xfs_trace.h" 21 #include "xfs_trans.h" 22 #include "xfs_bit.h" 23 #include "xfs_refcount.h" 24 #include "xfs_rmap.h" 25 26 /* Allowable refcount adjustment amounts. */ 27 enum xfs_refc_adjust_op { 28 XFS_REFCOUNT_ADJUST_INCREASE = 1, 29 XFS_REFCOUNT_ADJUST_DECREASE = -1, 30 XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, 31 XFS_REFCOUNT_ADJUST_COW_FREE = -1, 32 }; 33 34 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, 35 xfs_agblock_t agbno, xfs_extlen_t aglen); 36 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, 37 xfs_agblock_t agbno, xfs_extlen_t aglen); 38 39 /* 40 * Look up the first record less than or equal to [bno, len] in the btree 41 * given by cur. 42 */ 43 int 44 xfs_refcount_lookup_le( 45 struct xfs_btree_cur *cur, 46 xfs_agblock_t bno, 47 int *stat) 48 { 49 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, 50 XFS_LOOKUP_LE); 51 cur->bc_rec.rc.rc_startblock = bno; 52 cur->bc_rec.rc.rc_blockcount = 0; 53 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 54 } 55 56 /* 57 * Look up the first record greater than or equal to [bno, len] in the btree 58 * given by cur. 59 */ 60 int 61 xfs_refcount_lookup_ge( 62 struct xfs_btree_cur *cur, 63 xfs_agblock_t bno, 64 int *stat) 65 { 66 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, 67 XFS_LOOKUP_GE); 68 cur->bc_rec.rc.rc_startblock = bno; 69 cur->bc_rec.rc.rc_blockcount = 0; 70 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 71 } 72 73 /* 74 * Look up the first record equal to [bno, len] in the btree 75 * given by cur. 76 */ 77 int 78 xfs_refcount_lookup_eq( 79 struct xfs_btree_cur *cur, 80 xfs_agblock_t bno, 81 int *stat) 82 { 83 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno, 84 XFS_LOOKUP_LE); 85 cur->bc_rec.rc.rc_startblock = bno; 86 cur->bc_rec.rc.rc_blockcount = 0; 87 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 88 } 89 90 /* Convert on-disk record to in-core format. */ 91 void 92 xfs_refcount_btrec_to_irec( 93 union xfs_btree_rec *rec, 94 struct xfs_refcount_irec *irec) 95 { 96 irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); 97 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 98 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 99 } 100 101 /* 102 * Get the data from the pointed-to record. 103 */ 104 int 105 xfs_refcount_get_rec( 106 struct xfs_btree_cur *cur, 107 struct xfs_refcount_irec *irec, 108 int *stat) 109 { 110 struct xfs_mount *mp = cur->bc_mp; 111 xfs_agnumber_t agno = cur->bc_ag.agno; 112 union xfs_btree_rec *rec; 113 int error; 114 xfs_agblock_t realstart; 115 116 error = xfs_btree_get_rec(cur, &rec, stat); 117 if (error || !*stat) 118 return error; 119 120 xfs_refcount_btrec_to_irec(rec, irec); 121 122 agno = cur->bc_ag.agno; 123 if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) 124 goto out_bad_rec; 125 126 /* handle special COW-staging state */ 127 realstart = irec->rc_startblock; 128 if (realstart & XFS_REFC_COW_START) { 129 if (irec->rc_refcount != 1) 130 goto out_bad_rec; 131 realstart &= ~XFS_REFC_COW_START; 132 } else if (irec->rc_refcount < 2) { 133 goto out_bad_rec; 134 } 135 136 /* check for valid extent range, including overflow */ 137 if (!xfs_verify_agbno(mp, agno, realstart)) 138 goto out_bad_rec; 139 if (realstart > realstart + irec->rc_blockcount) 140 goto out_bad_rec; 141 if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1)) 142 goto out_bad_rec; 143 144 if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) 145 goto out_bad_rec; 146 147 trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.agno, irec); 148 return 0; 149 150 out_bad_rec: 151 xfs_warn(mp, 152 "Refcount BTree record corruption in AG %d detected!", agno); 153 xfs_warn(mp, 154 "Start block 0x%x, block count 0x%x, references 0x%x", 155 irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); 156 return -EFSCORRUPTED; 157 } 158 159 /* 160 * Update the record referred to by cur to the value given 161 * by [bno, len, refcount]. 162 * This either works (return 0) or gets an EFSCORRUPTED error. 163 */ 164 STATIC int 165 xfs_refcount_update( 166 struct xfs_btree_cur *cur, 167 struct xfs_refcount_irec *irec) 168 { 169 union xfs_btree_rec rec; 170 int error; 171 172 trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.agno, irec); 173 rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); 174 rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); 175 rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); 176 error = xfs_btree_update(cur, &rec); 177 if (error) 178 trace_xfs_refcount_update_error(cur->bc_mp, 179 cur->bc_ag.agno, error, _RET_IP_); 180 return error; 181 } 182 183 /* 184 * Insert the record referred to by cur to the value given 185 * by [bno, len, refcount]. 186 * This either works (return 0) or gets an EFSCORRUPTED error. 187 */ 188 int 189 xfs_refcount_insert( 190 struct xfs_btree_cur *cur, 191 struct xfs_refcount_irec *irec, 192 int *i) 193 { 194 int error; 195 196 trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.agno, irec); 197 cur->bc_rec.rc.rc_startblock = irec->rc_startblock; 198 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; 199 cur->bc_rec.rc.rc_refcount = irec->rc_refcount; 200 error = xfs_btree_insert(cur, i); 201 if (error) 202 goto out_error; 203 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 204 error = -EFSCORRUPTED; 205 goto out_error; 206 } 207 208 out_error: 209 if (error) 210 trace_xfs_refcount_insert_error(cur->bc_mp, 211 cur->bc_ag.agno, error, _RET_IP_); 212 return error; 213 } 214 215 /* 216 * Remove the record referred to by cur, then set the pointer to the spot 217 * where the record could be re-inserted, in case we want to increment or 218 * decrement the cursor. 219 * This either works (return 0) or gets an EFSCORRUPTED error. 220 */ 221 STATIC int 222 xfs_refcount_delete( 223 struct xfs_btree_cur *cur, 224 int *i) 225 { 226 struct xfs_refcount_irec irec; 227 int found_rec; 228 int error; 229 230 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 231 if (error) 232 goto out_error; 233 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 234 error = -EFSCORRUPTED; 235 goto out_error; 236 } 237 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.agno, &irec); 238 error = xfs_btree_delete(cur, i); 239 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 240 error = -EFSCORRUPTED; 241 goto out_error; 242 } 243 if (error) 244 goto out_error; 245 error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); 246 out_error: 247 if (error) 248 trace_xfs_refcount_delete_error(cur->bc_mp, 249 cur->bc_ag.agno, error, _RET_IP_); 250 return error; 251 } 252 253 /* 254 * Adjusting the Reference Count 255 * 256 * As stated elsewhere, the reference count btree (refcbt) stores 257 * >1 reference counts for extents of physical blocks. In this 258 * operation, we're either raising or lowering the reference count of 259 * some subrange stored in the tree: 260 * 261 * <------ adjustment range ------> 262 * ----+ +---+-----+ +--+--------+--------- 263 * 2 | | 3 | 4 | |17| 55 | 10 264 * ----+ +---+-----+ +--+--------+--------- 265 * X axis is physical blocks number; 266 * reference counts are the numbers inside the rectangles 267 * 268 * The first thing we need to do is to ensure that there are no 269 * refcount extents crossing either boundary of the range to be 270 * adjusted. For any extent that does cross a boundary, split it into 271 * two extents so that we can increment the refcount of one of the 272 * pieces later: 273 * 274 * <------ adjustment range ------> 275 * ----+ +---+-----+ +--+--------+----+---- 276 * 2 | | 3 | 2 | |17| 55 | 10 | 10 277 * ----+ +---+-----+ +--+--------+----+---- 278 * 279 * For this next step, let's assume that all the physical blocks in 280 * the adjustment range are mapped to a file and are therefore in use 281 * at least once. Therefore, we can infer that any gap in the 282 * refcount tree within the adjustment range represents a physical 283 * extent with refcount == 1: 284 * 285 * <------ adjustment range ------> 286 * ----+---+---+-----+-+--+--------+----+---- 287 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 288 * ----+---+---+-----+-+--+--------+----+---- 289 * ^ 290 * 291 * For each extent that falls within the interval range, figure out 292 * which extent is to the left or the right of that extent. Now we 293 * have a left, current, and right extent. If the new reference count 294 * of the center extent enables us to merge left, center, and right 295 * into one record covering all three, do so. If the center extent is 296 * at the left end of the range, abuts the left extent, and its new 297 * reference count matches the left extent's record, then merge them. 298 * If the center extent is at the right end of the range, abuts the 299 * right extent, and the reference counts match, merge those. In the 300 * example, we can left merge (assuming an increment operation): 301 * 302 * <------ adjustment range ------> 303 * --------+---+-----+-+--+--------+----+---- 304 * 2 | 3 | 2 |1|17| 55 | 10 | 10 305 * --------+---+-----+-+--+--------+----+---- 306 * ^ 307 * 308 * For all other extents within the range, adjust the reference count 309 * or delete it if the refcount falls below 2. If we were 310 * incrementing, the end result looks like this: 311 * 312 * <------ adjustment range ------> 313 * --------+---+-----+-+--+--------+----+---- 314 * 2 | 4 | 3 |2|18| 56 | 11 | 10 315 * --------+---+-----+-+--+--------+----+---- 316 * 317 * The result of a decrement operation looks as such: 318 * 319 * <------ adjustment range ------> 320 * ----+ +---+ +--+--------+----+---- 321 * 2 | | 2 | |16| 54 | 9 | 10 322 * ----+ +---+ +--+--------+----+---- 323 * DDDD 111111DD 324 * 325 * The blocks marked "D" are freed; the blocks marked "1" are only 326 * referenced once and therefore the record is removed from the 327 * refcount btree. 328 */ 329 330 /* Next block after this extent. */ 331 static inline xfs_agblock_t 332 xfs_refc_next( 333 struct xfs_refcount_irec *rc) 334 { 335 return rc->rc_startblock + rc->rc_blockcount; 336 } 337 338 /* 339 * Split a refcount extent that crosses agbno. 340 */ 341 STATIC int 342 xfs_refcount_split_extent( 343 struct xfs_btree_cur *cur, 344 xfs_agblock_t agbno, 345 bool *shape_changed) 346 { 347 struct xfs_refcount_irec rcext, tmp; 348 int found_rec; 349 int error; 350 351 *shape_changed = false; 352 error = xfs_refcount_lookup_le(cur, agbno, &found_rec); 353 if (error) 354 goto out_error; 355 if (!found_rec) 356 return 0; 357 358 error = xfs_refcount_get_rec(cur, &rcext, &found_rec); 359 if (error) 360 goto out_error; 361 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 362 error = -EFSCORRUPTED; 363 goto out_error; 364 } 365 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) 366 return 0; 367 368 *shape_changed = true; 369 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.agno, 370 &rcext, agbno); 371 372 /* Establish the right extent. */ 373 tmp = rcext; 374 tmp.rc_startblock = agbno; 375 tmp.rc_blockcount -= (agbno - rcext.rc_startblock); 376 error = xfs_refcount_update(cur, &tmp); 377 if (error) 378 goto out_error; 379 380 /* Insert the left extent. */ 381 tmp = rcext; 382 tmp.rc_blockcount = agbno - rcext.rc_startblock; 383 error = xfs_refcount_insert(cur, &tmp, &found_rec); 384 if (error) 385 goto out_error; 386 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 387 error = -EFSCORRUPTED; 388 goto out_error; 389 } 390 return error; 391 392 out_error: 393 trace_xfs_refcount_split_extent_error(cur->bc_mp, 394 cur->bc_ag.agno, error, _RET_IP_); 395 return error; 396 } 397 398 /* 399 * Merge the left, center, and right extents. 400 */ 401 STATIC int 402 xfs_refcount_merge_center_extents( 403 struct xfs_btree_cur *cur, 404 struct xfs_refcount_irec *left, 405 struct xfs_refcount_irec *center, 406 struct xfs_refcount_irec *right, 407 unsigned long long extlen, 408 xfs_extlen_t *aglen) 409 { 410 int error; 411 int found_rec; 412 413 trace_xfs_refcount_merge_center_extents(cur->bc_mp, 414 cur->bc_ag.agno, left, center, right); 415 416 /* 417 * Make sure the center and right extents are not in the btree. 418 * If the center extent was synthesized, the first delete call 419 * removes the right extent and we skip the second deletion. 420 * If center and right were in the btree, then the first delete 421 * call removes the center and the second one removes the right 422 * extent. 423 */ 424 error = xfs_refcount_lookup_ge(cur, center->rc_startblock, 425 &found_rec); 426 if (error) 427 goto out_error; 428 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 429 error = -EFSCORRUPTED; 430 goto out_error; 431 } 432 433 error = xfs_refcount_delete(cur, &found_rec); 434 if (error) 435 goto out_error; 436 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 437 error = -EFSCORRUPTED; 438 goto out_error; 439 } 440 441 if (center->rc_refcount > 1) { 442 error = xfs_refcount_delete(cur, &found_rec); 443 if (error) 444 goto out_error; 445 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 446 error = -EFSCORRUPTED; 447 goto out_error; 448 } 449 } 450 451 /* Enlarge the left extent. */ 452 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 453 &found_rec); 454 if (error) 455 goto out_error; 456 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 457 error = -EFSCORRUPTED; 458 goto out_error; 459 } 460 461 left->rc_blockcount = extlen; 462 error = xfs_refcount_update(cur, left); 463 if (error) 464 goto out_error; 465 466 *aglen = 0; 467 return error; 468 469 out_error: 470 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 471 cur->bc_ag.agno, error, _RET_IP_); 472 return error; 473 } 474 475 /* 476 * Merge with the left extent. 477 */ 478 STATIC int 479 xfs_refcount_merge_left_extent( 480 struct xfs_btree_cur *cur, 481 struct xfs_refcount_irec *left, 482 struct xfs_refcount_irec *cleft, 483 xfs_agblock_t *agbno, 484 xfs_extlen_t *aglen) 485 { 486 int error; 487 int found_rec; 488 489 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 490 cur->bc_ag.agno, left, cleft); 491 492 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ 493 if (cleft->rc_refcount > 1) { 494 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, 495 &found_rec); 496 if (error) 497 goto out_error; 498 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 499 error = -EFSCORRUPTED; 500 goto out_error; 501 } 502 503 error = xfs_refcount_delete(cur, &found_rec); 504 if (error) 505 goto out_error; 506 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 507 error = -EFSCORRUPTED; 508 goto out_error; 509 } 510 } 511 512 /* Enlarge the left extent. */ 513 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 514 &found_rec); 515 if (error) 516 goto out_error; 517 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 518 error = -EFSCORRUPTED; 519 goto out_error; 520 } 521 522 left->rc_blockcount += cleft->rc_blockcount; 523 error = xfs_refcount_update(cur, left); 524 if (error) 525 goto out_error; 526 527 *agbno += cleft->rc_blockcount; 528 *aglen -= cleft->rc_blockcount; 529 return error; 530 531 out_error: 532 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 533 cur->bc_ag.agno, error, _RET_IP_); 534 return error; 535 } 536 537 /* 538 * Merge with the right extent. 539 */ 540 STATIC int 541 xfs_refcount_merge_right_extent( 542 struct xfs_btree_cur *cur, 543 struct xfs_refcount_irec *right, 544 struct xfs_refcount_irec *cright, 545 xfs_extlen_t *aglen) 546 { 547 int error; 548 int found_rec; 549 550 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 551 cur->bc_ag.agno, cright, right); 552 553 /* 554 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 555 * remove it. 556 */ 557 if (cright->rc_refcount > 1) { 558 error = xfs_refcount_lookup_le(cur, cright->rc_startblock, 559 &found_rec); 560 if (error) 561 goto out_error; 562 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 563 error = -EFSCORRUPTED; 564 goto out_error; 565 } 566 567 error = xfs_refcount_delete(cur, &found_rec); 568 if (error) 569 goto out_error; 570 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 571 error = -EFSCORRUPTED; 572 goto out_error; 573 } 574 } 575 576 /* Enlarge the right extent. */ 577 error = xfs_refcount_lookup_le(cur, right->rc_startblock, 578 &found_rec); 579 if (error) 580 goto out_error; 581 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 582 error = -EFSCORRUPTED; 583 goto out_error; 584 } 585 586 right->rc_startblock -= cright->rc_blockcount; 587 right->rc_blockcount += cright->rc_blockcount; 588 error = xfs_refcount_update(cur, right); 589 if (error) 590 goto out_error; 591 592 *aglen -= cright->rc_blockcount; 593 return error; 594 595 out_error: 596 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 597 cur->bc_ag.agno, error, _RET_IP_); 598 return error; 599 } 600 601 #define XFS_FIND_RCEXT_SHARED 1 602 #define XFS_FIND_RCEXT_COW 2 603 /* 604 * Find the left extent and the one after it (cleft). This function assumes 605 * that we've already split any extent crossing agbno. 606 */ 607 STATIC int 608 xfs_refcount_find_left_extents( 609 struct xfs_btree_cur *cur, 610 struct xfs_refcount_irec *left, 611 struct xfs_refcount_irec *cleft, 612 xfs_agblock_t agbno, 613 xfs_extlen_t aglen, 614 int flags) 615 { 616 struct xfs_refcount_irec tmp; 617 int error; 618 int found_rec; 619 620 left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; 621 error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); 622 if (error) 623 goto out_error; 624 if (!found_rec) 625 return 0; 626 627 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 628 if (error) 629 goto out_error; 630 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 631 error = -EFSCORRUPTED; 632 goto out_error; 633 } 634 635 if (xfs_refc_next(&tmp) != agbno) 636 return 0; 637 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 638 return 0; 639 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 640 return 0; 641 /* We have a left extent; retrieve (or invent) the next right one */ 642 *left = tmp; 643 644 error = xfs_btree_increment(cur, 0, &found_rec); 645 if (error) 646 goto out_error; 647 if (found_rec) { 648 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 649 if (error) 650 goto out_error; 651 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 652 error = -EFSCORRUPTED; 653 goto out_error; 654 } 655 656 /* if tmp starts at the end of our range, just use that */ 657 if (tmp.rc_startblock == agbno) 658 *cleft = tmp; 659 else { 660 /* 661 * There's a gap in the refcntbt at the start of the 662 * range we're interested in (refcount == 1) so 663 * synthesize the implied extent and pass it back. 664 * We assume here that the agbno/aglen range was 665 * passed in from a data fork extent mapping and 666 * therefore is allocated to exactly one owner. 667 */ 668 cleft->rc_startblock = agbno; 669 cleft->rc_blockcount = min(aglen, 670 tmp.rc_startblock - agbno); 671 cleft->rc_refcount = 1; 672 } 673 } else { 674 /* 675 * No extents, so pretend that there's one covering the whole 676 * range. 677 */ 678 cleft->rc_startblock = agbno; 679 cleft->rc_blockcount = aglen; 680 cleft->rc_refcount = 1; 681 } 682 trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.agno, 683 left, cleft, agbno); 684 return error; 685 686 out_error: 687 trace_xfs_refcount_find_left_extent_error(cur->bc_mp, 688 cur->bc_ag.agno, error, _RET_IP_); 689 return error; 690 } 691 692 /* 693 * Find the right extent and the one before it (cright). This function 694 * assumes that we've already split any extents crossing agbno + aglen. 695 */ 696 STATIC int 697 xfs_refcount_find_right_extents( 698 struct xfs_btree_cur *cur, 699 struct xfs_refcount_irec *right, 700 struct xfs_refcount_irec *cright, 701 xfs_agblock_t agbno, 702 xfs_extlen_t aglen, 703 int flags) 704 { 705 struct xfs_refcount_irec tmp; 706 int error; 707 int found_rec; 708 709 right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; 710 error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); 711 if (error) 712 goto out_error; 713 if (!found_rec) 714 return 0; 715 716 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 717 if (error) 718 goto out_error; 719 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 720 error = -EFSCORRUPTED; 721 goto out_error; 722 } 723 724 if (tmp.rc_startblock != agbno + aglen) 725 return 0; 726 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 727 return 0; 728 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 729 return 0; 730 /* We have a right extent; retrieve (or invent) the next left one */ 731 *right = tmp; 732 733 error = xfs_btree_decrement(cur, 0, &found_rec); 734 if (error) 735 goto out_error; 736 if (found_rec) { 737 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 738 if (error) 739 goto out_error; 740 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 741 error = -EFSCORRUPTED; 742 goto out_error; 743 } 744 745 /* if tmp ends at the end of our range, just use that */ 746 if (xfs_refc_next(&tmp) == agbno + aglen) 747 *cright = tmp; 748 else { 749 /* 750 * There's a gap in the refcntbt at the end of the 751 * range we're interested in (refcount == 1) so 752 * create the implied extent and pass it back. 753 * We assume here that the agbno/aglen range was 754 * passed in from a data fork extent mapping and 755 * therefore is allocated to exactly one owner. 756 */ 757 cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); 758 cright->rc_blockcount = right->rc_startblock - 759 cright->rc_startblock; 760 cright->rc_refcount = 1; 761 } 762 } else { 763 /* 764 * No extents, so pretend that there's one covering the whole 765 * range. 766 */ 767 cright->rc_startblock = agbno; 768 cright->rc_blockcount = aglen; 769 cright->rc_refcount = 1; 770 } 771 trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.agno, 772 cright, right, agbno + aglen); 773 return error; 774 775 out_error: 776 trace_xfs_refcount_find_right_extent_error(cur->bc_mp, 777 cur->bc_ag.agno, error, _RET_IP_); 778 return error; 779 } 780 781 /* Is this extent valid? */ 782 static inline bool 783 xfs_refc_valid( 784 struct xfs_refcount_irec *rc) 785 { 786 return rc->rc_startblock != NULLAGBLOCK; 787 } 788 789 /* 790 * Try to merge with any extents on the boundaries of the adjustment range. 791 */ 792 STATIC int 793 xfs_refcount_merge_extents( 794 struct xfs_btree_cur *cur, 795 xfs_agblock_t *agbno, 796 xfs_extlen_t *aglen, 797 enum xfs_refc_adjust_op adjust, 798 int flags, 799 bool *shape_changed) 800 { 801 struct xfs_refcount_irec left = {0}, cleft = {0}; 802 struct xfs_refcount_irec cright = {0}, right = {0}; 803 int error; 804 unsigned long long ulen; 805 bool cequal; 806 807 *shape_changed = false; 808 /* 809 * Find the extent just below agbno [left], just above agbno [cleft], 810 * just below (agbno + aglen) [cright], and just above (agbno + aglen) 811 * [right]. 812 */ 813 error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, 814 *aglen, flags); 815 if (error) 816 return error; 817 error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, 818 *aglen, flags); 819 if (error) 820 return error; 821 822 /* No left or right extent to merge; exit. */ 823 if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) 824 return 0; 825 826 cequal = (cleft.rc_startblock == cright.rc_startblock) && 827 (cleft.rc_blockcount == cright.rc_blockcount); 828 829 /* Try to merge left, cleft, and right. cleft must == cright. */ 830 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + 831 right.rc_blockcount; 832 if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && 833 xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && 834 left.rc_refcount == cleft.rc_refcount + adjust && 835 right.rc_refcount == cleft.rc_refcount + adjust && 836 ulen < MAXREFCEXTLEN) { 837 *shape_changed = true; 838 return xfs_refcount_merge_center_extents(cur, &left, &cleft, 839 &right, ulen, aglen); 840 } 841 842 /* Try to merge left and cleft. */ 843 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; 844 if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && 845 left.rc_refcount == cleft.rc_refcount + adjust && 846 ulen < MAXREFCEXTLEN) { 847 *shape_changed = true; 848 error = xfs_refcount_merge_left_extent(cur, &left, &cleft, 849 agbno, aglen); 850 if (error) 851 return error; 852 853 /* 854 * If we just merged left + cleft and cleft == cright, 855 * we no longer have a cright to merge with right. We're done. 856 */ 857 if (cequal) 858 return 0; 859 } 860 861 /* Try to merge cright and right. */ 862 ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; 863 if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && 864 right.rc_refcount == cright.rc_refcount + adjust && 865 ulen < MAXREFCEXTLEN) { 866 *shape_changed = true; 867 return xfs_refcount_merge_right_extent(cur, &right, &cright, 868 aglen); 869 } 870 871 return error; 872 } 873 874 /* 875 * XXX: This is a pretty hand-wavy estimate. The penalty for guessing 876 * true incorrectly is a shutdown FS; the penalty for guessing false 877 * incorrectly is more transaction rolls than might be necessary. 878 * Be conservative here. 879 */ 880 static bool 881 xfs_refcount_still_have_space( 882 struct xfs_btree_cur *cur) 883 { 884 unsigned long overhead; 885 886 overhead = cur->bc_ag.refc.shape_changes * 887 xfs_allocfree_log_count(cur->bc_mp, 1); 888 overhead *= cur->bc_mp->m_sb.sb_blocksize; 889 890 /* 891 * Only allow 2 refcount extent updates per transaction if the 892 * refcount continue update "error" has been injected. 893 */ 894 if (cur->bc_ag.refc.nr_ops > 2 && 895 XFS_TEST_ERROR(false, cur->bc_mp, 896 XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) 897 return false; 898 899 if (cur->bc_ag.refc.nr_ops == 0) 900 return true; 901 else if (overhead > cur->bc_tp->t_log_res) 902 return false; 903 return cur->bc_tp->t_log_res - overhead > 904 cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; 905 } 906 907 /* 908 * Adjust the refcounts of middle extents. At this point we should have 909 * split extents that crossed the adjustment range; merged with adjacent 910 * extents; and updated agbno/aglen to reflect the merges. Therefore, 911 * all we have to do is update the extents inside [agbno, agbno + aglen]. 912 */ 913 STATIC int 914 xfs_refcount_adjust_extents( 915 struct xfs_btree_cur *cur, 916 xfs_agblock_t *agbno, 917 xfs_extlen_t *aglen, 918 enum xfs_refc_adjust_op adj, 919 struct xfs_owner_info *oinfo) 920 { 921 struct xfs_refcount_irec ext, tmp; 922 int error; 923 int found_rec, found_tmp; 924 xfs_fsblock_t fsbno; 925 926 /* Merging did all the work already. */ 927 if (*aglen == 0) 928 return 0; 929 930 error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); 931 if (error) 932 goto out_error; 933 934 while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { 935 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 936 if (error) 937 goto out_error; 938 if (!found_rec) { 939 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; 940 ext.rc_blockcount = 0; 941 ext.rc_refcount = 0; 942 } 943 944 /* 945 * Deal with a hole in the refcount tree; if a file maps to 946 * these blocks and there's no refcountbt record, pretend that 947 * there is one with refcount == 1. 948 */ 949 if (ext.rc_startblock != *agbno) { 950 tmp.rc_startblock = *agbno; 951 tmp.rc_blockcount = min(*aglen, 952 ext.rc_startblock - *agbno); 953 tmp.rc_refcount = 1 + adj; 954 trace_xfs_refcount_modify_extent(cur->bc_mp, 955 cur->bc_ag.agno, &tmp); 956 957 /* 958 * Either cover the hole (increment) or 959 * delete the range (decrement). 960 */ 961 if (tmp.rc_refcount) { 962 error = xfs_refcount_insert(cur, &tmp, 963 &found_tmp); 964 if (error) 965 goto out_error; 966 if (XFS_IS_CORRUPT(cur->bc_mp, 967 found_tmp != 1)) { 968 error = -EFSCORRUPTED; 969 goto out_error; 970 } 971 cur->bc_ag.refc.nr_ops++; 972 } else { 973 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 974 cur->bc_ag.agno, 975 tmp.rc_startblock); 976 xfs_bmap_add_free(cur->bc_tp, fsbno, 977 tmp.rc_blockcount, oinfo); 978 } 979 980 (*agbno) += tmp.rc_blockcount; 981 (*aglen) -= tmp.rc_blockcount; 982 983 error = xfs_refcount_lookup_ge(cur, *agbno, 984 &found_rec); 985 if (error) 986 goto out_error; 987 } 988 989 /* Stop if there's nothing left to modify */ 990 if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) 991 break; 992 993 /* 994 * Adjust the reference count and either update the tree 995 * (incr) or free the blocks (decr). 996 */ 997 if (ext.rc_refcount == MAXREFCOUNT) 998 goto skip; 999 ext.rc_refcount += adj; 1000 trace_xfs_refcount_modify_extent(cur->bc_mp, 1001 cur->bc_ag.agno, &ext); 1002 if (ext.rc_refcount > 1) { 1003 error = xfs_refcount_update(cur, &ext); 1004 if (error) 1005 goto out_error; 1006 cur->bc_ag.refc.nr_ops++; 1007 } else if (ext.rc_refcount == 1) { 1008 error = xfs_refcount_delete(cur, &found_rec); 1009 if (error) 1010 goto out_error; 1011 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 1012 error = -EFSCORRUPTED; 1013 goto out_error; 1014 } 1015 cur->bc_ag.refc.nr_ops++; 1016 goto advloop; 1017 } else { 1018 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 1019 cur->bc_ag.agno, 1020 ext.rc_startblock); 1021 xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, 1022 oinfo); 1023 } 1024 1025 skip: 1026 error = xfs_btree_increment(cur, 0, &found_rec); 1027 if (error) 1028 goto out_error; 1029 1030 advloop: 1031 (*agbno) += ext.rc_blockcount; 1032 (*aglen) -= ext.rc_blockcount; 1033 } 1034 1035 return error; 1036 out_error: 1037 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1038 cur->bc_ag.agno, error, _RET_IP_); 1039 return error; 1040 } 1041 1042 /* Adjust the reference count of a range of AG blocks. */ 1043 STATIC int 1044 xfs_refcount_adjust( 1045 struct xfs_btree_cur *cur, 1046 xfs_agblock_t agbno, 1047 xfs_extlen_t aglen, 1048 xfs_agblock_t *new_agbno, 1049 xfs_extlen_t *new_aglen, 1050 enum xfs_refc_adjust_op adj, 1051 struct xfs_owner_info *oinfo) 1052 { 1053 bool shape_changed; 1054 int shape_changes = 0; 1055 int error; 1056 1057 *new_agbno = agbno; 1058 *new_aglen = aglen; 1059 if (adj == XFS_REFCOUNT_ADJUST_INCREASE) 1060 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.agno, 1061 agbno, aglen); 1062 else 1063 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.agno, 1064 agbno, aglen); 1065 1066 /* 1067 * Ensure that no rcextents cross the boundary of the adjustment range. 1068 */ 1069 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1070 if (error) 1071 goto out_error; 1072 if (shape_changed) 1073 shape_changes++; 1074 1075 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1076 if (error) 1077 goto out_error; 1078 if (shape_changed) 1079 shape_changes++; 1080 1081 /* 1082 * Try to merge with the left or right extents of the range. 1083 */ 1084 error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, 1085 XFS_FIND_RCEXT_SHARED, &shape_changed); 1086 if (error) 1087 goto out_error; 1088 if (shape_changed) 1089 shape_changes++; 1090 if (shape_changes) 1091 cur->bc_ag.refc.shape_changes++; 1092 1093 /* Now that we've taken care of the ends, adjust the middle extents */ 1094 error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, 1095 adj, oinfo); 1096 if (error) 1097 goto out_error; 1098 1099 return 0; 1100 1101 out_error: 1102 trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.agno, 1103 error, _RET_IP_); 1104 return error; 1105 } 1106 1107 /* Clean up after calling xfs_refcount_finish_one. */ 1108 void 1109 xfs_refcount_finish_one_cleanup( 1110 struct xfs_trans *tp, 1111 struct xfs_btree_cur *rcur, 1112 int error) 1113 { 1114 struct xfs_buf *agbp; 1115 1116 if (rcur == NULL) 1117 return; 1118 agbp = rcur->bc_ag.agbp; 1119 xfs_btree_del_cursor(rcur, error); 1120 if (error) 1121 xfs_trans_brelse(tp, agbp); 1122 } 1123 1124 /* 1125 * Process one of the deferred refcount operations. We pass back the 1126 * btree cursor to maintain our lock on the btree between calls. 1127 * This saves time and eliminates a buffer deadlock between the 1128 * superblock and the AGF because we'll always grab them in the same 1129 * order. 1130 */ 1131 int 1132 xfs_refcount_finish_one( 1133 struct xfs_trans *tp, 1134 enum xfs_refcount_intent_type type, 1135 xfs_fsblock_t startblock, 1136 xfs_extlen_t blockcount, 1137 xfs_fsblock_t *new_fsb, 1138 xfs_extlen_t *new_len, 1139 struct xfs_btree_cur **pcur) 1140 { 1141 struct xfs_mount *mp = tp->t_mountp; 1142 struct xfs_btree_cur *rcur; 1143 struct xfs_buf *agbp = NULL; 1144 int error = 0; 1145 xfs_agnumber_t agno; 1146 xfs_agblock_t bno; 1147 xfs_agblock_t new_agbno; 1148 unsigned long nr_ops = 0; 1149 int shape_changes = 0; 1150 1151 agno = XFS_FSB_TO_AGNO(mp, startblock); 1152 ASSERT(agno != NULLAGNUMBER); 1153 bno = XFS_FSB_TO_AGBNO(mp, startblock); 1154 1155 trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), 1156 type, XFS_FSB_TO_AGBNO(mp, startblock), 1157 blockcount); 1158 1159 if (XFS_TEST_ERROR(false, mp, 1160 XFS_ERRTAG_REFCOUNT_FINISH_ONE)) 1161 return -EIO; 1162 1163 /* 1164 * If we haven't gotten a cursor or the cursor AG doesn't match 1165 * the startblock, get one now. 1166 */ 1167 rcur = *pcur; 1168 if (rcur != NULL && rcur->bc_ag.agno != agno) { 1169 nr_ops = rcur->bc_ag.refc.nr_ops; 1170 shape_changes = rcur->bc_ag.refc.shape_changes; 1171 xfs_refcount_finish_one_cleanup(tp, rcur, 0); 1172 rcur = NULL; 1173 *pcur = NULL; 1174 } 1175 if (rcur == NULL) { 1176 error = xfs_alloc_read_agf(tp->t_mountp, tp, agno, 1177 XFS_ALLOC_FLAG_FREEING, &agbp); 1178 if (error) 1179 return error; 1180 1181 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); 1182 rcur->bc_ag.refc.nr_ops = nr_ops; 1183 rcur->bc_ag.refc.shape_changes = shape_changes; 1184 } 1185 *pcur = rcur; 1186 1187 switch (type) { 1188 case XFS_REFCOUNT_INCREASE: 1189 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1190 new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL); 1191 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1192 break; 1193 case XFS_REFCOUNT_DECREASE: 1194 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1195 new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL); 1196 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1197 break; 1198 case XFS_REFCOUNT_ALLOC_COW: 1199 *new_fsb = startblock + blockcount; 1200 *new_len = 0; 1201 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount); 1202 break; 1203 case XFS_REFCOUNT_FREE_COW: 1204 *new_fsb = startblock + blockcount; 1205 *new_len = 0; 1206 error = __xfs_refcount_cow_free(rcur, bno, blockcount); 1207 break; 1208 default: 1209 ASSERT(0); 1210 error = -EFSCORRUPTED; 1211 } 1212 if (!error && *new_len > 0) 1213 trace_xfs_refcount_finish_one_leftover(mp, agno, type, 1214 bno, blockcount, new_agbno, *new_len); 1215 return error; 1216 } 1217 1218 /* 1219 * Record a refcount intent for later processing. 1220 */ 1221 static void 1222 __xfs_refcount_add( 1223 struct xfs_trans *tp, 1224 enum xfs_refcount_intent_type type, 1225 xfs_fsblock_t startblock, 1226 xfs_extlen_t blockcount) 1227 { 1228 struct xfs_refcount_intent *ri; 1229 1230 trace_xfs_refcount_defer(tp->t_mountp, 1231 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), 1232 type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), 1233 blockcount); 1234 1235 ri = kmem_alloc(sizeof(struct xfs_refcount_intent), 1236 KM_NOFS); 1237 INIT_LIST_HEAD(&ri->ri_list); 1238 ri->ri_type = type; 1239 ri->ri_startblock = startblock; 1240 ri->ri_blockcount = blockcount; 1241 1242 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); 1243 } 1244 1245 /* 1246 * Increase the reference count of the blocks backing a file's extent. 1247 */ 1248 void 1249 xfs_refcount_increase_extent( 1250 struct xfs_trans *tp, 1251 struct xfs_bmbt_irec *PREV) 1252 { 1253 if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) 1254 return; 1255 1256 __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, 1257 PREV->br_blockcount); 1258 } 1259 1260 /* 1261 * Decrease the reference count of the blocks backing a file's extent. 1262 */ 1263 void 1264 xfs_refcount_decrease_extent( 1265 struct xfs_trans *tp, 1266 struct xfs_bmbt_irec *PREV) 1267 { 1268 if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb)) 1269 return; 1270 1271 __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, 1272 PREV->br_blockcount); 1273 } 1274 1275 /* 1276 * Given an AG extent, find the lowest-numbered run of shared blocks 1277 * within that range and return the range in fbno/flen. If 1278 * find_end_of_shared is set, return the longest contiguous extent of 1279 * shared blocks; if not, just return the first extent we find. If no 1280 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK 1281 * and 0, respectively. 1282 */ 1283 int 1284 xfs_refcount_find_shared( 1285 struct xfs_btree_cur *cur, 1286 xfs_agblock_t agbno, 1287 xfs_extlen_t aglen, 1288 xfs_agblock_t *fbno, 1289 xfs_extlen_t *flen, 1290 bool find_end_of_shared) 1291 { 1292 struct xfs_refcount_irec tmp; 1293 int i; 1294 int have; 1295 int error; 1296 1297 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.agno, 1298 agbno, aglen); 1299 1300 /* By default, skip the whole range */ 1301 *fbno = NULLAGBLOCK; 1302 *flen = 0; 1303 1304 /* Try to find a refcount extent that crosses the start */ 1305 error = xfs_refcount_lookup_le(cur, agbno, &have); 1306 if (error) 1307 goto out_error; 1308 if (!have) { 1309 /* No left extent, look at the next one */ 1310 error = xfs_btree_increment(cur, 0, &have); 1311 if (error) 1312 goto out_error; 1313 if (!have) 1314 goto done; 1315 } 1316 error = xfs_refcount_get_rec(cur, &tmp, &i); 1317 if (error) 1318 goto out_error; 1319 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1320 error = -EFSCORRUPTED; 1321 goto out_error; 1322 } 1323 1324 /* If the extent ends before the start, look at the next one */ 1325 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { 1326 error = xfs_btree_increment(cur, 0, &have); 1327 if (error) 1328 goto out_error; 1329 if (!have) 1330 goto done; 1331 error = xfs_refcount_get_rec(cur, &tmp, &i); 1332 if (error) 1333 goto out_error; 1334 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1335 error = -EFSCORRUPTED; 1336 goto out_error; 1337 } 1338 } 1339 1340 /* If the extent starts after the range we want, bail out */ 1341 if (tmp.rc_startblock >= agbno + aglen) 1342 goto done; 1343 1344 /* We found the start of a shared extent! */ 1345 if (tmp.rc_startblock < agbno) { 1346 tmp.rc_blockcount -= (agbno - tmp.rc_startblock); 1347 tmp.rc_startblock = agbno; 1348 } 1349 1350 *fbno = tmp.rc_startblock; 1351 *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); 1352 if (!find_end_of_shared) 1353 goto done; 1354 1355 /* Otherwise, find the end of this shared extent */ 1356 while (*fbno + *flen < agbno + aglen) { 1357 error = xfs_btree_increment(cur, 0, &have); 1358 if (error) 1359 goto out_error; 1360 if (!have) 1361 break; 1362 error = xfs_refcount_get_rec(cur, &tmp, &i); 1363 if (error) 1364 goto out_error; 1365 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1366 error = -EFSCORRUPTED; 1367 goto out_error; 1368 } 1369 if (tmp.rc_startblock >= agbno + aglen || 1370 tmp.rc_startblock != *fbno + *flen) 1371 break; 1372 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); 1373 } 1374 1375 done: 1376 trace_xfs_refcount_find_shared_result(cur->bc_mp, 1377 cur->bc_ag.agno, *fbno, *flen); 1378 1379 out_error: 1380 if (error) 1381 trace_xfs_refcount_find_shared_error(cur->bc_mp, 1382 cur->bc_ag.agno, error, _RET_IP_); 1383 return error; 1384 } 1385 1386 /* 1387 * Recovering CoW Blocks After a Crash 1388 * 1389 * Due to the way that the copy on write mechanism works, there's a window of 1390 * opportunity in which we can lose track of allocated blocks during a crash. 1391 * Because CoW uses delayed allocation in the in-core CoW fork, writeback 1392 * causes blocks to be allocated and stored in the CoW fork. The blocks are 1393 * no longer in the free space btree but are not otherwise recorded anywhere 1394 * until the write completes and the blocks are mapped into the file. A crash 1395 * in between allocation and remapping results in the replacement blocks being 1396 * lost. This situation is exacerbated by the CoW extent size hint because 1397 * allocations can hang around for long time. 1398 * 1399 * However, there is a place where we can record these allocations before they 1400 * become mappings -- the reference count btree. The btree does not record 1401 * extents with refcount == 1, so we can record allocations with a refcount of 1402 * 1. Blocks being used for CoW writeout cannot be shared, so there should be 1403 * no conflict with shared block records. These mappings should be created 1404 * when we allocate blocks to the CoW fork and deleted when they're removed 1405 * from the CoW fork. 1406 * 1407 * Minor nit: records for in-progress CoW allocations and records for shared 1408 * extents must never be merged, to preserve the property that (except for CoW 1409 * allocations) there are no refcount btree entries with refcount == 1. The 1410 * only time this could potentially happen is when unsharing a block that's 1411 * adjacent to CoW allocations, so we must be careful to avoid this. 1412 * 1413 * At mount time we recover lost CoW allocations by searching the refcount 1414 * btree for these refcount == 1 mappings. These represent CoW allocations 1415 * that were in progress at the time the filesystem went down, so we can free 1416 * them to get the space back. 1417 * 1418 * This mechanism is superior to creating EFIs for unmapped CoW extents for 1419 * several reasons -- first, EFIs pin the tail of the log and would have to be 1420 * periodically relogged to avoid filling up the log. Second, CoW completions 1421 * will have to file an EFD and create new EFIs for whatever remains in the 1422 * CoW fork; this partially takes care of (1) but extent-size reservations 1423 * will have to periodically relog even if there's no writeout in progress. 1424 * This can happen if the CoW extent size hint is set, which you really want. 1425 * Third, EFIs cannot currently be automatically relogged into newer 1426 * transactions to advance the log tail. Fourth, stuffing the log full of 1427 * EFIs places an upper bound on the number of CoW allocations that can be 1428 * held filesystem-wide at any given time. Recording them in the refcount 1429 * btree doesn't require us to maintain any state in memory and doesn't pin 1430 * the log. 1431 */ 1432 /* 1433 * Adjust the refcounts of CoW allocations. These allocations are "magic" 1434 * in that they're not referenced anywhere else in the filesystem, so we 1435 * stash them in the refcount btree with a refcount of 1 until either file 1436 * remapping (or CoW cancellation) happens. 1437 */ 1438 STATIC int 1439 xfs_refcount_adjust_cow_extents( 1440 struct xfs_btree_cur *cur, 1441 xfs_agblock_t agbno, 1442 xfs_extlen_t aglen, 1443 enum xfs_refc_adjust_op adj) 1444 { 1445 struct xfs_refcount_irec ext, tmp; 1446 int error; 1447 int found_rec, found_tmp; 1448 1449 if (aglen == 0) 1450 return 0; 1451 1452 /* Find any overlapping refcount records */ 1453 error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); 1454 if (error) 1455 goto out_error; 1456 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 1457 if (error) 1458 goto out_error; 1459 if (!found_rec) { 1460 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + 1461 XFS_REFC_COW_START; 1462 ext.rc_blockcount = 0; 1463 ext.rc_refcount = 0; 1464 } 1465 1466 switch (adj) { 1467 case XFS_REFCOUNT_ADJUST_COW_ALLOC: 1468 /* Adding a CoW reservation, there should be nothing here. */ 1469 if (XFS_IS_CORRUPT(cur->bc_mp, 1470 agbno + aglen > ext.rc_startblock)) { 1471 error = -EFSCORRUPTED; 1472 goto out_error; 1473 } 1474 1475 tmp.rc_startblock = agbno; 1476 tmp.rc_blockcount = aglen; 1477 tmp.rc_refcount = 1; 1478 trace_xfs_refcount_modify_extent(cur->bc_mp, 1479 cur->bc_ag.agno, &tmp); 1480 1481 error = xfs_refcount_insert(cur, &tmp, 1482 &found_tmp); 1483 if (error) 1484 goto out_error; 1485 if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { 1486 error = -EFSCORRUPTED; 1487 goto out_error; 1488 } 1489 break; 1490 case XFS_REFCOUNT_ADJUST_COW_FREE: 1491 /* Removing a CoW reservation, there should be one extent. */ 1492 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) { 1493 error = -EFSCORRUPTED; 1494 goto out_error; 1495 } 1496 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) { 1497 error = -EFSCORRUPTED; 1498 goto out_error; 1499 } 1500 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) { 1501 error = -EFSCORRUPTED; 1502 goto out_error; 1503 } 1504 1505 ext.rc_refcount = 0; 1506 trace_xfs_refcount_modify_extent(cur->bc_mp, 1507 cur->bc_ag.agno, &ext); 1508 error = xfs_refcount_delete(cur, &found_rec); 1509 if (error) 1510 goto out_error; 1511 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 1512 error = -EFSCORRUPTED; 1513 goto out_error; 1514 } 1515 break; 1516 default: 1517 ASSERT(0); 1518 } 1519 1520 return error; 1521 out_error: 1522 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1523 cur->bc_ag.agno, error, _RET_IP_); 1524 return error; 1525 } 1526 1527 /* 1528 * Add or remove refcount btree entries for CoW reservations. 1529 */ 1530 STATIC int 1531 xfs_refcount_adjust_cow( 1532 struct xfs_btree_cur *cur, 1533 xfs_agblock_t agbno, 1534 xfs_extlen_t aglen, 1535 enum xfs_refc_adjust_op adj) 1536 { 1537 bool shape_changed; 1538 int error; 1539 1540 agbno += XFS_REFC_COW_START; 1541 1542 /* 1543 * Ensure that no rcextents cross the boundary of the adjustment range. 1544 */ 1545 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1546 if (error) 1547 goto out_error; 1548 1549 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1550 if (error) 1551 goto out_error; 1552 1553 /* 1554 * Try to merge with the left or right extents of the range. 1555 */ 1556 error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, 1557 XFS_FIND_RCEXT_COW, &shape_changed); 1558 if (error) 1559 goto out_error; 1560 1561 /* Now that we've taken care of the ends, adjust the middle extents */ 1562 error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj); 1563 if (error) 1564 goto out_error; 1565 1566 return 0; 1567 1568 out_error: 1569 trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.agno, 1570 error, _RET_IP_); 1571 return error; 1572 } 1573 1574 /* 1575 * Record a CoW allocation in the refcount btree. 1576 */ 1577 STATIC int 1578 __xfs_refcount_cow_alloc( 1579 struct xfs_btree_cur *rcur, 1580 xfs_agblock_t agbno, 1581 xfs_extlen_t aglen) 1582 { 1583 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.agno, 1584 agbno, aglen); 1585 1586 /* Add refcount btree reservation */ 1587 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1588 XFS_REFCOUNT_ADJUST_COW_ALLOC); 1589 } 1590 1591 /* 1592 * Remove a CoW allocation from the refcount btree. 1593 */ 1594 STATIC int 1595 __xfs_refcount_cow_free( 1596 struct xfs_btree_cur *rcur, 1597 xfs_agblock_t agbno, 1598 xfs_extlen_t aglen) 1599 { 1600 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.agno, 1601 agbno, aglen); 1602 1603 /* Remove refcount btree reservation */ 1604 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1605 XFS_REFCOUNT_ADJUST_COW_FREE); 1606 } 1607 1608 /* Record a CoW staging extent in the refcount btree. */ 1609 void 1610 xfs_refcount_alloc_cow_extent( 1611 struct xfs_trans *tp, 1612 xfs_fsblock_t fsb, 1613 xfs_extlen_t len) 1614 { 1615 struct xfs_mount *mp = tp->t_mountp; 1616 1617 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1618 return; 1619 1620 __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); 1621 1622 /* Add rmap entry */ 1623 xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), 1624 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1625 } 1626 1627 /* Forget a CoW staging event in the refcount btree. */ 1628 void 1629 xfs_refcount_free_cow_extent( 1630 struct xfs_trans *tp, 1631 xfs_fsblock_t fsb, 1632 xfs_extlen_t len) 1633 { 1634 struct xfs_mount *mp = tp->t_mountp; 1635 1636 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1637 return; 1638 1639 /* Remove rmap entry */ 1640 xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), 1641 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1642 __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); 1643 } 1644 1645 struct xfs_refcount_recovery { 1646 struct list_head rr_list; 1647 struct xfs_refcount_irec rr_rrec; 1648 }; 1649 1650 /* Stuff an extent on the recovery list. */ 1651 STATIC int 1652 xfs_refcount_recover_extent( 1653 struct xfs_btree_cur *cur, 1654 union xfs_btree_rec *rec, 1655 void *priv) 1656 { 1657 struct list_head *debris = priv; 1658 struct xfs_refcount_recovery *rr; 1659 1660 if (XFS_IS_CORRUPT(cur->bc_mp, 1661 be32_to_cpu(rec->refc.rc_refcount) != 1)) 1662 return -EFSCORRUPTED; 1663 1664 rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); 1665 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); 1666 list_add_tail(&rr->rr_list, debris); 1667 1668 return 0; 1669 } 1670 1671 /* Find and remove leftover CoW reservations. */ 1672 int 1673 xfs_refcount_recover_cow_leftovers( 1674 struct xfs_mount *mp, 1675 xfs_agnumber_t agno) 1676 { 1677 struct xfs_trans *tp; 1678 struct xfs_btree_cur *cur; 1679 struct xfs_buf *agbp; 1680 struct xfs_refcount_recovery *rr, *n; 1681 struct list_head debris; 1682 union xfs_btree_irec low; 1683 union xfs_btree_irec high; 1684 xfs_fsblock_t fsb; 1685 xfs_agblock_t agbno; 1686 int error; 1687 1688 if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) 1689 return -EOPNOTSUPP; 1690 1691 INIT_LIST_HEAD(&debris); 1692 1693 /* 1694 * In this first part, we use an empty transaction to gather up 1695 * all the leftover CoW extents so that we can subsequently 1696 * delete them. The empty transaction is used to avoid 1697 * a buffer lock deadlock if there happens to be a loop in the 1698 * refcountbt because we're allowed to re-grab a buffer that is 1699 * already attached to our transaction. When we're done 1700 * recording the CoW debris we cancel the (empty) transaction 1701 * and everything goes away cleanly. 1702 */ 1703 error = xfs_trans_alloc_empty(mp, &tp); 1704 if (error) 1705 return error; 1706 1707 error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 1708 if (error) 1709 goto out_trans; 1710 cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); 1711 1712 /* Find all the leftover CoW staging extents. */ 1713 memset(&low, 0, sizeof(low)); 1714 memset(&high, 0, sizeof(high)); 1715 low.rc.rc_startblock = XFS_REFC_COW_START; 1716 high.rc.rc_startblock = -1U; 1717 error = xfs_btree_query_range(cur, &low, &high, 1718 xfs_refcount_recover_extent, &debris); 1719 xfs_btree_del_cursor(cur, error); 1720 xfs_trans_brelse(tp, agbp); 1721 xfs_trans_cancel(tp); 1722 if (error) 1723 goto out_free; 1724 1725 /* Now iterate the list to free the leftovers */ 1726 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1727 /* Set up transaction. */ 1728 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); 1729 if (error) 1730 goto out_free; 1731 1732 trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec); 1733 1734 /* Free the orphan record */ 1735 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; 1736 fsb = XFS_AGB_TO_FSB(mp, agno, agbno); 1737 xfs_refcount_free_cow_extent(tp, fsb, 1738 rr->rr_rrec.rc_blockcount); 1739 1740 /* Free the block. */ 1741 xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); 1742 1743 error = xfs_trans_commit(tp); 1744 if (error) 1745 goto out_free; 1746 1747 list_del(&rr->rr_list); 1748 kmem_free(rr); 1749 } 1750 1751 return error; 1752 out_trans: 1753 xfs_trans_cancel(tp); 1754 out_free: 1755 /* Free the leftover list */ 1756 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1757 list_del(&rr->rr_list); 1758 kmem_free(rr); 1759 } 1760 return error; 1761 } 1762 1763 /* Is there a record covering a given extent? */ 1764 int 1765 xfs_refcount_has_record( 1766 struct xfs_btree_cur *cur, 1767 xfs_agblock_t bno, 1768 xfs_extlen_t len, 1769 bool *exists) 1770 { 1771 union xfs_btree_irec low; 1772 union xfs_btree_irec high; 1773 1774 memset(&low, 0, sizeof(low)); 1775 low.rc.rc_startblock = bno; 1776 memset(&high, 0xFF, sizeof(high)); 1777 high.rc.rc_startblock = bno + len - 1; 1778 1779 return xfs_btree_has_record(cur, &low, &high, exists); 1780 } 1781