// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	union xfs_btree_rec		*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Get the data from the pointed-to record.
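 * The record is sanity checked before it is handed back; a record that
 * fails the checks is reported with xfs_warn() and -EFSCORRUPTED is
 * returned instead.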
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	struct xfs_mount		*mp = cur->bc_mp;
	xfs_agnumber_t			agno = cur->bc_private.a.agno;
	union xfs_btree_rec		*rec;
	int				error;
	xfs_agblock_t			realstart;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);

	agno = cur->bc_private.a.agno;
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
		goto out_bad_rec;

	/* handle special COW-staging state */
	realstart = irec->rc_startblock;
	if (realstart & XFS_REFC_COW_START) {
		if (irec->rc_refcount != 1)
			goto out_bad_rec;
		realstart &= ~XFS_REFC_COW_START;
	} else if (irec->rc_refcount < 2) {
		goto out_bad_rec;
	}

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbno(mp, agno, realstart))
		goto out_bad_rec;
	if (realstart > realstart + irec->rc_blockcount)
		goto out_bad_rec;
	if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1))
		goto out_bad_rec;

	if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
		goto out_bad_rec;

	trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec);
	return 0;

out_bad_rec:
	xfs_warn(mp,
		"Refcount BTree record corruption in AG %d detected!", agno);
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	return -EFSCORRUPTED;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
	rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int			found_rec;
	int			error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
	error = xfs_btree_delete(cur, i);
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (error)
		goto out_error;
	error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.
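 * (Gaps, which stand for an implied refcount of 1, become new records
 * with refcount 2 on an increment; on a decrement the blocks they cover
 * are freed outright.)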
 * If we were incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *        DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_private.a.agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_private.a.agno, left, cleft);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_private.a.agno, cright, right);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
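 * On return, *agbno and *aglen describe whatever part of the range still
 * needs a record-by-record adjustment; a successful center merge shrinks
 * the range to zero length.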
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return error;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	overhead = cur->bc_private.a.priv.refc.shape_changes *
			xfs_allocfree_log_count(cur->bc_mp, 1);
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
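	 * Otherwise, allow more updates only while the space consumed by the
	 * shape changes and by the refcount updates made so far stays well
	 * within this transaction's log reservation.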
	 */
	if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_private.a.priv.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_private.a.agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						   found_tmp != 1)) {
					error = -EFSCORRUPTED;
					goto out_error;
				}
				cur->bc_private.a.priv.refc.nr_ops++;
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_private.a.agno,
						tmp.rc_startblock);
				xfs_bmap_add_free(cur->bc_tp, fsbno,
						  tmp.rc_blockcount, oinfo);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			error = xfs_refcount_lookup_ge(cur, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/* Stop if there's nothing left to modify */
		if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
			break;

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
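		 * An extent already pegged at MAXREFCOUNT is left as is.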
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
			cur->bc_private.a.priv.refc.nr_ops++;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				error = -EFSCORRUPTED;
				goto out_error;
			}
			cur->bc_private.a.priv.refc.nr_ops++;
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_private.a.agno,
					ext.rc_startblock);
			xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount,
					  oinfo);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_owner_info	*oinfo)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_private.a.priv.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
			adj, oinfo);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_private.a.agbp;
	xfs_btree_del_cursor(rcur, error);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/*
 * Process one of the deferred refcount operations.
 * We pass back the btree cursor to maintain our lock on the btree between
 * calls.  This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agnumber_t			agno;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	agno = XFS_FSB_TO_AGNO(mp, startblock);
	ASSERT(agno != NULLAGNUMBER);
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_private.a.agno != agno) {
		nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
		shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;
		if (XFS_IS_CORRUPT(tp->t_mountp, !agbp))
			return -EFSCORRUPTED;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
		if (!rcur) {
			error = -ENOMEM;
			goto out_cur;
		}
		rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
		rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, agno, type,
				bno, blockcount, new_agbno, *new_len);
	return error;

out_cur:
	xfs_trans_brelse(tp, agbp);

	return error;
}

/*
 * Record a refcount intent for later processing.
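 * The intent is queued on the transaction's deferred-ops list and is
 * carried out later by xfs_refcount_finish_one().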
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			blockcount);

	ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
			KM_NOFS);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	xfs_agblock_t			*fbno,
	xfs_extlen_t			*flen,
	bool				find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, agbno, &have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
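	/* Trim the record so it starts no earlier than the caller's range. */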
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_private.a.agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for a long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.
 * Second, CoW completions will have to file an EFD and create new EFIs for
 * whatever remains in the CoW fork; this partially takes care of (1) but
 * extent-size reservations will have to periodically relog even if there's
 * no writeout in progress.  This can happen if the CoW extent size hint is
 * set, which you really want.  Third, EFIs cannot currently be automatically
 * relogged into newer transactions to advance the log tail.  Fourth,
 * stuffing the log full of EFIs places an upper bound on the number of CoW
 * allocations that can be held filesystem-wide at any given time.  Recording
 * them in the refcount btree doesn't require us to maintain any state in
 * memory and doesn't pin the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	enum xfs_refc_adjust_op		adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
				XFS_REFC_COW_START;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				   agbno + aglen > ext.rc_startblock)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Add or remove refcount btree entries for CoW reservations.
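 * The caller passes in a regular AG block number; we shift it up by
 * XFS_REFC_COW_START so that CoW staging records occupy their own part
 * of the refcount btree keyspace, separate from shared-extent records.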
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	agbno += XFS_REFC_COW_START;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return;

	/* Remove rmap entry */
	xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
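/*
 * Only CoW staging records (refcount == 1) should turn up in the query
 * range; anything else is treated as corruption.
 */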
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	union xfs_btree_rec		*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   be32_to_cpu(rec->refc.rc_refcount) != 1))
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	xfs_fsblock_t			fsb;
	xfs_agblock_t			agbno;
	int				error;

	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
	if (error)
		goto out_trans;
	if (!agbp) {
		error = -ENOMEM;
		goto out_trans;
	}
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_startblock = XFS_REFC_COW_START;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);

		/* Free the orphan record */
		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
		xfs_refcount_free_cow_extent(tp, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;
}

/* Is there a record covering a given extent? */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}