// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno,
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.agno, bno,
			XFS_LOOKUP_EQ);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	union xfs_btree_rec		*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	struct xfs_mount		*mp = cur->bc_mp;
	xfs_agnumber_t			agno = cur->bc_ag.agno;
	union xfs_btree_rec		*rec;
	int				error;
	xfs_agblock_t			realstart;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);

	agno = cur->bc_ag.agno;
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
		goto out_bad_rec;

	/* handle special COW-staging state */
	realstart = irec->rc_startblock;
	if (realstart & XFS_REFC_COW_START) {
		if (irec->rc_refcount != 1)
			goto out_bad_rec;
		realstart &= ~XFS_REFC_COW_START;
	} else if (irec->rc_refcount < 2) {
		goto out_bad_rec;
	}

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbno(mp, agno, realstart))
		goto out_bad_rec;
	if (realstart > realstart + irec->rc_blockcount)
		goto out_bad_rec;
	if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1))
		goto out_bad_rec;

	if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
		goto out_bad_rec;

	trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.agno, irec);
	return 0;

out_bad_rec:
	xfs_warn(mp,
		"Refcount BTree record corruption in AG %d detected!", agno);
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	return -EFSCORRUPTED;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.agno, irec);
	rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.agno, irec);
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int			found_rec;
	int			error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.agno, &irec);
	error = xfs_btree_delete(cur, i);
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (error)
		goto out_error;
	error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *        DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */
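
/*
 * Illustrative summary (an editor's sketch of the helpers defined below):
 * xfs_refcount_adjust() drives an adjustment roughly as
 *
 *	xfs_refcount_split_extent(cur, agbno, ...);
 *	xfs_refcount_split_extent(cur, agbno + aglen, ...);
 *	xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, ...);
 *	xfs_refcount_adjust_extents(cur, &agbno, &aglen, adj, ...);
 *
 * where the merge step may shrink [agbno, agbno + aglen) from either end,
 * and the final step walks whatever remains, updating, inserting, or
 * deleting records (and freeing blocks on a decrement).
 */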

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_ag.agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_ag.agno, left, cleft);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_ag.agno, cright, right);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}
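
/*
 * Flag values for xfs_refcount_find_left_extents() and
 * xfs_refcount_find_right_extents() below: SHARED considers only ordinary
 * shared-extent records (refcount >= 2) as merge candidates, while COW
 * considers only CoW staging records, which always carry refcount == 1.
 */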
#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return error;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	overhead = cur->bc_ag.refc.shape_changes *
			xfs_allocfree_log_count(cur->bc_mp, 1);
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_ag.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			   XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_ag.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
			cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_ag.agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						   found_tmp != 1)) {
					error = -EFSCORRUPTED;
					goto out_error;
				}
				cur->bc_ag.refc.nr_ops++;
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_ag.agno,
						tmp.rc_startblock);
				xfs_bmap_add_free(cur->bc_tp, fsbno,
						tmp.rc_blockcount, oinfo);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			error = xfs_refcount_lookup_ge(cur, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/* Stop if there's nothing left to modify */
		if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
			break;

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
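		/*
		 * Extents already at MAXREFCOUNT are left saturated rather
		 * than overflowed; they are skipped below unmodified.
		 */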
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.agno, &ext);
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
			cur->bc_ag.refc.nr_ops++;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				error = -EFSCORRUPTED;
				goto out_error;
			}
			cur->bc_ag.refc.nr_ops++;
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_ag.agno,
					ext.rc_startblock);
			xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount,
					oinfo);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_owner_info	*oinfo)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_ag.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
			adj, oinfo);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_ag.agbp;
	xfs_btree_del_cursor(rcur, error);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agnumber_t			agno;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	agno = XFS_FSB_TO_AGNO(mp, startblock);
	ASSERT(agno != NULLAGNUMBER);
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_ag.agno != agno) {
		nr_ops = rcur->bc_ag.refc.nr_ops;
		shape_changes = rcur->bc_ag.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
		if (!rcur) {
			error = -ENOMEM;
			goto out_cur;
		}
		rcur->bc_ag.refc.nr_ops = nr_ops;
		rcur->bc_ag.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, agno, type,
				bno, blockcount, new_agbno, *new_len);
	return error;

out_cur:
	xfs_trans_brelse(tp, agbp);

	return error;
}
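
/*
 * Sketch of the intent lifecycle (editor's illustration of the calls in this
 * file; the deferred-ops machinery itself lives elsewhere): a caller such as
 * xfs_refcount_increase_extent() records an intent with __xfs_refcount_add(),
 * which queues it via xfs_defer_add(..., XFS_DEFER_OPS_TYPE_REFCOUNT, ...).
 * When the deferred work is later processed, xfs_refcount_finish_one() above
 * is invoked for each intent, reusing *pcur across calls so that all updates
 * for one AG share a single refcount btree cursor.
 */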

/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			blockcount);

	ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
			KM_NOFS);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
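/*
 * Worked example (editor's illustration): suppose the tree holds a record
 * for agbno 10, 5 blocks, refcount 2, followed by one for agbno 15,
 * 6 blocks, refcount 3.  A query with agbno = 12, aglen = 7 trims the
 * crossing record and reports fbno = 12, flen = 3; if find_end_of_shared
 * is set, the loop below extends flen across the adjacent shared record
 * and clamps it to the query range, returning fbno = 12, flen = 7.
 */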
int
xfs_refcount_find_shared(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*fbno,
	xfs_extlen_t		*flen,
	bool			find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, agbno, &have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_ag.agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_ag.agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for a long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	enum xfs_refc_adjust_op		adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
				XFS_REFC_COW_START;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				   agbno + aglen > ext.rc_startblock)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.agno, error, _RET_IP_);
	return error;
}
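
/*
 * Note on the staging-extent encoding (editor's sketch; the exact value of
 * XFS_REFC_COW_START is defined in the format headers, assumed here to be
 * the high bit of the AG block number space): a CoW staging reservation for
 * AG block X is stored as a record starting at X + XFS_REFC_COW_START with
 * refcount == 1.  That is why xfs_refcount_adjust_cow() below offsets agbno
 * before splitting or merging, why xfs_refcount_get_rec() masks the bit off
 * before validating the block range, and why recovery refuses to run if
 * sb_agblocks could collide with the offset range.
 */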

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	agbno += XFS_REFC_COW_START;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return;

	/* Remove rmap entry */
	xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	union xfs_btree_rec		*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   be32_to_cpu(rec->refc.rc_refcount) != 1))
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	xfs_fsblock_t			fsb;
	xfs_agblock_t			agbno;
	int				error;

	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
	if (error)
		goto out_trans;
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_startblock = XFS_REFC_COW_START;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);

		/* Free the orphan record */
		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
		xfs_refcount_free_cow_extent(tp, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;
}

/* Is there a record covering a given extent? */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}