// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_EQ);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	union xfs_btree_rec		*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}
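
/*
 * The refcount btree is keyed only by the starting AG block of each
 * record, which is why the lookup helpers above fill in just
 * rc_startblock and leave rc_blockcount zeroed.  An on-disk record of
 * {12, 5, 2} (all fields big-endian) therefore decodes to an in-core
 * extent that starts at AG block 12, runs for 5 blocks, and is shared
 * by two owners.
 */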

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	struct xfs_mount		*mp = cur->bc_mp;
	xfs_agnumber_t			agno = cur->bc_private.a.agno;
	union xfs_btree_rec		*rec;
	int				error;
	xfs_agblock_t			realstart;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);

	agno = cur->bc_private.a.agno;
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
		goto out_bad_rec;

	/* handle special COW-staging state */
	realstart = irec->rc_startblock;
	if (realstart & XFS_REFC_COW_START) {
		if (irec->rc_refcount != 1)
			goto out_bad_rec;
		realstart &= ~XFS_REFC_COW_START;
	} else if (irec->rc_refcount < 2) {
		goto out_bad_rec;
	}

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbno(mp, agno, realstart))
		goto out_bad_rec;
	if (realstart > realstart + irec->rc_blockcount)
		goto out_bad_rec;
	if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1))
		goto out_bad_rec;

	if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
		goto out_bad_rec;

	trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec);
	return 0;

out_bad_rec:
	xfs_warn(mp,
		"Refcount BTree record corruption in AG %d detected!", agno);
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	return -EFSCORRUPTED;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
	rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
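
/*
 * Note that xfs_refcount_insert() treats anything other than *i == 1
 * after the btree insert as corruption: XFS_WANT_CORRUPTED_GOTO() turns
 * the unexpected result into -EFSCORRUPTED, which is how the "either
 * works or gets an EFSCORRUPTED error" contract above is kept.
 */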

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int			found_rec;
	int			error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
	error = xfs_btree_delete(cur, i);
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
	if (error)
		goto out_error;
	error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is the physical block number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+    +---+        +--+--------+----+----
 *  2  |    | 2 |        |16|   54   |  9 | 10
 * ----+    +---+        +--+--------+----+----
 *      DDDD     111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int			found_rec;
	int			error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
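
/*
 * For example, splitting the record (startblock 10, blockcount 8,
 * refcount 3) at agbno 14 updates the existing record in place to
 * cover blocks 14-17 and inserts a new record for blocks 10-13, both
 * still with refcount 3.
 */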

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur	*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long	extlen,
	xfs_extlen_t		*aglen)
{
	int			error;
	int			found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_private.a.agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur	*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen)
{
	int			error;
	int			found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_private.a.agno, left, cleft);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
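
/*
 * For example, when incrementing with left = (10, 4, 3) and
 * cleft = (14, 2, 2), the cleft record is removed and left is extended
 * to (10, 6, 3); agbno/aglen are then advanced past the merged blocks
 * so that the adjustment loop does not bump them a second time.
 */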

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur	*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t		*aglen)
{
	int			error;
	int			found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_private.a.agno, cright, right);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur	*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	int			flags)
{
	struct xfs_refcount_irec	tmp;
	int			error;
	int			found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur	*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	int			flags)
{
	struct xfs_refcount_irec	tmp;
	int			error;
	int			found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int			error;
	unsigned long long	ulen;
	bool			cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return error;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	overhead = cur->bc_private.a.priv.refc.shape_changes *
			xfs_allocfree_log_count(cur->bc_mp, 1);
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_private.a.priv.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}
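
/*
 * In other words: every shape change is assumed to consume a worst-case
 * allocfree reservation's worth of log space, and whatever remains of
 * t_log_res must still cover XFS_REFCOUNT_ITEM_OVERHEAD bytes for each
 * refcount update made so far.  When this returns false, the adjustment
 * loop below stops early and the unprocessed part of the range is
 * handed back to the caller as a leftover to be finished in a later
 * transaction.
 */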

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_refcount_irec	ext, tmp;
	int			error;
	int			found_rec, found_tmp;
	xfs_fsblock_t		fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_private.a.agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
						found_tmp == 1, out_error);
				cur->bc_private.a.priv.refc.nr_ops++;
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_private.a.agno,
						tmp.rc_startblock);
				xfs_bmap_add_free(cur->bc_tp, fsbno,
						tmp.rc_blockcount, oinfo);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			error = xfs_refcount_lookup_ge(cur, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/* Stop if there's nothing left to modify */
		if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
			break;

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
			cur->bc_private.a.priv.refc.nr_ops++;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
					found_rec == 1, out_error);
			cur->bc_private.a.priv.refc.nr_ops++;
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_private.a.agno,
					ext.rc_startblock);
			xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount,
					oinfo);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_owner_info	*oinfo)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_private.a.priv.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
			adj, oinfo);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_private.a.agbp;
	xfs_btree_del_cursor(rcur, error);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agnumber_t			agno;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	agno = XFS_FSB_TO_AGNO(mp, startblock);
	ASSERT(agno != NULLAGNUMBER);
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_private.a.agno != agno) {
		nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
		shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;
		if (!agbp)
			return -EFSCORRUPTED;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
		if (!rcur) {
			error = -ENOMEM;
			goto out_cur;
		}
		rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
		rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, agno, type,
				bno, blockcount, new_agbno, *new_len);
	return error;

out_cur:
	xfs_trans_brelse(tp, agbp);

	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static int
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			blockcount);

	ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
			KM_SLEEP | KM_NOFS);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
	return 0;
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
int
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
		return 0;

	return __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE,
			PREV->br_startblock, PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
int
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
		return 0;

	return __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE,
			PREV->br_startblock, PREV->br_blockcount);
}
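
/*
 * Both helpers above merely record an intent; the refcount btree is not
 * touched until the deferred op is finished via
 * xfs_refcount_finish_one().  A caller that has just mapped a shared
 * extent described by a (hypothetical) @irec would do something like:
 *
 *	error = xfs_refcount_increase_extent(tp, &irec);
 *	if (error)
 *		return error;
 *
 * and let the deferred operations machinery apply the btree update
 * later, possibly in a follow-up transaction.
 */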

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*fbno,
	xfs_extlen_t		*flen,
	bool			find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int			i;
	int			have;
	int			error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, agbno, &have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_private.a.agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
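
/*
 * For example, if the only records overlapping the query are
 * (25, 5, 2) and (30, 10, 4) and the caller passes agbno 20 and
 * aglen 20, this returns fbno 25; flen is 5 without find_end_of_shared
 * and 15 with it, because the second record is contiguous with the
 * first and still inside the queried range.
 */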

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for a long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int			error;
	int			found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
				XFS_REFC_COW_START;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock >= agbno + aglen, out_error);

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_tmp == 1, out_error);
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock == agbno, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_blockcount == aglen, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_refcount == 1, out_error);

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_rec == 1, out_error);
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
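
/*
 * CoW staging records are stored with XFS_REFC_COW_START added to their
 * start block, which sorts them after every regular shared-extent
 * record in the same btree; xfs_refcount_adjust_cow() below applies
 * that bias before doing any splitting, merging, or adjusting.
 */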

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	agbno += XFS_REFC_COW_START;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
int
xfs_refcount_alloc_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;
	int				error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	error = __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
	if (error)
		return error;

	/* Add rmap entry */
	return xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
int
xfs_refcount_free_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;
	int				error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	/* Remove rmap entry */
	error = xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	if (error)
		return error;

	return __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec,
	void			*priv)
{
	struct list_head	*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (be32_to_cpu(rec->refc.rc_refcount) != 1)
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_trans	*tp;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head	debris;
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;
	xfs_fsblock_t		fsb;
	xfs_agblock_t		agbno;
	int			error;

	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
	if (error)
		goto out_trans;
	if (!agbp) {
		error = -ENOMEM;
		goto out_trans;
	}
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_startblock = XFS_REFC_COW_START;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);

		/* Free the orphan record */
		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
		error = xfs_refcount_free_cow_extent(tp, fsb,
				rr->rr_rrec.rc_blockcount);
		if (error)
			goto out_trans;

		/* Free the block. */
		xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;
}
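
/*
 * Recovery is meant to run once per AG when the filesystem is mounted,
 * so a caller would do something like (sketch):
 *
 *	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 *		error = xfs_refcount_recover_cow_leftovers(mp, agno);
 *		if (error)
 *			break;
 *	}
 *
 * which frees any CoW staging extents orphaned by a crash before new
 * reflink activity begins.
 */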

/* Is there a record covering a given extent? */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}