1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_sb.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_btree.h" 16 #include "xfs_bmap.h" 17 #include "xfs_refcount_btree.h" 18 #include "xfs_alloc.h" 19 #include "xfs_errortag.h" 20 #include "xfs_error.h" 21 #include "xfs_trace.h" 22 #include "xfs_cksum.h" 23 #include "xfs_trans.h" 24 #include "xfs_bit.h" 25 #include "xfs_refcount.h" 26 #include "xfs_rmap.h" 27 28 /* Allowable refcount adjustment amounts. */ 29 enum xfs_refc_adjust_op { 30 XFS_REFCOUNT_ADJUST_INCREASE = 1, 31 XFS_REFCOUNT_ADJUST_DECREASE = -1, 32 XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, 33 XFS_REFCOUNT_ADJUST_COW_FREE = -1, 34 }; 35 36 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, 37 xfs_agblock_t agbno, xfs_extlen_t aglen, 38 struct xfs_defer_ops *dfops); 39 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, 40 xfs_agblock_t agbno, xfs_extlen_t aglen, 41 struct xfs_defer_ops *dfops); 42 43 /* 44 * Look up the first record less than or equal to [bno, len] in the btree 45 * given by cur. 46 */ 47 int 48 xfs_refcount_lookup_le( 49 struct xfs_btree_cur *cur, 50 xfs_agblock_t bno, 51 int *stat) 52 { 53 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, 54 XFS_LOOKUP_LE); 55 cur->bc_rec.rc.rc_startblock = bno; 56 cur->bc_rec.rc.rc_blockcount = 0; 57 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 58 } 59 60 /* 61 * Look up the first record greater than or equal to [bno, len] in the btree 62 * given by cur. 63 */ 64 int 65 xfs_refcount_lookup_ge( 66 struct xfs_btree_cur *cur, 67 xfs_agblock_t bno, 68 int *stat) 69 { 70 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, 71 XFS_LOOKUP_GE); 72 cur->bc_rec.rc.rc_startblock = bno; 73 cur->bc_rec.rc.rc_blockcount = 0; 74 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 75 } 76 77 /* 78 * Look up the first record equal to [bno, len] in the btree 79 * given by cur. 80 */ 81 int 82 xfs_refcount_lookup_eq( 83 struct xfs_btree_cur *cur, 84 xfs_agblock_t bno, 85 int *stat) 86 { 87 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, 88 XFS_LOOKUP_LE); 89 cur->bc_rec.rc.rc_startblock = bno; 90 cur->bc_rec.rc.rc_blockcount = 0; 91 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 92 } 93 94 /* Convert on-disk record to in-core format. */ 95 void 96 xfs_refcount_btrec_to_irec( 97 union xfs_btree_rec *rec, 98 struct xfs_refcount_irec *irec) 99 { 100 irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); 101 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 102 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 103 } 104 105 /* 106 * Get the data from the pointed-to record. 107 */ 108 int 109 xfs_refcount_get_rec( 110 struct xfs_btree_cur *cur, 111 struct xfs_refcount_irec *irec, 112 int *stat) 113 { 114 struct xfs_mount *mp = cur->bc_mp; 115 xfs_agnumber_t agno = cur->bc_private.a.agno; 116 union xfs_btree_rec *rec; 117 int error; 118 xfs_agblock_t realstart; 119 120 error = xfs_btree_get_rec(cur, &rec, stat); 121 if (error || !*stat) 122 return error; 123 124 xfs_refcount_btrec_to_irec(rec, irec); 125 126 agno = cur->bc_private.a.agno; 127 if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) 128 goto out_bad_rec; 129 130 /* handle special COW-staging state */ 131 realstart = irec->rc_startblock; 132 if (realstart & XFS_REFC_COW_START) { 133 if (irec->rc_refcount != 1) 134 goto out_bad_rec; 135 realstart &= ~XFS_REFC_COW_START; 136 } else if (irec->rc_refcount < 2) { 137 goto out_bad_rec; 138 } 139 140 /* check for valid extent range, including overflow */ 141 if (!xfs_verify_agbno(mp, agno, realstart)) 142 goto out_bad_rec; 143 if (realstart > realstart + irec->rc_blockcount) 144 goto out_bad_rec; 145 if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1)) 146 goto out_bad_rec; 147 148 if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) 149 goto out_bad_rec; 150 151 trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec); 152 return 0; 153 154 out_bad_rec: 155 xfs_warn(mp, 156 "Refcount BTree record corruption in AG %d detected!", agno); 157 xfs_warn(mp, 158 "Start block 0x%x, block count 0x%x, references 0x%x", 159 irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); 160 return -EFSCORRUPTED; 161 } 162 163 /* 164 * Update the record referred to by cur to the value given 165 * by [bno, len, refcount]. 166 * This either works (return 0) or gets an EFSCORRUPTED error. 167 */ 168 STATIC int 169 xfs_refcount_update( 170 struct xfs_btree_cur *cur, 171 struct xfs_refcount_irec *irec) 172 { 173 union xfs_btree_rec rec; 174 int error; 175 176 trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec); 177 rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); 178 rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); 179 rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); 180 error = xfs_btree_update(cur, &rec); 181 if (error) 182 trace_xfs_refcount_update_error(cur->bc_mp, 183 cur->bc_private.a.agno, error, _RET_IP_); 184 return error; 185 } 186 187 /* 188 * Insert the record referred to by cur to the value given 189 * by [bno, len, refcount]. 190 * This either works (return 0) or gets an EFSCORRUPTED error. 191 */ 192 int 193 xfs_refcount_insert( 194 struct xfs_btree_cur *cur, 195 struct xfs_refcount_irec *irec, 196 int *i) 197 { 198 int error; 199 200 trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec); 201 cur->bc_rec.rc.rc_startblock = irec->rc_startblock; 202 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; 203 cur->bc_rec.rc.rc_refcount = irec->rc_refcount; 204 error = xfs_btree_insert(cur, i); 205 if (error) 206 goto out_error; 207 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); 208 209 out_error: 210 if (error) 211 trace_xfs_refcount_insert_error(cur->bc_mp, 212 cur->bc_private.a.agno, error, _RET_IP_); 213 return error; 214 } 215 216 /* 217 * Remove the record referred to by cur, then set the pointer to the spot 218 * where the record could be re-inserted, in case we want to increment or 219 * decrement the cursor. 220 * This either works (return 0) or gets an EFSCORRUPTED error. 221 */ 222 STATIC int 223 xfs_refcount_delete( 224 struct xfs_btree_cur *cur, 225 int *i) 226 { 227 struct xfs_refcount_irec irec; 228 int found_rec; 229 int error; 230 231 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 232 if (error) 233 goto out_error; 234 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 235 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec); 236 error = xfs_btree_delete(cur, i); 237 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); 238 if (error) 239 goto out_error; 240 error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); 241 out_error: 242 if (error) 243 trace_xfs_refcount_delete_error(cur->bc_mp, 244 cur->bc_private.a.agno, error, _RET_IP_); 245 return error; 246 } 247 248 /* 249 * Adjusting the Reference Count 250 * 251 * As stated elsewhere, the reference count btree (refcbt) stores 252 * >1 reference counts for extents of physical blocks. In this 253 * operation, we're either raising or lowering the reference count of 254 * some subrange stored in the tree: 255 * 256 * <------ adjustment range ------> 257 * ----+ +---+-----+ +--+--------+--------- 258 * 2 | | 3 | 4 | |17| 55 | 10 259 * ----+ +---+-----+ +--+--------+--------- 260 * X axis is physical blocks number; 261 * reference counts are the numbers inside the rectangles 262 * 263 * The first thing we need to do is to ensure that there are no 264 * refcount extents crossing either boundary of the range to be 265 * adjusted. For any extent that does cross a boundary, split it into 266 * two extents so that we can increment the refcount of one of the 267 * pieces later: 268 * 269 * <------ adjustment range ------> 270 * ----+ +---+-----+ +--+--------+----+---- 271 * 2 | | 3 | 2 | |17| 55 | 10 | 10 272 * ----+ +---+-----+ +--+--------+----+---- 273 * 274 * For this next step, let's assume that all the physical blocks in 275 * the adjustment range are mapped to a file and are therefore in use 276 * at least once. Therefore, we can infer that any gap in the 277 * refcount tree within the adjustment range represents a physical 278 * extent with refcount == 1: 279 * 280 * <------ adjustment range ------> 281 * ----+---+---+-----+-+--+--------+----+---- 282 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 283 * ----+---+---+-----+-+--+--------+----+---- 284 * ^ 285 * 286 * For each extent that falls within the interval range, figure out 287 * which extent is to the left or the right of that extent. Now we 288 * have a left, current, and right extent. If the new reference count 289 * of the center extent enables us to merge left, center, and right 290 * into one record covering all three, do so. If the center extent is 291 * at the left end of the range, abuts the left extent, and its new 292 * reference count matches the left extent's record, then merge them. 293 * If the center extent is at the right end of the range, abuts the 294 * right extent, and the reference counts match, merge those. In the 295 * example, we can left merge (assuming an increment operation): 296 * 297 * <------ adjustment range ------> 298 * --------+---+-----+-+--+--------+----+---- 299 * 2 | 3 | 2 |1|17| 55 | 10 | 10 300 * --------+---+-----+-+--+--------+----+---- 301 * ^ 302 * 303 * For all other extents within the range, adjust the reference count 304 * or delete it if the refcount falls below 2. If we were 305 * incrementing, the end result looks like this: 306 * 307 * <------ adjustment range ------> 308 * --------+---+-----+-+--+--------+----+---- 309 * 2 | 4 | 3 |2|18| 56 | 11 | 10 310 * --------+---+-----+-+--+--------+----+---- 311 * 312 * The result of a decrement operation looks as such: 313 * 314 * <------ adjustment range ------> 315 * ----+ +---+ +--+--------+----+---- 316 * 2 | | 2 | |16| 54 | 9 | 10 317 * ----+ +---+ +--+--------+----+---- 318 * DDDD 111111DD 319 * 320 * The blocks marked "D" are freed; the blocks marked "1" are only 321 * referenced once and therefore the record is removed from the 322 * refcount btree. 323 */ 324 325 /* Next block after this extent. */ 326 static inline xfs_agblock_t 327 xfs_refc_next( 328 struct xfs_refcount_irec *rc) 329 { 330 return rc->rc_startblock + rc->rc_blockcount; 331 } 332 333 /* 334 * Split a refcount extent that crosses agbno. 335 */ 336 STATIC int 337 xfs_refcount_split_extent( 338 struct xfs_btree_cur *cur, 339 xfs_agblock_t agbno, 340 bool *shape_changed) 341 { 342 struct xfs_refcount_irec rcext, tmp; 343 int found_rec; 344 int error; 345 346 *shape_changed = false; 347 error = xfs_refcount_lookup_le(cur, agbno, &found_rec); 348 if (error) 349 goto out_error; 350 if (!found_rec) 351 return 0; 352 353 error = xfs_refcount_get_rec(cur, &rcext, &found_rec); 354 if (error) 355 goto out_error; 356 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 357 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) 358 return 0; 359 360 *shape_changed = true; 361 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno, 362 &rcext, agbno); 363 364 /* Establish the right extent. */ 365 tmp = rcext; 366 tmp.rc_startblock = agbno; 367 tmp.rc_blockcount -= (agbno - rcext.rc_startblock); 368 error = xfs_refcount_update(cur, &tmp); 369 if (error) 370 goto out_error; 371 372 /* Insert the left extent. */ 373 tmp = rcext; 374 tmp.rc_blockcount = agbno - rcext.rc_startblock; 375 error = xfs_refcount_insert(cur, &tmp, &found_rec); 376 if (error) 377 goto out_error; 378 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 379 return error; 380 381 out_error: 382 trace_xfs_refcount_split_extent_error(cur->bc_mp, 383 cur->bc_private.a.agno, error, _RET_IP_); 384 return error; 385 } 386 387 /* 388 * Merge the left, center, and right extents. 389 */ 390 STATIC int 391 xfs_refcount_merge_center_extents( 392 struct xfs_btree_cur *cur, 393 struct xfs_refcount_irec *left, 394 struct xfs_refcount_irec *center, 395 struct xfs_refcount_irec *right, 396 unsigned long long extlen, 397 xfs_extlen_t *aglen) 398 { 399 int error; 400 int found_rec; 401 402 trace_xfs_refcount_merge_center_extents(cur->bc_mp, 403 cur->bc_private.a.agno, left, center, right); 404 405 /* 406 * Make sure the center and right extents are not in the btree. 407 * If the center extent was synthesized, the first delete call 408 * removes the right extent and we skip the second deletion. 409 * If center and right were in the btree, then the first delete 410 * call removes the center and the second one removes the right 411 * extent. 412 */ 413 error = xfs_refcount_lookup_ge(cur, center->rc_startblock, 414 &found_rec); 415 if (error) 416 goto out_error; 417 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 418 419 error = xfs_refcount_delete(cur, &found_rec); 420 if (error) 421 goto out_error; 422 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 423 424 if (center->rc_refcount > 1) { 425 error = xfs_refcount_delete(cur, &found_rec); 426 if (error) 427 goto out_error; 428 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 429 out_error); 430 } 431 432 /* Enlarge the left extent. */ 433 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 434 &found_rec); 435 if (error) 436 goto out_error; 437 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 438 439 left->rc_blockcount = extlen; 440 error = xfs_refcount_update(cur, left); 441 if (error) 442 goto out_error; 443 444 *aglen = 0; 445 return error; 446 447 out_error: 448 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 449 cur->bc_private.a.agno, error, _RET_IP_); 450 return error; 451 } 452 453 /* 454 * Merge with the left extent. 455 */ 456 STATIC int 457 xfs_refcount_merge_left_extent( 458 struct xfs_btree_cur *cur, 459 struct xfs_refcount_irec *left, 460 struct xfs_refcount_irec *cleft, 461 xfs_agblock_t *agbno, 462 xfs_extlen_t *aglen) 463 { 464 int error; 465 int found_rec; 466 467 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 468 cur->bc_private.a.agno, left, cleft); 469 470 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ 471 if (cleft->rc_refcount > 1) { 472 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, 473 &found_rec); 474 if (error) 475 goto out_error; 476 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 477 out_error); 478 479 error = xfs_refcount_delete(cur, &found_rec); 480 if (error) 481 goto out_error; 482 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 483 out_error); 484 } 485 486 /* Enlarge the left extent. */ 487 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 488 &found_rec); 489 if (error) 490 goto out_error; 491 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 492 493 left->rc_blockcount += cleft->rc_blockcount; 494 error = xfs_refcount_update(cur, left); 495 if (error) 496 goto out_error; 497 498 *agbno += cleft->rc_blockcount; 499 *aglen -= cleft->rc_blockcount; 500 return error; 501 502 out_error: 503 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 504 cur->bc_private.a.agno, error, _RET_IP_); 505 return error; 506 } 507 508 /* 509 * Merge with the right extent. 510 */ 511 STATIC int 512 xfs_refcount_merge_right_extent( 513 struct xfs_btree_cur *cur, 514 struct xfs_refcount_irec *right, 515 struct xfs_refcount_irec *cright, 516 xfs_extlen_t *aglen) 517 { 518 int error; 519 int found_rec; 520 521 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 522 cur->bc_private.a.agno, cright, right); 523 524 /* 525 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 526 * remove it. 527 */ 528 if (cright->rc_refcount > 1) { 529 error = xfs_refcount_lookup_le(cur, cright->rc_startblock, 530 &found_rec); 531 if (error) 532 goto out_error; 533 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 534 out_error); 535 536 error = xfs_refcount_delete(cur, &found_rec); 537 if (error) 538 goto out_error; 539 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 540 out_error); 541 } 542 543 /* Enlarge the right extent. */ 544 error = xfs_refcount_lookup_le(cur, right->rc_startblock, 545 &found_rec); 546 if (error) 547 goto out_error; 548 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 549 550 right->rc_startblock -= cright->rc_blockcount; 551 right->rc_blockcount += cright->rc_blockcount; 552 error = xfs_refcount_update(cur, right); 553 if (error) 554 goto out_error; 555 556 *aglen -= cright->rc_blockcount; 557 return error; 558 559 out_error: 560 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 561 cur->bc_private.a.agno, error, _RET_IP_); 562 return error; 563 } 564 565 #define XFS_FIND_RCEXT_SHARED 1 566 #define XFS_FIND_RCEXT_COW 2 567 /* 568 * Find the left extent and the one after it (cleft). This function assumes 569 * that we've already split any extent crossing agbno. 570 */ 571 STATIC int 572 xfs_refcount_find_left_extents( 573 struct xfs_btree_cur *cur, 574 struct xfs_refcount_irec *left, 575 struct xfs_refcount_irec *cleft, 576 xfs_agblock_t agbno, 577 xfs_extlen_t aglen, 578 int flags) 579 { 580 struct xfs_refcount_irec tmp; 581 int error; 582 int found_rec; 583 584 left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; 585 error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); 586 if (error) 587 goto out_error; 588 if (!found_rec) 589 return 0; 590 591 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 592 if (error) 593 goto out_error; 594 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 595 596 if (xfs_refc_next(&tmp) != agbno) 597 return 0; 598 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 599 return 0; 600 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 601 return 0; 602 /* We have a left extent; retrieve (or invent) the next right one */ 603 *left = tmp; 604 605 error = xfs_btree_increment(cur, 0, &found_rec); 606 if (error) 607 goto out_error; 608 if (found_rec) { 609 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 610 if (error) 611 goto out_error; 612 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 613 out_error); 614 615 /* if tmp starts at the end of our range, just use that */ 616 if (tmp.rc_startblock == agbno) 617 *cleft = tmp; 618 else { 619 /* 620 * There's a gap in the refcntbt at the start of the 621 * range we're interested in (refcount == 1) so 622 * synthesize the implied extent and pass it back. 623 * We assume here that the agbno/aglen range was 624 * passed in from a data fork extent mapping and 625 * therefore is allocated to exactly one owner. 626 */ 627 cleft->rc_startblock = agbno; 628 cleft->rc_blockcount = min(aglen, 629 tmp.rc_startblock - agbno); 630 cleft->rc_refcount = 1; 631 } 632 } else { 633 /* 634 * No extents, so pretend that there's one covering the whole 635 * range. 636 */ 637 cleft->rc_startblock = agbno; 638 cleft->rc_blockcount = aglen; 639 cleft->rc_refcount = 1; 640 } 641 trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno, 642 left, cleft, agbno); 643 return error; 644 645 out_error: 646 trace_xfs_refcount_find_left_extent_error(cur->bc_mp, 647 cur->bc_private.a.agno, error, _RET_IP_); 648 return error; 649 } 650 651 /* 652 * Find the right extent and the one before it (cright). This function 653 * assumes that we've already split any extents crossing agbno + aglen. 654 */ 655 STATIC int 656 xfs_refcount_find_right_extents( 657 struct xfs_btree_cur *cur, 658 struct xfs_refcount_irec *right, 659 struct xfs_refcount_irec *cright, 660 xfs_agblock_t agbno, 661 xfs_extlen_t aglen, 662 int flags) 663 { 664 struct xfs_refcount_irec tmp; 665 int error; 666 int found_rec; 667 668 right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; 669 error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); 670 if (error) 671 goto out_error; 672 if (!found_rec) 673 return 0; 674 675 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 676 if (error) 677 goto out_error; 678 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 679 680 if (tmp.rc_startblock != agbno + aglen) 681 return 0; 682 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 683 return 0; 684 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 685 return 0; 686 /* We have a right extent; retrieve (or invent) the next left one */ 687 *right = tmp; 688 689 error = xfs_btree_decrement(cur, 0, &found_rec); 690 if (error) 691 goto out_error; 692 if (found_rec) { 693 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 694 if (error) 695 goto out_error; 696 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 697 out_error); 698 699 /* if tmp ends at the end of our range, just use that */ 700 if (xfs_refc_next(&tmp) == agbno + aglen) 701 *cright = tmp; 702 else { 703 /* 704 * There's a gap in the refcntbt at the end of the 705 * range we're interested in (refcount == 1) so 706 * create the implied extent and pass it back. 707 * We assume here that the agbno/aglen range was 708 * passed in from a data fork extent mapping and 709 * therefore is allocated to exactly one owner. 710 */ 711 cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); 712 cright->rc_blockcount = right->rc_startblock - 713 cright->rc_startblock; 714 cright->rc_refcount = 1; 715 } 716 } else { 717 /* 718 * No extents, so pretend that there's one covering the whole 719 * range. 720 */ 721 cright->rc_startblock = agbno; 722 cright->rc_blockcount = aglen; 723 cright->rc_refcount = 1; 724 } 725 trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno, 726 cright, right, agbno + aglen); 727 return error; 728 729 out_error: 730 trace_xfs_refcount_find_right_extent_error(cur->bc_mp, 731 cur->bc_private.a.agno, error, _RET_IP_); 732 return error; 733 } 734 735 /* Is this extent valid? */ 736 static inline bool 737 xfs_refc_valid( 738 struct xfs_refcount_irec *rc) 739 { 740 return rc->rc_startblock != NULLAGBLOCK; 741 } 742 743 /* 744 * Try to merge with any extents on the boundaries of the adjustment range. 745 */ 746 STATIC int 747 xfs_refcount_merge_extents( 748 struct xfs_btree_cur *cur, 749 xfs_agblock_t *agbno, 750 xfs_extlen_t *aglen, 751 enum xfs_refc_adjust_op adjust, 752 int flags, 753 bool *shape_changed) 754 { 755 struct xfs_refcount_irec left = {0}, cleft = {0}; 756 struct xfs_refcount_irec cright = {0}, right = {0}; 757 int error; 758 unsigned long long ulen; 759 bool cequal; 760 761 *shape_changed = false; 762 /* 763 * Find the extent just below agbno [left], just above agbno [cleft], 764 * just below (agbno + aglen) [cright], and just above (agbno + aglen) 765 * [right]. 766 */ 767 error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, 768 *aglen, flags); 769 if (error) 770 return error; 771 error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, 772 *aglen, flags); 773 if (error) 774 return error; 775 776 /* No left or right extent to merge; exit. */ 777 if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) 778 return 0; 779 780 cequal = (cleft.rc_startblock == cright.rc_startblock) && 781 (cleft.rc_blockcount == cright.rc_blockcount); 782 783 /* Try to merge left, cleft, and right. cleft must == cright. */ 784 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + 785 right.rc_blockcount; 786 if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && 787 xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && 788 left.rc_refcount == cleft.rc_refcount + adjust && 789 right.rc_refcount == cleft.rc_refcount + adjust && 790 ulen < MAXREFCEXTLEN) { 791 *shape_changed = true; 792 return xfs_refcount_merge_center_extents(cur, &left, &cleft, 793 &right, ulen, aglen); 794 } 795 796 /* Try to merge left and cleft. */ 797 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; 798 if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && 799 left.rc_refcount == cleft.rc_refcount + adjust && 800 ulen < MAXREFCEXTLEN) { 801 *shape_changed = true; 802 error = xfs_refcount_merge_left_extent(cur, &left, &cleft, 803 agbno, aglen); 804 if (error) 805 return error; 806 807 /* 808 * If we just merged left + cleft and cleft == cright, 809 * we no longer have a cright to merge with right. We're done. 810 */ 811 if (cequal) 812 return 0; 813 } 814 815 /* Try to merge cright and right. */ 816 ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; 817 if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && 818 right.rc_refcount == cright.rc_refcount + adjust && 819 ulen < MAXREFCEXTLEN) { 820 *shape_changed = true; 821 return xfs_refcount_merge_right_extent(cur, &right, &cright, 822 aglen); 823 } 824 825 return error; 826 } 827 828 /* 829 * XXX: This is a pretty hand-wavy estimate. The penalty for guessing 830 * true incorrectly is a shutdown FS; the penalty for guessing false 831 * incorrectly is more transaction rolls than might be necessary. 832 * Be conservative here. 833 */ 834 static bool 835 xfs_refcount_still_have_space( 836 struct xfs_btree_cur *cur) 837 { 838 unsigned long overhead; 839 840 overhead = cur->bc_private.a.priv.refc.shape_changes * 841 xfs_allocfree_log_count(cur->bc_mp, 1); 842 overhead *= cur->bc_mp->m_sb.sb_blocksize; 843 844 /* 845 * Only allow 2 refcount extent updates per transaction if the 846 * refcount continue update "error" has been injected. 847 */ 848 if (cur->bc_private.a.priv.refc.nr_ops > 2 && 849 XFS_TEST_ERROR(false, cur->bc_mp, 850 XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) 851 return false; 852 853 if (cur->bc_private.a.priv.refc.nr_ops == 0) 854 return true; 855 else if (overhead > cur->bc_tp->t_log_res) 856 return false; 857 return cur->bc_tp->t_log_res - overhead > 858 cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; 859 } 860 861 /* 862 * Adjust the refcounts of middle extents. At this point we should have 863 * split extents that crossed the adjustment range; merged with adjacent 864 * extents; and updated agbno/aglen to reflect the merges. Therefore, 865 * all we have to do is update the extents inside [agbno, agbno + aglen]. 866 */ 867 STATIC int 868 xfs_refcount_adjust_extents( 869 struct xfs_btree_cur *cur, 870 xfs_agblock_t *agbno, 871 xfs_extlen_t *aglen, 872 enum xfs_refc_adjust_op adj, 873 struct xfs_defer_ops *dfops, 874 struct xfs_owner_info *oinfo) 875 { 876 struct xfs_refcount_irec ext, tmp; 877 int error; 878 int found_rec, found_tmp; 879 xfs_fsblock_t fsbno; 880 881 /* Merging did all the work already. */ 882 if (*aglen == 0) 883 return 0; 884 885 error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); 886 if (error) 887 goto out_error; 888 889 while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { 890 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 891 if (error) 892 goto out_error; 893 if (!found_rec) { 894 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; 895 ext.rc_blockcount = 0; 896 ext.rc_refcount = 0; 897 } 898 899 /* 900 * Deal with a hole in the refcount tree; if a file maps to 901 * these blocks and there's no refcountbt record, pretend that 902 * there is one with refcount == 1. 903 */ 904 if (ext.rc_startblock != *agbno) { 905 tmp.rc_startblock = *agbno; 906 tmp.rc_blockcount = min(*aglen, 907 ext.rc_startblock - *agbno); 908 tmp.rc_refcount = 1 + adj; 909 trace_xfs_refcount_modify_extent(cur->bc_mp, 910 cur->bc_private.a.agno, &tmp); 911 912 /* 913 * Either cover the hole (increment) or 914 * delete the range (decrement). 915 */ 916 if (tmp.rc_refcount) { 917 error = xfs_refcount_insert(cur, &tmp, 918 &found_tmp); 919 if (error) 920 goto out_error; 921 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 922 found_tmp == 1, out_error); 923 cur->bc_private.a.priv.refc.nr_ops++; 924 } else { 925 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 926 cur->bc_private.a.agno, 927 tmp.rc_startblock); 928 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, 929 tmp.rc_blockcount, oinfo); 930 } 931 932 (*agbno) += tmp.rc_blockcount; 933 (*aglen) -= tmp.rc_blockcount; 934 935 error = xfs_refcount_lookup_ge(cur, *agbno, 936 &found_rec); 937 if (error) 938 goto out_error; 939 } 940 941 /* Stop if there's nothing left to modify */ 942 if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) 943 break; 944 945 /* 946 * Adjust the reference count and either update the tree 947 * (incr) or free the blocks (decr). 948 */ 949 if (ext.rc_refcount == MAXREFCOUNT) 950 goto skip; 951 ext.rc_refcount += adj; 952 trace_xfs_refcount_modify_extent(cur->bc_mp, 953 cur->bc_private.a.agno, &ext); 954 if (ext.rc_refcount > 1) { 955 error = xfs_refcount_update(cur, &ext); 956 if (error) 957 goto out_error; 958 cur->bc_private.a.priv.refc.nr_ops++; 959 } else if (ext.rc_refcount == 1) { 960 error = xfs_refcount_delete(cur, &found_rec); 961 if (error) 962 goto out_error; 963 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 964 found_rec == 1, out_error); 965 cur->bc_private.a.priv.refc.nr_ops++; 966 goto advloop; 967 } else { 968 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 969 cur->bc_private.a.agno, 970 ext.rc_startblock); 971 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, 972 ext.rc_blockcount, oinfo); 973 } 974 975 skip: 976 error = xfs_btree_increment(cur, 0, &found_rec); 977 if (error) 978 goto out_error; 979 980 advloop: 981 (*agbno) += ext.rc_blockcount; 982 (*aglen) -= ext.rc_blockcount; 983 } 984 985 return error; 986 out_error: 987 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 988 cur->bc_private.a.agno, error, _RET_IP_); 989 return error; 990 } 991 992 /* Adjust the reference count of a range of AG blocks. */ 993 STATIC int 994 xfs_refcount_adjust( 995 struct xfs_btree_cur *cur, 996 xfs_agblock_t agbno, 997 xfs_extlen_t aglen, 998 xfs_agblock_t *new_agbno, 999 xfs_extlen_t *new_aglen, 1000 enum xfs_refc_adjust_op adj, 1001 struct xfs_defer_ops *dfops, 1002 struct xfs_owner_info *oinfo) 1003 { 1004 bool shape_changed; 1005 int shape_changes = 0; 1006 int error; 1007 1008 *new_agbno = agbno; 1009 *new_aglen = aglen; 1010 if (adj == XFS_REFCOUNT_ADJUST_INCREASE) 1011 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno, 1012 agbno, aglen); 1013 else 1014 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno, 1015 agbno, aglen); 1016 1017 /* 1018 * Ensure that no rcextents cross the boundary of the adjustment range. 1019 */ 1020 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1021 if (error) 1022 goto out_error; 1023 if (shape_changed) 1024 shape_changes++; 1025 1026 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1027 if (error) 1028 goto out_error; 1029 if (shape_changed) 1030 shape_changes++; 1031 1032 /* 1033 * Try to merge with the left or right extents of the range. 1034 */ 1035 error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, 1036 XFS_FIND_RCEXT_SHARED, &shape_changed); 1037 if (error) 1038 goto out_error; 1039 if (shape_changed) 1040 shape_changes++; 1041 if (shape_changes) 1042 cur->bc_private.a.priv.refc.shape_changes++; 1043 1044 /* Now that we've taken care of the ends, adjust the middle extents */ 1045 error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, 1046 adj, dfops, oinfo); 1047 if (error) 1048 goto out_error; 1049 1050 return 0; 1051 1052 out_error: 1053 trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno, 1054 error, _RET_IP_); 1055 return error; 1056 } 1057 1058 /* Clean up after calling xfs_refcount_finish_one. */ 1059 void 1060 xfs_refcount_finish_one_cleanup( 1061 struct xfs_trans *tp, 1062 struct xfs_btree_cur *rcur, 1063 int error) 1064 { 1065 struct xfs_buf *agbp; 1066 1067 if (rcur == NULL) 1068 return; 1069 agbp = rcur->bc_private.a.agbp; 1070 xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 1071 if (error) 1072 xfs_trans_brelse(tp, agbp); 1073 } 1074 1075 /* 1076 * Process one of the deferred refcount operations. We pass back the 1077 * btree cursor to maintain our lock on the btree between calls. 1078 * This saves time and eliminates a buffer deadlock between the 1079 * superblock and the AGF because we'll always grab them in the same 1080 * order. 1081 */ 1082 int 1083 xfs_refcount_finish_one( 1084 struct xfs_trans *tp, 1085 struct xfs_defer_ops *dfops, 1086 enum xfs_refcount_intent_type type, 1087 xfs_fsblock_t startblock, 1088 xfs_extlen_t blockcount, 1089 xfs_fsblock_t *new_fsb, 1090 xfs_extlen_t *new_len, 1091 struct xfs_btree_cur **pcur) 1092 { 1093 struct xfs_mount *mp = tp->t_mountp; 1094 struct xfs_btree_cur *rcur; 1095 struct xfs_buf *agbp = NULL; 1096 int error = 0; 1097 xfs_agnumber_t agno; 1098 xfs_agblock_t bno; 1099 xfs_agblock_t new_agbno; 1100 unsigned long nr_ops = 0; 1101 int shape_changes = 0; 1102 1103 agno = XFS_FSB_TO_AGNO(mp, startblock); 1104 ASSERT(agno != NULLAGNUMBER); 1105 bno = XFS_FSB_TO_AGBNO(mp, startblock); 1106 1107 trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), 1108 type, XFS_FSB_TO_AGBNO(mp, startblock), 1109 blockcount); 1110 1111 if (XFS_TEST_ERROR(false, mp, 1112 XFS_ERRTAG_REFCOUNT_FINISH_ONE)) 1113 return -EIO; 1114 1115 /* 1116 * If we haven't gotten a cursor or the cursor AG doesn't match 1117 * the startblock, get one now. 1118 */ 1119 rcur = *pcur; 1120 if (rcur != NULL && rcur->bc_private.a.agno != agno) { 1121 nr_ops = rcur->bc_private.a.priv.refc.nr_ops; 1122 shape_changes = rcur->bc_private.a.priv.refc.shape_changes; 1123 xfs_refcount_finish_one_cleanup(tp, rcur, 0); 1124 rcur = NULL; 1125 *pcur = NULL; 1126 } 1127 if (rcur == NULL) { 1128 error = xfs_alloc_read_agf(tp->t_mountp, tp, agno, 1129 XFS_ALLOC_FLAG_FREEING, &agbp); 1130 if (error) 1131 return error; 1132 if (!agbp) 1133 return -EFSCORRUPTED; 1134 1135 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops); 1136 if (!rcur) { 1137 error = -ENOMEM; 1138 goto out_cur; 1139 } 1140 rcur->bc_private.a.priv.refc.nr_ops = nr_ops; 1141 rcur->bc_private.a.priv.refc.shape_changes = shape_changes; 1142 } 1143 *pcur = rcur; 1144 1145 switch (type) { 1146 case XFS_REFCOUNT_INCREASE: 1147 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1148 new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL); 1149 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1150 break; 1151 case XFS_REFCOUNT_DECREASE: 1152 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1153 new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL); 1154 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1155 break; 1156 case XFS_REFCOUNT_ALLOC_COW: 1157 *new_fsb = startblock + blockcount; 1158 *new_len = 0; 1159 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops); 1160 break; 1161 case XFS_REFCOUNT_FREE_COW: 1162 *new_fsb = startblock + blockcount; 1163 *new_len = 0; 1164 error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops); 1165 break; 1166 default: 1167 ASSERT(0); 1168 error = -EFSCORRUPTED; 1169 } 1170 if (!error && *new_len > 0) 1171 trace_xfs_refcount_finish_one_leftover(mp, agno, type, 1172 bno, blockcount, new_agbno, *new_len); 1173 return error; 1174 1175 out_cur: 1176 xfs_trans_brelse(tp, agbp); 1177 1178 return error; 1179 } 1180 1181 /* 1182 * Record a refcount intent for later processing. 1183 */ 1184 static int 1185 __xfs_refcount_add( 1186 struct xfs_mount *mp, 1187 struct xfs_defer_ops *dfops, 1188 enum xfs_refcount_intent_type type, 1189 xfs_fsblock_t startblock, 1190 xfs_extlen_t blockcount) 1191 { 1192 struct xfs_refcount_intent *ri; 1193 1194 trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock), 1195 type, XFS_FSB_TO_AGBNO(mp, startblock), 1196 blockcount); 1197 1198 ri = kmem_alloc(sizeof(struct xfs_refcount_intent), 1199 KM_SLEEP | KM_NOFS); 1200 INIT_LIST_HEAD(&ri->ri_list); 1201 ri->ri_type = type; 1202 ri->ri_startblock = startblock; 1203 ri->ri_blockcount = blockcount; 1204 1205 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); 1206 return 0; 1207 } 1208 1209 /* 1210 * Increase the reference count of the blocks backing a file's extent. 1211 */ 1212 int 1213 xfs_refcount_increase_extent( 1214 struct xfs_mount *mp, 1215 struct xfs_defer_ops *dfops, 1216 struct xfs_bmbt_irec *PREV) 1217 { 1218 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1219 return 0; 1220 1221 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE, 1222 PREV->br_startblock, PREV->br_blockcount); 1223 } 1224 1225 /* 1226 * Decrease the reference count of the blocks backing a file's extent. 1227 */ 1228 int 1229 xfs_refcount_decrease_extent( 1230 struct xfs_mount *mp, 1231 struct xfs_defer_ops *dfops, 1232 struct xfs_bmbt_irec *PREV) 1233 { 1234 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1235 return 0; 1236 1237 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE, 1238 PREV->br_startblock, PREV->br_blockcount); 1239 } 1240 1241 /* 1242 * Given an AG extent, find the lowest-numbered run of shared blocks 1243 * within that range and return the range in fbno/flen. If 1244 * find_end_of_shared is set, return the longest contiguous extent of 1245 * shared blocks; if not, just return the first extent we find. If no 1246 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK 1247 * and 0, respectively. 1248 */ 1249 int 1250 xfs_refcount_find_shared( 1251 struct xfs_btree_cur *cur, 1252 xfs_agblock_t agbno, 1253 xfs_extlen_t aglen, 1254 xfs_agblock_t *fbno, 1255 xfs_extlen_t *flen, 1256 bool find_end_of_shared) 1257 { 1258 struct xfs_refcount_irec tmp; 1259 int i; 1260 int have; 1261 int error; 1262 1263 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno, 1264 agbno, aglen); 1265 1266 /* By default, skip the whole range */ 1267 *fbno = NULLAGBLOCK; 1268 *flen = 0; 1269 1270 /* Try to find a refcount extent that crosses the start */ 1271 error = xfs_refcount_lookup_le(cur, agbno, &have); 1272 if (error) 1273 goto out_error; 1274 if (!have) { 1275 /* No left extent, look at the next one */ 1276 error = xfs_btree_increment(cur, 0, &have); 1277 if (error) 1278 goto out_error; 1279 if (!have) 1280 goto done; 1281 } 1282 error = xfs_refcount_get_rec(cur, &tmp, &i); 1283 if (error) 1284 goto out_error; 1285 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1286 1287 /* If the extent ends before the start, look at the next one */ 1288 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { 1289 error = xfs_btree_increment(cur, 0, &have); 1290 if (error) 1291 goto out_error; 1292 if (!have) 1293 goto done; 1294 error = xfs_refcount_get_rec(cur, &tmp, &i); 1295 if (error) 1296 goto out_error; 1297 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1298 } 1299 1300 /* If the extent starts after the range we want, bail out */ 1301 if (tmp.rc_startblock >= agbno + aglen) 1302 goto done; 1303 1304 /* We found the start of a shared extent! */ 1305 if (tmp.rc_startblock < agbno) { 1306 tmp.rc_blockcount -= (agbno - tmp.rc_startblock); 1307 tmp.rc_startblock = agbno; 1308 } 1309 1310 *fbno = tmp.rc_startblock; 1311 *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); 1312 if (!find_end_of_shared) 1313 goto done; 1314 1315 /* Otherwise, find the end of this shared extent */ 1316 while (*fbno + *flen < agbno + aglen) { 1317 error = xfs_btree_increment(cur, 0, &have); 1318 if (error) 1319 goto out_error; 1320 if (!have) 1321 break; 1322 error = xfs_refcount_get_rec(cur, &tmp, &i); 1323 if (error) 1324 goto out_error; 1325 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1326 if (tmp.rc_startblock >= agbno + aglen || 1327 tmp.rc_startblock != *fbno + *flen) 1328 break; 1329 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); 1330 } 1331 1332 done: 1333 trace_xfs_refcount_find_shared_result(cur->bc_mp, 1334 cur->bc_private.a.agno, *fbno, *flen); 1335 1336 out_error: 1337 if (error) 1338 trace_xfs_refcount_find_shared_error(cur->bc_mp, 1339 cur->bc_private.a.agno, error, _RET_IP_); 1340 return error; 1341 } 1342 1343 /* 1344 * Recovering CoW Blocks After a Crash 1345 * 1346 * Due to the way that the copy on write mechanism works, there's a window of 1347 * opportunity in which we can lose track of allocated blocks during a crash. 1348 * Because CoW uses delayed allocation in the in-core CoW fork, writeback 1349 * causes blocks to be allocated and stored in the CoW fork. The blocks are 1350 * no longer in the free space btree but are not otherwise recorded anywhere 1351 * until the write completes and the blocks are mapped into the file. A crash 1352 * in between allocation and remapping results in the replacement blocks being 1353 * lost. This situation is exacerbated by the CoW extent size hint because 1354 * allocations can hang around for long time. 1355 * 1356 * However, there is a place where we can record these allocations before they 1357 * become mappings -- the reference count btree. The btree does not record 1358 * extents with refcount == 1, so we can record allocations with a refcount of 1359 * 1. Blocks being used for CoW writeout cannot be shared, so there should be 1360 * no conflict with shared block records. These mappings should be created 1361 * when we allocate blocks to the CoW fork and deleted when they're removed 1362 * from the CoW fork. 1363 * 1364 * Minor nit: records for in-progress CoW allocations and records for shared 1365 * extents must never be merged, to preserve the property that (except for CoW 1366 * allocations) there are no refcount btree entries with refcount == 1. The 1367 * only time this could potentially happen is when unsharing a block that's 1368 * adjacent to CoW allocations, so we must be careful to avoid this. 1369 * 1370 * At mount time we recover lost CoW allocations by searching the refcount 1371 * btree for these refcount == 1 mappings. These represent CoW allocations 1372 * that were in progress at the time the filesystem went down, so we can free 1373 * them to get the space back. 1374 * 1375 * This mechanism is superior to creating EFIs for unmapped CoW extents for 1376 * several reasons -- first, EFIs pin the tail of the log and would have to be 1377 * periodically relogged to avoid filling up the log. Second, CoW completions 1378 * will have to file an EFD and create new EFIs for whatever remains in the 1379 * CoW fork; this partially takes care of (1) but extent-size reservations 1380 * will have to periodically relog even if there's no writeout in progress. 1381 * This can happen if the CoW extent size hint is set, which you really want. 1382 * Third, EFIs cannot currently be automatically relogged into newer 1383 * transactions to advance the log tail. Fourth, stuffing the log full of 1384 * EFIs places an upper bound on the number of CoW allocations that can be 1385 * held filesystem-wide at any given time. Recording them in the refcount 1386 * btree doesn't require us to maintain any state in memory and doesn't pin 1387 * the log. 1388 */ 1389 /* 1390 * Adjust the refcounts of CoW allocations. These allocations are "magic" 1391 * in that they're not referenced anywhere else in the filesystem, so we 1392 * stash them in the refcount btree with a refcount of 1 until either file 1393 * remapping (or CoW cancellation) happens. 1394 */ 1395 STATIC int 1396 xfs_refcount_adjust_cow_extents( 1397 struct xfs_btree_cur *cur, 1398 xfs_agblock_t agbno, 1399 xfs_extlen_t aglen, 1400 enum xfs_refc_adjust_op adj) 1401 { 1402 struct xfs_refcount_irec ext, tmp; 1403 int error; 1404 int found_rec, found_tmp; 1405 1406 if (aglen == 0) 1407 return 0; 1408 1409 /* Find any overlapping refcount records */ 1410 error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); 1411 if (error) 1412 goto out_error; 1413 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 1414 if (error) 1415 goto out_error; 1416 if (!found_rec) { 1417 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + 1418 XFS_REFC_COW_START; 1419 ext.rc_blockcount = 0; 1420 ext.rc_refcount = 0; 1421 } 1422 1423 switch (adj) { 1424 case XFS_REFCOUNT_ADJUST_COW_ALLOC: 1425 /* Adding a CoW reservation, there should be nothing here. */ 1426 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1427 ext.rc_startblock >= agbno + aglen, out_error); 1428 1429 tmp.rc_startblock = agbno; 1430 tmp.rc_blockcount = aglen; 1431 tmp.rc_refcount = 1; 1432 trace_xfs_refcount_modify_extent(cur->bc_mp, 1433 cur->bc_private.a.agno, &tmp); 1434 1435 error = xfs_refcount_insert(cur, &tmp, 1436 &found_tmp); 1437 if (error) 1438 goto out_error; 1439 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1440 found_tmp == 1, out_error); 1441 break; 1442 case XFS_REFCOUNT_ADJUST_COW_FREE: 1443 /* Removing a CoW reservation, there should be one extent. */ 1444 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1445 ext.rc_startblock == agbno, out_error); 1446 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1447 ext.rc_blockcount == aglen, out_error); 1448 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1449 ext.rc_refcount == 1, out_error); 1450 1451 ext.rc_refcount = 0; 1452 trace_xfs_refcount_modify_extent(cur->bc_mp, 1453 cur->bc_private.a.agno, &ext); 1454 error = xfs_refcount_delete(cur, &found_rec); 1455 if (error) 1456 goto out_error; 1457 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1458 found_rec == 1, out_error); 1459 break; 1460 default: 1461 ASSERT(0); 1462 } 1463 1464 return error; 1465 out_error: 1466 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1467 cur->bc_private.a.agno, error, _RET_IP_); 1468 return error; 1469 } 1470 1471 /* 1472 * Add or remove refcount btree entries for CoW reservations. 1473 */ 1474 STATIC int 1475 xfs_refcount_adjust_cow( 1476 struct xfs_btree_cur *cur, 1477 xfs_agblock_t agbno, 1478 xfs_extlen_t aglen, 1479 enum xfs_refc_adjust_op adj) 1480 { 1481 bool shape_changed; 1482 int error; 1483 1484 agbno += XFS_REFC_COW_START; 1485 1486 /* 1487 * Ensure that no rcextents cross the boundary of the adjustment range. 1488 */ 1489 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1490 if (error) 1491 goto out_error; 1492 1493 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1494 if (error) 1495 goto out_error; 1496 1497 /* 1498 * Try to merge with the left or right extents of the range. 1499 */ 1500 error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, 1501 XFS_FIND_RCEXT_COW, &shape_changed); 1502 if (error) 1503 goto out_error; 1504 1505 /* Now that we've taken care of the ends, adjust the middle extents */ 1506 error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj); 1507 if (error) 1508 goto out_error; 1509 1510 return 0; 1511 1512 out_error: 1513 trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno, 1514 error, _RET_IP_); 1515 return error; 1516 } 1517 1518 /* 1519 * Record a CoW allocation in the refcount btree. 1520 */ 1521 STATIC int 1522 __xfs_refcount_cow_alloc( 1523 struct xfs_btree_cur *rcur, 1524 xfs_agblock_t agbno, 1525 xfs_extlen_t aglen, 1526 struct xfs_defer_ops *dfops) 1527 { 1528 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, 1529 agbno, aglen); 1530 1531 /* Add refcount btree reservation */ 1532 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1533 XFS_REFCOUNT_ADJUST_COW_ALLOC); 1534 } 1535 1536 /* 1537 * Remove a CoW allocation from the refcount btree. 1538 */ 1539 STATIC int 1540 __xfs_refcount_cow_free( 1541 struct xfs_btree_cur *rcur, 1542 xfs_agblock_t agbno, 1543 xfs_extlen_t aglen, 1544 struct xfs_defer_ops *dfops) 1545 { 1546 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, 1547 agbno, aglen); 1548 1549 /* Remove refcount btree reservation */ 1550 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1551 XFS_REFCOUNT_ADJUST_COW_FREE); 1552 } 1553 1554 /* Record a CoW staging extent in the refcount btree. */ 1555 int 1556 xfs_refcount_alloc_cow_extent( 1557 struct xfs_mount *mp, 1558 struct xfs_defer_ops *dfops, 1559 xfs_fsblock_t fsb, 1560 xfs_extlen_t len) 1561 { 1562 int error; 1563 1564 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1565 return 0; 1566 1567 error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, 1568 fsb, len); 1569 if (error) 1570 return error; 1571 1572 /* Add rmap entry */ 1573 return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), 1574 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1575 } 1576 1577 /* Forget a CoW staging event in the refcount btree. */ 1578 int 1579 xfs_refcount_free_cow_extent( 1580 struct xfs_mount *mp, 1581 struct xfs_defer_ops *dfops, 1582 xfs_fsblock_t fsb, 1583 xfs_extlen_t len) 1584 { 1585 int error; 1586 1587 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1588 return 0; 1589 1590 /* Remove rmap entry */ 1591 error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), 1592 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1593 if (error) 1594 return error; 1595 1596 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, 1597 fsb, len); 1598 } 1599 1600 struct xfs_refcount_recovery { 1601 struct list_head rr_list; 1602 struct xfs_refcount_irec rr_rrec; 1603 }; 1604 1605 /* Stuff an extent on the recovery list. */ 1606 STATIC int 1607 xfs_refcount_recover_extent( 1608 struct xfs_btree_cur *cur, 1609 union xfs_btree_rec *rec, 1610 void *priv) 1611 { 1612 struct list_head *debris = priv; 1613 struct xfs_refcount_recovery *rr; 1614 1615 if (be32_to_cpu(rec->refc.rc_refcount) != 1) 1616 return -EFSCORRUPTED; 1617 1618 rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP); 1619 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); 1620 list_add_tail(&rr->rr_list, debris); 1621 1622 return 0; 1623 } 1624 1625 /* Find and remove leftover CoW reservations. */ 1626 int 1627 xfs_refcount_recover_cow_leftovers( 1628 struct xfs_mount *mp, 1629 xfs_agnumber_t agno) 1630 { 1631 struct xfs_trans *tp; 1632 struct xfs_btree_cur *cur; 1633 struct xfs_buf *agbp; 1634 struct xfs_refcount_recovery *rr, *n; 1635 struct list_head debris; 1636 union xfs_btree_irec low; 1637 union xfs_btree_irec high; 1638 struct xfs_defer_ops dfops; 1639 xfs_fsblock_t fsb; 1640 xfs_agblock_t agbno; 1641 int error; 1642 1643 if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) 1644 return -EOPNOTSUPP; 1645 1646 INIT_LIST_HEAD(&debris); 1647 1648 /* 1649 * In this first part, we use an empty transaction to gather up 1650 * all the leftover CoW extents so that we can subsequently 1651 * delete them. The empty transaction is used to avoid 1652 * a buffer lock deadlock if there happens to be a loop in the 1653 * refcountbt because we're allowed to re-grab a buffer that is 1654 * already attached to our transaction. When we're done 1655 * recording the CoW debris we cancel the (empty) transaction 1656 * and everything goes away cleanly. 1657 */ 1658 error = xfs_trans_alloc_empty(mp, &tp); 1659 if (error) 1660 return error; 1661 1662 error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 1663 if (error) 1664 goto out_trans; 1665 if (!agbp) { 1666 error = -ENOMEM; 1667 goto out_trans; 1668 } 1669 cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); 1670 1671 /* Find all the leftover CoW staging extents. */ 1672 memset(&low, 0, sizeof(low)); 1673 memset(&high, 0, sizeof(high)); 1674 low.rc.rc_startblock = XFS_REFC_COW_START; 1675 high.rc.rc_startblock = -1U; 1676 error = xfs_btree_query_range(cur, &low, &high, 1677 xfs_refcount_recover_extent, &debris); 1678 if (error) 1679 goto out_cursor; 1680 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1681 xfs_trans_brelse(tp, agbp); 1682 xfs_trans_cancel(tp); 1683 1684 /* Now iterate the list to free the leftovers */ 1685 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1686 /* Set up transaction. */ 1687 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); 1688 if (error) 1689 goto out_free; 1690 1691 trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec); 1692 1693 /* Free the orphan record */ 1694 xfs_defer_init(&dfops, &fsb); 1695 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; 1696 fsb = XFS_AGB_TO_FSB(mp, agno, agbno); 1697 error = xfs_refcount_free_cow_extent(mp, &dfops, fsb, 1698 rr->rr_rrec.rc_blockcount); 1699 if (error) 1700 goto out_defer; 1701 1702 /* Free the block. */ 1703 xfs_bmap_add_free(mp, &dfops, fsb, 1704 rr->rr_rrec.rc_blockcount, NULL); 1705 1706 error = xfs_defer_finish(&tp, &dfops); 1707 if (error) 1708 goto out_defer; 1709 1710 error = xfs_trans_commit(tp); 1711 if (error) 1712 goto out_free; 1713 1714 list_del(&rr->rr_list); 1715 kmem_free(rr); 1716 } 1717 1718 return error; 1719 out_defer: 1720 xfs_defer_cancel(&dfops); 1721 out_trans: 1722 xfs_trans_cancel(tp); 1723 out_free: 1724 /* Free the leftover list */ 1725 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1726 list_del(&rr->rr_list); 1727 kmem_free(rr); 1728 } 1729 return error; 1730 1731 out_cursor: 1732 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1733 xfs_trans_brelse(tp, agbp); 1734 goto out_trans; 1735 } 1736 1737 /* Is there a record covering a given extent? */ 1738 int 1739 xfs_refcount_has_record( 1740 struct xfs_btree_cur *cur, 1741 xfs_agblock_t bno, 1742 xfs_extlen_t len, 1743 bool *exists) 1744 { 1745 union xfs_btree_irec low; 1746 union xfs_btree_irec high; 1747 1748 memset(&low, 0, sizeof(low)); 1749 low.rc.rc_startblock = bno; 1750 memset(&high, 0xFF, sizeof(high)); 1751 high.rc.rc_startblock = bno + len - 1; 1752 1753 return xfs_btree_has_record(cur, &low, &high, exists); 1754 } 1755