1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_defer.h" 14 #include "xfs_btree.h" 15 #include "xfs_bmap.h" 16 #include "xfs_refcount_btree.h" 17 #include "xfs_alloc.h" 18 #include "xfs_errortag.h" 19 #include "xfs_error.h" 20 #include "xfs_trace.h" 21 #include "xfs_trans.h" 22 #include "xfs_bit.h" 23 #include "xfs_refcount.h" 24 #include "xfs_rmap.h" 25 #include "xfs_ag.h" 26 27 struct kmem_cache *xfs_refcount_intent_cache; 28 29 /* Allowable refcount adjustment amounts. */ 30 enum xfs_refc_adjust_op { 31 XFS_REFCOUNT_ADJUST_INCREASE = 1, 32 XFS_REFCOUNT_ADJUST_DECREASE = -1, 33 XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, 34 XFS_REFCOUNT_ADJUST_COW_FREE = -1, 35 }; 36 37 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, 38 xfs_agblock_t agbno, xfs_extlen_t aglen); 39 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, 40 xfs_agblock_t agbno, xfs_extlen_t aglen); 41 42 /* 43 * Look up the first record less than or equal to [bno, len] in the btree 44 * given by cur. 45 */ 46 int 47 xfs_refcount_lookup_le( 48 struct xfs_btree_cur *cur, 49 xfs_agblock_t bno, 50 int *stat) 51 { 52 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, 53 XFS_LOOKUP_LE); 54 cur->bc_rec.rc.rc_startblock = bno; 55 cur->bc_rec.rc.rc_blockcount = 0; 56 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 57 } 58 59 /* 60 * Look up the first record greater than or equal to [bno, len] in the btree 61 * given by cur. 62 */ 63 int 64 xfs_refcount_lookup_ge( 65 struct xfs_btree_cur *cur, 66 xfs_agblock_t bno, 67 int *stat) 68 { 69 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, 70 XFS_LOOKUP_GE); 71 cur->bc_rec.rc.rc_startblock = bno; 72 cur->bc_rec.rc.rc_blockcount = 0; 73 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 74 } 75 76 /* 77 * Look up the first record equal to [bno, len] in the btree 78 * given by cur. 79 */ 80 int 81 xfs_refcount_lookup_eq( 82 struct xfs_btree_cur *cur, 83 xfs_agblock_t bno, 84 int *stat) 85 { 86 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, bno, 87 XFS_LOOKUP_LE); 88 cur->bc_rec.rc.rc_startblock = bno; 89 cur->bc_rec.rc.rc_blockcount = 0; 90 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 91 } 92 93 /* Convert on-disk record to in-core format. */ 94 void 95 xfs_refcount_btrec_to_irec( 96 const union xfs_btree_rec *rec, 97 struct xfs_refcount_irec *irec) 98 { 99 irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); 100 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 101 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 102 } 103 104 /* 105 * Get the data from the pointed-to record. 106 */ 107 int 108 xfs_refcount_get_rec( 109 struct xfs_btree_cur *cur, 110 struct xfs_refcount_irec *irec, 111 int *stat) 112 { 113 struct xfs_mount *mp = cur->bc_mp; 114 xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; 115 union xfs_btree_rec *rec; 116 int error; 117 xfs_agblock_t realstart; 118 119 error = xfs_btree_get_rec(cur, &rec, stat); 120 if (error || !*stat) 121 return error; 122 123 xfs_refcount_btrec_to_irec(rec, irec); 124 125 agno = cur->bc_ag.pag->pag_agno; 126 if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) 127 goto out_bad_rec; 128 129 /* handle special COW-staging state */ 130 realstart = irec->rc_startblock; 131 if (realstart & XFS_REFC_COW_START) { 132 if (irec->rc_refcount != 1) 133 goto out_bad_rec; 134 realstart &= ~XFS_REFC_COW_START; 135 } else if (irec->rc_refcount < 2) { 136 goto out_bad_rec; 137 } 138 139 /* check for valid extent range, including overflow */ 140 if (!xfs_verify_agbno(mp, agno, realstart)) 141 goto out_bad_rec; 142 if (realstart > realstart + irec->rc_blockcount) 143 goto out_bad_rec; 144 if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1)) 145 goto out_bad_rec; 146 147 if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) 148 goto out_bad_rec; 149 150 trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 151 return 0; 152 153 out_bad_rec: 154 xfs_warn(mp, 155 "Refcount BTree record corruption in AG %d detected!", agno); 156 xfs_warn(mp, 157 "Start block 0x%x, block count 0x%x, references 0x%x", 158 irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); 159 return -EFSCORRUPTED; 160 } 161 162 /* 163 * Update the record referred to by cur to the value given 164 * by [bno, len, refcount]. 165 * This either works (return 0) or gets an EFSCORRUPTED error. 166 */ 167 STATIC int 168 xfs_refcount_update( 169 struct xfs_btree_cur *cur, 170 struct xfs_refcount_irec *irec) 171 { 172 union xfs_btree_rec rec; 173 int error; 174 175 trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 176 rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); 177 rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); 178 rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); 179 error = xfs_btree_update(cur, &rec); 180 if (error) 181 trace_xfs_refcount_update_error(cur->bc_mp, 182 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 183 return error; 184 } 185 186 /* 187 * Insert the record referred to by cur to the value given 188 * by [bno, len, refcount]. 189 * This either works (return 0) or gets an EFSCORRUPTED error. 190 */ 191 int 192 xfs_refcount_insert( 193 struct xfs_btree_cur *cur, 194 struct xfs_refcount_irec *irec, 195 int *i) 196 { 197 int error; 198 199 trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); 200 cur->bc_rec.rc.rc_startblock = irec->rc_startblock; 201 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; 202 cur->bc_rec.rc.rc_refcount = irec->rc_refcount; 203 error = xfs_btree_insert(cur, i); 204 if (error) 205 goto out_error; 206 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 207 error = -EFSCORRUPTED; 208 goto out_error; 209 } 210 211 out_error: 212 if (error) 213 trace_xfs_refcount_insert_error(cur->bc_mp, 214 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 215 return error; 216 } 217 218 /* 219 * Remove the record referred to by cur, then set the pointer to the spot 220 * where the record could be re-inserted, in case we want to increment or 221 * decrement the cursor. 222 * This either works (return 0) or gets an EFSCORRUPTED error. 223 */ 224 STATIC int 225 xfs_refcount_delete( 226 struct xfs_btree_cur *cur, 227 int *i) 228 { 229 struct xfs_refcount_irec irec; 230 int found_rec; 231 int error; 232 233 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 234 if (error) 235 goto out_error; 236 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 237 error = -EFSCORRUPTED; 238 goto out_error; 239 } 240 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); 241 error = xfs_btree_delete(cur, i); 242 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 243 error = -EFSCORRUPTED; 244 goto out_error; 245 } 246 if (error) 247 goto out_error; 248 error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); 249 out_error: 250 if (error) 251 trace_xfs_refcount_delete_error(cur->bc_mp, 252 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 253 return error; 254 } 255 256 /* 257 * Adjusting the Reference Count 258 * 259 * As stated elsewhere, the reference count btree (refcbt) stores 260 * >1 reference counts for extents of physical blocks. In this 261 * operation, we're either raising or lowering the reference count of 262 * some subrange stored in the tree: 263 * 264 * <------ adjustment range ------> 265 * ----+ +---+-----+ +--+--------+--------- 266 * 2 | | 3 | 4 | |17| 55 | 10 267 * ----+ +---+-----+ +--+--------+--------- 268 * X axis is physical blocks number; 269 * reference counts are the numbers inside the rectangles 270 * 271 * The first thing we need to do is to ensure that there are no 272 * refcount extents crossing either boundary of the range to be 273 * adjusted. For any extent that does cross a boundary, split it into 274 * two extents so that we can increment the refcount of one of the 275 * pieces later: 276 * 277 * <------ adjustment range ------> 278 * ----+ +---+-----+ +--+--------+----+---- 279 * 2 | | 3 | 2 | |17| 55 | 10 | 10 280 * ----+ +---+-----+ +--+--------+----+---- 281 * 282 * For this next step, let's assume that all the physical blocks in 283 * the adjustment range are mapped to a file and are therefore in use 284 * at least once. Therefore, we can infer that any gap in the 285 * refcount tree within the adjustment range represents a physical 286 * extent with refcount == 1: 287 * 288 * <------ adjustment range ------> 289 * ----+---+---+-----+-+--+--------+----+---- 290 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 291 * ----+---+---+-----+-+--+--------+----+---- 292 * ^ 293 * 294 * For each extent that falls within the interval range, figure out 295 * which extent is to the left or the right of that extent. Now we 296 * have a left, current, and right extent. If the new reference count 297 * of the center extent enables us to merge left, center, and right 298 * into one record covering all three, do so. If the center extent is 299 * at the left end of the range, abuts the left extent, and its new 300 * reference count matches the left extent's record, then merge them. 301 * If the center extent is at the right end of the range, abuts the 302 * right extent, and the reference counts match, merge those. In the 303 * example, we can left merge (assuming an increment operation): 304 * 305 * <------ adjustment range ------> 306 * --------+---+-----+-+--+--------+----+---- 307 * 2 | 3 | 2 |1|17| 55 | 10 | 10 308 * --------+---+-----+-+--+--------+----+---- 309 * ^ 310 * 311 * For all other extents within the range, adjust the reference count 312 * or delete it if the refcount falls below 2. If we were 313 * incrementing, the end result looks like this: 314 * 315 * <------ adjustment range ------> 316 * --------+---+-----+-+--+--------+----+---- 317 * 2 | 4 | 3 |2|18| 56 | 11 | 10 318 * --------+---+-----+-+--+--------+----+---- 319 * 320 * The result of a decrement operation looks as such: 321 * 322 * <------ adjustment range ------> 323 * ----+ +---+ +--+--------+----+---- 324 * 2 | | 2 | |16| 54 | 9 | 10 325 * ----+ +---+ +--+--------+----+---- 326 * DDDD 111111DD 327 * 328 * The blocks marked "D" are freed; the blocks marked "1" are only 329 * referenced once and therefore the record is removed from the 330 * refcount btree. 331 */ 332 333 /* Next block after this extent. */ 334 static inline xfs_agblock_t 335 xfs_refc_next( 336 struct xfs_refcount_irec *rc) 337 { 338 return rc->rc_startblock + rc->rc_blockcount; 339 } 340 341 /* 342 * Split a refcount extent that crosses agbno. 343 */ 344 STATIC int 345 xfs_refcount_split_extent( 346 struct xfs_btree_cur *cur, 347 xfs_agblock_t agbno, 348 bool *shape_changed) 349 { 350 struct xfs_refcount_irec rcext, tmp; 351 int found_rec; 352 int error; 353 354 *shape_changed = false; 355 error = xfs_refcount_lookup_le(cur, agbno, &found_rec); 356 if (error) 357 goto out_error; 358 if (!found_rec) 359 return 0; 360 361 error = xfs_refcount_get_rec(cur, &rcext, &found_rec); 362 if (error) 363 goto out_error; 364 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 365 error = -EFSCORRUPTED; 366 goto out_error; 367 } 368 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) 369 return 0; 370 371 *shape_changed = true; 372 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 373 &rcext, agbno); 374 375 /* Establish the right extent. */ 376 tmp = rcext; 377 tmp.rc_startblock = agbno; 378 tmp.rc_blockcount -= (agbno - rcext.rc_startblock); 379 error = xfs_refcount_update(cur, &tmp); 380 if (error) 381 goto out_error; 382 383 /* Insert the left extent. */ 384 tmp = rcext; 385 tmp.rc_blockcount = agbno - rcext.rc_startblock; 386 error = xfs_refcount_insert(cur, &tmp, &found_rec); 387 if (error) 388 goto out_error; 389 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 390 error = -EFSCORRUPTED; 391 goto out_error; 392 } 393 return error; 394 395 out_error: 396 trace_xfs_refcount_split_extent_error(cur->bc_mp, 397 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 398 return error; 399 } 400 401 /* 402 * Merge the left, center, and right extents. 403 */ 404 STATIC int 405 xfs_refcount_merge_center_extents( 406 struct xfs_btree_cur *cur, 407 struct xfs_refcount_irec *left, 408 struct xfs_refcount_irec *center, 409 struct xfs_refcount_irec *right, 410 unsigned long long extlen, 411 xfs_extlen_t *aglen) 412 { 413 int error; 414 int found_rec; 415 416 trace_xfs_refcount_merge_center_extents(cur->bc_mp, 417 cur->bc_ag.pag->pag_agno, left, center, right); 418 419 /* 420 * Make sure the center and right extents are not in the btree. 421 * If the center extent was synthesized, the first delete call 422 * removes the right extent and we skip the second deletion. 423 * If center and right were in the btree, then the first delete 424 * call removes the center and the second one removes the right 425 * extent. 426 */ 427 error = xfs_refcount_lookup_ge(cur, center->rc_startblock, 428 &found_rec); 429 if (error) 430 goto out_error; 431 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 432 error = -EFSCORRUPTED; 433 goto out_error; 434 } 435 436 error = xfs_refcount_delete(cur, &found_rec); 437 if (error) 438 goto out_error; 439 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 440 error = -EFSCORRUPTED; 441 goto out_error; 442 } 443 444 if (center->rc_refcount > 1) { 445 error = xfs_refcount_delete(cur, &found_rec); 446 if (error) 447 goto out_error; 448 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 449 error = -EFSCORRUPTED; 450 goto out_error; 451 } 452 } 453 454 /* Enlarge the left extent. */ 455 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 456 &found_rec); 457 if (error) 458 goto out_error; 459 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 460 error = -EFSCORRUPTED; 461 goto out_error; 462 } 463 464 left->rc_blockcount = extlen; 465 error = xfs_refcount_update(cur, left); 466 if (error) 467 goto out_error; 468 469 *aglen = 0; 470 return error; 471 472 out_error: 473 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 474 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 475 return error; 476 } 477 478 /* 479 * Merge with the left extent. 480 */ 481 STATIC int 482 xfs_refcount_merge_left_extent( 483 struct xfs_btree_cur *cur, 484 struct xfs_refcount_irec *left, 485 struct xfs_refcount_irec *cleft, 486 xfs_agblock_t *agbno, 487 xfs_extlen_t *aglen) 488 { 489 int error; 490 int found_rec; 491 492 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 493 cur->bc_ag.pag->pag_agno, left, cleft); 494 495 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ 496 if (cleft->rc_refcount > 1) { 497 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, 498 &found_rec); 499 if (error) 500 goto out_error; 501 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 502 error = -EFSCORRUPTED; 503 goto out_error; 504 } 505 506 error = xfs_refcount_delete(cur, &found_rec); 507 if (error) 508 goto out_error; 509 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 510 error = -EFSCORRUPTED; 511 goto out_error; 512 } 513 } 514 515 /* Enlarge the left extent. */ 516 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 517 &found_rec); 518 if (error) 519 goto out_error; 520 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 521 error = -EFSCORRUPTED; 522 goto out_error; 523 } 524 525 left->rc_blockcount += cleft->rc_blockcount; 526 error = xfs_refcount_update(cur, left); 527 if (error) 528 goto out_error; 529 530 *agbno += cleft->rc_blockcount; 531 *aglen -= cleft->rc_blockcount; 532 return error; 533 534 out_error: 535 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 536 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 537 return error; 538 } 539 540 /* 541 * Merge with the right extent. 542 */ 543 STATIC int 544 xfs_refcount_merge_right_extent( 545 struct xfs_btree_cur *cur, 546 struct xfs_refcount_irec *right, 547 struct xfs_refcount_irec *cright, 548 xfs_extlen_t *aglen) 549 { 550 int error; 551 int found_rec; 552 553 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 554 cur->bc_ag.pag->pag_agno, cright, right); 555 556 /* 557 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 558 * remove it. 559 */ 560 if (cright->rc_refcount > 1) { 561 error = xfs_refcount_lookup_le(cur, cright->rc_startblock, 562 &found_rec); 563 if (error) 564 goto out_error; 565 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 566 error = -EFSCORRUPTED; 567 goto out_error; 568 } 569 570 error = xfs_refcount_delete(cur, &found_rec); 571 if (error) 572 goto out_error; 573 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 574 error = -EFSCORRUPTED; 575 goto out_error; 576 } 577 } 578 579 /* Enlarge the right extent. */ 580 error = xfs_refcount_lookup_le(cur, right->rc_startblock, 581 &found_rec); 582 if (error) 583 goto out_error; 584 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 585 error = -EFSCORRUPTED; 586 goto out_error; 587 } 588 589 right->rc_startblock -= cright->rc_blockcount; 590 right->rc_blockcount += cright->rc_blockcount; 591 error = xfs_refcount_update(cur, right); 592 if (error) 593 goto out_error; 594 595 *aglen -= cright->rc_blockcount; 596 return error; 597 598 out_error: 599 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 600 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 601 return error; 602 } 603 604 #define XFS_FIND_RCEXT_SHARED 1 605 #define XFS_FIND_RCEXT_COW 2 606 /* 607 * Find the left extent and the one after it (cleft). This function assumes 608 * that we've already split any extent crossing agbno. 609 */ 610 STATIC int 611 xfs_refcount_find_left_extents( 612 struct xfs_btree_cur *cur, 613 struct xfs_refcount_irec *left, 614 struct xfs_refcount_irec *cleft, 615 xfs_agblock_t agbno, 616 xfs_extlen_t aglen, 617 int flags) 618 { 619 struct xfs_refcount_irec tmp; 620 int error; 621 int found_rec; 622 623 left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; 624 error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); 625 if (error) 626 goto out_error; 627 if (!found_rec) 628 return 0; 629 630 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 631 if (error) 632 goto out_error; 633 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 634 error = -EFSCORRUPTED; 635 goto out_error; 636 } 637 638 if (xfs_refc_next(&tmp) != agbno) 639 return 0; 640 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 641 return 0; 642 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 643 return 0; 644 /* We have a left extent; retrieve (or invent) the next right one */ 645 *left = tmp; 646 647 error = xfs_btree_increment(cur, 0, &found_rec); 648 if (error) 649 goto out_error; 650 if (found_rec) { 651 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 652 if (error) 653 goto out_error; 654 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 655 error = -EFSCORRUPTED; 656 goto out_error; 657 } 658 659 /* if tmp starts at the end of our range, just use that */ 660 if (tmp.rc_startblock == agbno) 661 *cleft = tmp; 662 else { 663 /* 664 * There's a gap in the refcntbt at the start of the 665 * range we're interested in (refcount == 1) so 666 * synthesize the implied extent and pass it back. 667 * We assume here that the agbno/aglen range was 668 * passed in from a data fork extent mapping and 669 * therefore is allocated to exactly one owner. 670 */ 671 cleft->rc_startblock = agbno; 672 cleft->rc_blockcount = min(aglen, 673 tmp.rc_startblock - agbno); 674 cleft->rc_refcount = 1; 675 } 676 } else { 677 /* 678 * No extents, so pretend that there's one covering the whole 679 * range. 680 */ 681 cleft->rc_startblock = agbno; 682 cleft->rc_blockcount = aglen; 683 cleft->rc_refcount = 1; 684 } 685 trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 686 left, cleft, agbno); 687 return error; 688 689 out_error: 690 trace_xfs_refcount_find_left_extent_error(cur->bc_mp, 691 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 692 return error; 693 } 694 695 /* 696 * Find the right extent and the one before it (cright). This function 697 * assumes that we've already split any extents crossing agbno + aglen. 698 */ 699 STATIC int 700 xfs_refcount_find_right_extents( 701 struct xfs_btree_cur *cur, 702 struct xfs_refcount_irec *right, 703 struct xfs_refcount_irec *cright, 704 xfs_agblock_t agbno, 705 xfs_extlen_t aglen, 706 int flags) 707 { 708 struct xfs_refcount_irec tmp; 709 int error; 710 int found_rec; 711 712 right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; 713 error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); 714 if (error) 715 goto out_error; 716 if (!found_rec) 717 return 0; 718 719 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 720 if (error) 721 goto out_error; 722 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 723 error = -EFSCORRUPTED; 724 goto out_error; 725 } 726 727 if (tmp.rc_startblock != agbno + aglen) 728 return 0; 729 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 730 return 0; 731 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 732 return 0; 733 /* We have a right extent; retrieve (or invent) the next left one */ 734 *right = tmp; 735 736 error = xfs_btree_decrement(cur, 0, &found_rec); 737 if (error) 738 goto out_error; 739 if (found_rec) { 740 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 741 if (error) 742 goto out_error; 743 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 744 error = -EFSCORRUPTED; 745 goto out_error; 746 } 747 748 /* if tmp ends at the end of our range, just use that */ 749 if (xfs_refc_next(&tmp) == agbno + aglen) 750 *cright = tmp; 751 else { 752 /* 753 * There's a gap in the refcntbt at the end of the 754 * range we're interested in (refcount == 1) so 755 * create the implied extent and pass it back. 756 * We assume here that the agbno/aglen range was 757 * passed in from a data fork extent mapping and 758 * therefore is allocated to exactly one owner. 759 */ 760 cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); 761 cright->rc_blockcount = right->rc_startblock - 762 cright->rc_startblock; 763 cright->rc_refcount = 1; 764 } 765 } else { 766 /* 767 * No extents, so pretend that there's one covering the whole 768 * range. 769 */ 770 cright->rc_startblock = agbno; 771 cright->rc_blockcount = aglen; 772 cright->rc_refcount = 1; 773 } 774 trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, 775 cright, right, agbno + aglen); 776 return error; 777 778 out_error: 779 trace_xfs_refcount_find_right_extent_error(cur->bc_mp, 780 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 781 return error; 782 } 783 784 /* Is this extent valid? */ 785 static inline bool 786 xfs_refc_valid( 787 struct xfs_refcount_irec *rc) 788 { 789 return rc->rc_startblock != NULLAGBLOCK; 790 } 791 792 /* 793 * Try to merge with any extents on the boundaries of the adjustment range. 794 */ 795 STATIC int 796 xfs_refcount_merge_extents( 797 struct xfs_btree_cur *cur, 798 xfs_agblock_t *agbno, 799 xfs_extlen_t *aglen, 800 enum xfs_refc_adjust_op adjust, 801 int flags, 802 bool *shape_changed) 803 { 804 struct xfs_refcount_irec left = {0}, cleft = {0}; 805 struct xfs_refcount_irec cright = {0}, right = {0}; 806 int error; 807 unsigned long long ulen; 808 bool cequal; 809 810 *shape_changed = false; 811 /* 812 * Find the extent just below agbno [left], just above agbno [cleft], 813 * just below (agbno + aglen) [cright], and just above (agbno + aglen) 814 * [right]. 815 */ 816 error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, 817 *aglen, flags); 818 if (error) 819 return error; 820 error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, 821 *aglen, flags); 822 if (error) 823 return error; 824 825 /* No left or right extent to merge; exit. */ 826 if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) 827 return 0; 828 829 cequal = (cleft.rc_startblock == cright.rc_startblock) && 830 (cleft.rc_blockcount == cright.rc_blockcount); 831 832 /* Try to merge left, cleft, and right. cleft must == cright. */ 833 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + 834 right.rc_blockcount; 835 if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && 836 xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && 837 left.rc_refcount == cleft.rc_refcount + adjust && 838 right.rc_refcount == cleft.rc_refcount + adjust && 839 ulen < MAXREFCEXTLEN) { 840 *shape_changed = true; 841 return xfs_refcount_merge_center_extents(cur, &left, &cleft, 842 &right, ulen, aglen); 843 } 844 845 /* Try to merge left and cleft. */ 846 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; 847 if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && 848 left.rc_refcount == cleft.rc_refcount + adjust && 849 ulen < MAXREFCEXTLEN) { 850 *shape_changed = true; 851 error = xfs_refcount_merge_left_extent(cur, &left, &cleft, 852 agbno, aglen); 853 if (error) 854 return error; 855 856 /* 857 * If we just merged left + cleft and cleft == cright, 858 * we no longer have a cright to merge with right. We're done. 859 */ 860 if (cequal) 861 return 0; 862 } 863 864 /* Try to merge cright and right. */ 865 ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; 866 if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && 867 right.rc_refcount == cright.rc_refcount + adjust && 868 ulen < MAXREFCEXTLEN) { 869 *shape_changed = true; 870 return xfs_refcount_merge_right_extent(cur, &right, &cright, 871 aglen); 872 } 873 874 return error; 875 } 876 877 /* 878 * XXX: This is a pretty hand-wavy estimate. The penalty for guessing 879 * true incorrectly is a shutdown FS; the penalty for guessing false 880 * incorrectly is more transaction rolls than might be necessary. 881 * Be conservative here. 882 */ 883 static bool 884 xfs_refcount_still_have_space( 885 struct xfs_btree_cur *cur) 886 { 887 unsigned long overhead; 888 889 overhead = cur->bc_ag.refc.shape_changes * 890 xfs_allocfree_log_count(cur->bc_mp, 1); 891 overhead *= cur->bc_mp->m_sb.sb_blocksize; 892 893 /* 894 * Only allow 2 refcount extent updates per transaction if the 895 * refcount continue update "error" has been injected. 896 */ 897 if (cur->bc_ag.refc.nr_ops > 2 && 898 XFS_TEST_ERROR(false, cur->bc_mp, 899 XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) 900 return false; 901 902 if (cur->bc_ag.refc.nr_ops == 0) 903 return true; 904 else if (overhead > cur->bc_tp->t_log_res) 905 return false; 906 return cur->bc_tp->t_log_res - overhead > 907 cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; 908 } 909 910 /* 911 * Adjust the refcounts of middle extents. At this point we should have 912 * split extents that crossed the adjustment range; merged with adjacent 913 * extents; and updated agbno/aglen to reflect the merges. Therefore, 914 * all we have to do is update the extents inside [agbno, agbno + aglen]. 915 */ 916 STATIC int 917 xfs_refcount_adjust_extents( 918 struct xfs_btree_cur *cur, 919 xfs_agblock_t *agbno, 920 xfs_extlen_t *aglen, 921 enum xfs_refc_adjust_op adj) 922 { 923 struct xfs_refcount_irec ext, tmp; 924 int error; 925 int found_rec, found_tmp; 926 xfs_fsblock_t fsbno; 927 928 /* Merging did all the work already. */ 929 if (*aglen == 0) 930 return 0; 931 932 error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); 933 if (error) 934 goto out_error; 935 936 while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { 937 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 938 if (error) 939 goto out_error; 940 if (!found_rec) { 941 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; 942 ext.rc_blockcount = 0; 943 ext.rc_refcount = 0; 944 } 945 946 /* 947 * Deal with a hole in the refcount tree; if a file maps to 948 * these blocks and there's no refcountbt record, pretend that 949 * there is one with refcount == 1. 950 */ 951 if (ext.rc_startblock != *agbno) { 952 tmp.rc_startblock = *agbno; 953 tmp.rc_blockcount = min(*aglen, 954 ext.rc_startblock - *agbno); 955 tmp.rc_refcount = 1 + adj; 956 trace_xfs_refcount_modify_extent(cur->bc_mp, 957 cur->bc_ag.pag->pag_agno, &tmp); 958 959 /* 960 * Either cover the hole (increment) or 961 * delete the range (decrement). 962 */ 963 if (tmp.rc_refcount) { 964 error = xfs_refcount_insert(cur, &tmp, 965 &found_tmp); 966 if (error) 967 goto out_error; 968 if (XFS_IS_CORRUPT(cur->bc_mp, 969 found_tmp != 1)) { 970 error = -EFSCORRUPTED; 971 goto out_error; 972 } 973 cur->bc_ag.refc.nr_ops++; 974 } else { 975 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 976 cur->bc_ag.pag->pag_agno, 977 tmp.rc_startblock); 978 xfs_free_extent_later(cur->bc_tp, fsbno, 979 tmp.rc_blockcount, NULL); 980 } 981 982 (*agbno) += tmp.rc_blockcount; 983 (*aglen) -= tmp.rc_blockcount; 984 985 error = xfs_refcount_lookup_ge(cur, *agbno, 986 &found_rec); 987 if (error) 988 goto out_error; 989 } 990 991 /* Stop if there's nothing left to modify */ 992 if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) 993 break; 994 995 /* 996 * Adjust the reference count and either update the tree 997 * (incr) or free the blocks (decr). 998 */ 999 if (ext.rc_refcount == MAXREFCOUNT) 1000 goto skip; 1001 ext.rc_refcount += adj; 1002 trace_xfs_refcount_modify_extent(cur->bc_mp, 1003 cur->bc_ag.pag->pag_agno, &ext); 1004 if (ext.rc_refcount > 1) { 1005 error = xfs_refcount_update(cur, &ext); 1006 if (error) 1007 goto out_error; 1008 cur->bc_ag.refc.nr_ops++; 1009 } else if (ext.rc_refcount == 1) { 1010 error = xfs_refcount_delete(cur, &found_rec); 1011 if (error) 1012 goto out_error; 1013 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 1014 error = -EFSCORRUPTED; 1015 goto out_error; 1016 } 1017 cur->bc_ag.refc.nr_ops++; 1018 goto advloop; 1019 } else { 1020 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 1021 cur->bc_ag.pag->pag_agno, 1022 ext.rc_startblock); 1023 xfs_free_extent_later(cur->bc_tp, fsbno, 1024 ext.rc_blockcount, NULL); 1025 } 1026 1027 skip: 1028 error = xfs_btree_increment(cur, 0, &found_rec); 1029 if (error) 1030 goto out_error; 1031 1032 advloop: 1033 (*agbno) += ext.rc_blockcount; 1034 (*aglen) -= ext.rc_blockcount; 1035 } 1036 1037 return error; 1038 out_error: 1039 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1040 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1041 return error; 1042 } 1043 1044 /* Adjust the reference count of a range of AG blocks. */ 1045 STATIC int 1046 xfs_refcount_adjust( 1047 struct xfs_btree_cur *cur, 1048 xfs_agblock_t agbno, 1049 xfs_extlen_t aglen, 1050 xfs_agblock_t *new_agbno, 1051 xfs_extlen_t *new_aglen, 1052 enum xfs_refc_adjust_op adj) 1053 { 1054 bool shape_changed; 1055 int shape_changes = 0; 1056 int error; 1057 1058 *new_agbno = agbno; 1059 *new_aglen = aglen; 1060 if (adj == XFS_REFCOUNT_ADJUST_INCREASE) 1061 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1062 agbno, aglen); 1063 else 1064 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1065 agbno, aglen); 1066 1067 /* 1068 * Ensure that no rcextents cross the boundary of the adjustment range. 1069 */ 1070 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1071 if (error) 1072 goto out_error; 1073 if (shape_changed) 1074 shape_changes++; 1075 1076 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1077 if (error) 1078 goto out_error; 1079 if (shape_changed) 1080 shape_changes++; 1081 1082 /* 1083 * Try to merge with the left or right extents of the range. 1084 */ 1085 error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, 1086 XFS_FIND_RCEXT_SHARED, &shape_changed); 1087 if (error) 1088 goto out_error; 1089 if (shape_changed) 1090 shape_changes++; 1091 if (shape_changes) 1092 cur->bc_ag.refc.shape_changes++; 1093 1094 /* Now that we've taken care of the ends, adjust the middle extents */ 1095 error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); 1096 if (error) 1097 goto out_error; 1098 1099 return 0; 1100 1101 out_error: 1102 trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1103 error, _RET_IP_); 1104 return error; 1105 } 1106 1107 /* Clean up after calling xfs_refcount_finish_one. */ 1108 void 1109 xfs_refcount_finish_one_cleanup( 1110 struct xfs_trans *tp, 1111 struct xfs_btree_cur *rcur, 1112 int error) 1113 { 1114 struct xfs_buf *agbp; 1115 1116 if (rcur == NULL) 1117 return; 1118 agbp = rcur->bc_ag.agbp; 1119 xfs_btree_del_cursor(rcur, error); 1120 if (error) 1121 xfs_trans_brelse(tp, agbp); 1122 } 1123 1124 /* 1125 * Process one of the deferred refcount operations. We pass back the 1126 * btree cursor to maintain our lock on the btree between calls. 1127 * This saves time and eliminates a buffer deadlock between the 1128 * superblock and the AGF because we'll always grab them in the same 1129 * order. 1130 */ 1131 int 1132 xfs_refcount_finish_one( 1133 struct xfs_trans *tp, 1134 enum xfs_refcount_intent_type type, 1135 xfs_fsblock_t startblock, 1136 xfs_extlen_t blockcount, 1137 xfs_fsblock_t *new_fsb, 1138 xfs_extlen_t *new_len, 1139 struct xfs_btree_cur **pcur) 1140 { 1141 struct xfs_mount *mp = tp->t_mountp; 1142 struct xfs_btree_cur *rcur; 1143 struct xfs_buf *agbp = NULL; 1144 int error = 0; 1145 xfs_agblock_t bno; 1146 xfs_agblock_t new_agbno; 1147 unsigned long nr_ops = 0; 1148 int shape_changes = 0; 1149 struct xfs_perag *pag; 1150 1151 pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); 1152 bno = XFS_FSB_TO_AGBNO(mp, startblock); 1153 1154 trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), 1155 type, XFS_FSB_TO_AGBNO(mp, startblock), 1156 blockcount); 1157 1158 if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) { 1159 error = -EIO; 1160 goto out_drop; 1161 } 1162 1163 /* 1164 * If we haven't gotten a cursor or the cursor AG doesn't match 1165 * the startblock, get one now. 1166 */ 1167 rcur = *pcur; 1168 if (rcur != NULL && rcur->bc_ag.pag != pag) { 1169 nr_ops = rcur->bc_ag.refc.nr_ops; 1170 shape_changes = rcur->bc_ag.refc.shape_changes; 1171 xfs_refcount_finish_one_cleanup(tp, rcur, 0); 1172 rcur = NULL; 1173 *pcur = NULL; 1174 } 1175 if (rcur == NULL) { 1176 error = xfs_alloc_read_agf(tp->t_mountp, tp, pag->pag_agno, 1177 XFS_ALLOC_FLAG_FREEING, &agbp); 1178 if (error) 1179 goto out_drop; 1180 1181 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); 1182 rcur->bc_ag.refc.nr_ops = nr_ops; 1183 rcur->bc_ag.refc.shape_changes = shape_changes; 1184 } 1185 *pcur = rcur; 1186 1187 switch (type) { 1188 case XFS_REFCOUNT_INCREASE: 1189 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1190 new_len, XFS_REFCOUNT_ADJUST_INCREASE); 1191 *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); 1192 break; 1193 case XFS_REFCOUNT_DECREASE: 1194 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1195 new_len, XFS_REFCOUNT_ADJUST_DECREASE); 1196 *new_fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); 1197 break; 1198 case XFS_REFCOUNT_ALLOC_COW: 1199 *new_fsb = startblock + blockcount; 1200 *new_len = 0; 1201 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount); 1202 break; 1203 case XFS_REFCOUNT_FREE_COW: 1204 *new_fsb = startblock + blockcount; 1205 *new_len = 0; 1206 error = __xfs_refcount_cow_free(rcur, bno, blockcount); 1207 break; 1208 default: 1209 ASSERT(0); 1210 error = -EFSCORRUPTED; 1211 } 1212 if (!error && *new_len > 0) 1213 trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type, 1214 bno, blockcount, new_agbno, *new_len); 1215 out_drop: 1216 xfs_perag_put(pag); 1217 return error; 1218 } 1219 1220 /* 1221 * Record a refcount intent for later processing. 1222 */ 1223 static void 1224 __xfs_refcount_add( 1225 struct xfs_trans *tp, 1226 enum xfs_refcount_intent_type type, 1227 xfs_fsblock_t startblock, 1228 xfs_extlen_t blockcount) 1229 { 1230 struct xfs_refcount_intent *ri; 1231 1232 trace_xfs_refcount_defer(tp->t_mountp, 1233 XFS_FSB_TO_AGNO(tp->t_mountp, startblock), 1234 type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), 1235 blockcount); 1236 1237 ri = kmem_cache_alloc(xfs_refcount_intent_cache, 1238 GFP_NOFS | __GFP_NOFAIL); 1239 INIT_LIST_HEAD(&ri->ri_list); 1240 ri->ri_type = type; 1241 ri->ri_startblock = startblock; 1242 ri->ri_blockcount = blockcount; 1243 1244 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); 1245 } 1246 1247 /* 1248 * Increase the reference count of the blocks backing a file's extent. 1249 */ 1250 void 1251 xfs_refcount_increase_extent( 1252 struct xfs_trans *tp, 1253 struct xfs_bmbt_irec *PREV) 1254 { 1255 if (!xfs_has_reflink(tp->t_mountp)) 1256 return; 1257 1258 __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock, 1259 PREV->br_blockcount); 1260 } 1261 1262 /* 1263 * Decrease the reference count of the blocks backing a file's extent. 1264 */ 1265 void 1266 xfs_refcount_decrease_extent( 1267 struct xfs_trans *tp, 1268 struct xfs_bmbt_irec *PREV) 1269 { 1270 if (!xfs_has_reflink(tp->t_mountp)) 1271 return; 1272 1273 __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock, 1274 PREV->br_blockcount); 1275 } 1276 1277 /* 1278 * Given an AG extent, find the lowest-numbered run of shared blocks 1279 * within that range and return the range in fbno/flen. If 1280 * find_end_of_shared is set, return the longest contiguous extent of 1281 * shared blocks; if not, just return the first extent we find. If no 1282 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK 1283 * and 0, respectively. 1284 */ 1285 int 1286 xfs_refcount_find_shared( 1287 struct xfs_btree_cur *cur, 1288 xfs_agblock_t agbno, 1289 xfs_extlen_t aglen, 1290 xfs_agblock_t *fbno, 1291 xfs_extlen_t *flen, 1292 bool find_end_of_shared) 1293 { 1294 struct xfs_refcount_irec tmp; 1295 int i; 1296 int have; 1297 int error; 1298 1299 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1300 agbno, aglen); 1301 1302 /* By default, skip the whole range */ 1303 *fbno = NULLAGBLOCK; 1304 *flen = 0; 1305 1306 /* Try to find a refcount extent that crosses the start */ 1307 error = xfs_refcount_lookup_le(cur, agbno, &have); 1308 if (error) 1309 goto out_error; 1310 if (!have) { 1311 /* No left extent, look at the next one */ 1312 error = xfs_btree_increment(cur, 0, &have); 1313 if (error) 1314 goto out_error; 1315 if (!have) 1316 goto done; 1317 } 1318 error = xfs_refcount_get_rec(cur, &tmp, &i); 1319 if (error) 1320 goto out_error; 1321 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1322 error = -EFSCORRUPTED; 1323 goto out_error; 1324 } 1325 1326 /* If the extent ends before the start, look at the next one */ 1327 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { 1328 error = xfs_btree_increment(cur, 0, &have); 1329 if (error) 1330 goto out_error; 1331 if (!have) 1332 goto done; 1333 error = xfs_refcount_get_rec(cur, &tmp, &i); 1334 if (error) 1335 goto out_error; 1336 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1337 error = -EFSCORRUPTED; 1338 goto out_error; 1339 } 1340 } 1341 1342 /* If the extent starts after the range we want, bail out */ 1343 if (tmp.rc_startblock >= agbno + aglen) 1344 goto done; 1345 1346 /* We found the start of a shared extent! */ 1347 if (tmp.rc_startblock < agbno) { 1348 tmp.rc_blockcount -= (agbno - tmp.rc_startblock); 1349 tmp.rc_startblock = agbno; 1350 } 1351 1352 *fbno = tmp.rc_startblock; 1353 *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); 1354 if (!find_end_of_shared) 1355 goto done; 1356 1357 /* Otherwise, find the end of this shared extent */ 1358 while (*fbno + *flen < agbno + aglen) { 1359 error = xfs_btree_increment(cur, 0, &have); 1360 if (error) 1361 goto out_error; 1362 if (!have) 1363 break; 1364 error = xfs_refcount_get_rec(cur, &tmp, &i); 1365 if (error) 1366 goto out_error; 1367 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 1368 error = -EFSCORRUPTED; 1369 goto out_error; 1370 } 1371 if (tmp.rc_startblock >= agbno + aglen || 1372 tmp.rc_startblock != *fbno + *flen) 1373 break; 1374 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); 1375 } 1376 1377 done: 1378 trace_xfs_refcount_find_shared_result(cur->bc_mp, 1379 cur->bc_ag.pag->pag_agno, *fbno, *flen); 1380 1381 out_error: 1382 if (error) 1383 trace_xfs_refcount_find_shared_error(cur->bc_mp, 1384 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1385 return error; 1386 } 1387 1388 /* 1389 * Recovering CoW Blocks After a Crash 1390 * 1391 * Due to the way that the copy on write mechanism works, there's a window of 1392 * opportunity in which we can lose track of allocated blocks during a crash. 1393 * Because CoW uses delayed allocation in the in-core CoW fork, writeback 1394 * causes blocks to be allocated and stored in the CoW fork. The blocks are 1395 * no longer in the free space btree but are not otherwise recorded anywhere 1396 * until the write completes and the blocks are mapped into the file. A crash 1397 * in between allocation and remapping results in the replacement blocks being 1398 * lost. This situation is exacerbated by the CoW extent size hint because 1399 * allocations can hang around for long time. 1400 * 1401 * However, there is a place where we can record these allocations before they 1402 * become mappings -- the reference count btree. The btree does not record 1403 * extents with refcount == 1, so we can record allocations with a refcount of 1404 * 1. Blocks being used for CoW writeout cannot be shared, so there should be 1405 * no conflict with shared block records. These mappings should be created 1406 * when we allocate blocks to the CoW fork and deleted when they're removed 1407 * from the CoW fork. 1408 * 1409 * Minor nit: records for in-progress CoW allocations and records for shared 1410 * extents must never be merged, to preserve the property that (except for CoW 1411 * allocations) there are no refcount btree entries with refcount == 1. The 1412 * only time this could potentially happen is when unsharing a block that's 1413 * adjacent to CoW allocations, so we must be careful to avoid this. 1414 * 1415 * At mount time we recover lost CoW allocations by searching the refcount 1416 * btree for these refcount == 1 mappings. These represent CoW allocations 1417 * that were in progress at the time the filesystem went down, so we can free 1418 * them to get the space back. 1419 * 1420 * This mechanism is superior to creating EFIs for unmapped CoW extents for 1421 * several reasons -- first, EFIs pin the tail of the log and would have to be 1422 * periodically relogged to avoid filling up the log. Second, CoW completions 1423 * will have to file an EFD and create new EFIs for whatever remains in the 1424 * CoW fork; this partially takes care of (1) but extent-size reservations 1425 * will have to periodically relog even if there's no writeout in progress. 1426 * This can happen if the CoW extent size hint is set, which you really want. 1427 * Third, EFIs cannot currently be automatically relogged into newer 1428 * transactions to advance the log tail. Fourth, stuffing the log full of 1429 * EFIs places an upper bound on the number of CoW allocations that can be 1430 * held filesystem-wide at any given time. Recording them in the refcount 1431 * btree doesn't require us to maintain any state in memory and doesn't pin 1432 * the log. 1433 */ 1434 /* 1435 * Adjust the refcounts of CoW allocations. These allocations are "magic" 1436 * in that they're not referenced anywhere else in the filesystem, so we 1437 * stash them in the refcount btree with a refcount of 1 until either file 1438 * remapping (or CoW cancellation) happens. 1439 */ 1440 STATIC int 1441 xfs_refcount_adjust_cow_extents( 1442 struct xfs_btree_cur *cur, 1443 xfs_agblock_t agbno, 1444 xfs_extlen_t aglen, 1445 enum xfs_refc_adjust_op adj) 1446 { 1447 struct xfs_refcount_irec ext, tmp; 1448 int error; 1449 int found_rec, found_tmp; 1450 1451 if (aglen == 0) 1452 return 0; 1453 1454 /* Find any overlapping refcount records */ 1455 error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); 1456 if (error) 1457 goto out_error; 1458 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 1459 if (error) 1460 goto out_error; 1461 if (!found_rec) { 1462 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + 1463 XFS_REFC_COW_START; 1464 ext.rc_blockcount = 0; 1465 ext.rc_refcount = 0; 1466 } 1467 1468 switch (adj) { 1469 case XFS_REFCOUNT_ADJUST_COW_ALLOC: 1470 /* Adding a CoW reservation, there should be nothing here. */ 1471 if (XFS_IS_CORRUPT(cur->bc_mp, 1472 agbno + aglen > ext.rc_startblock)) { 1473 error = -EFSCORRUPTED; 1474 goto out_error; 1475 } 1476 1477 tmp.rc_startblock = agbno; 1478 tmp.rc_blockcount = aglen; 1479 tmp.rc_refcount = 1; 1480 trace_xfs_refcount_modify_extent(cur->bc_mp, 1481 cur->bc_ag.pag->pag_agno, &tmp); 1482 1483 error = xfs_refcount_insert(cur, &tmp, 1484 &found_tmp); 1485 if (error) 1486 goto out_error; 1487 if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { 1488 error = -EFSCORRUPTED; 1489 goto out_error; 1490 } 1491 break; 1492 case XFS_REFCOUNT_ADJUST_COW_FREE: 1493 /* Removing a CoW reservation, there should be one extent. */ 1494 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) { 1495 error = -EFSCORRUPTED; 1496 goto out_error; 1497 } 1498 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) { 1499 error = -EFSCORRUPTED; 1500 goto out_error; 1501 } 1502 if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) { 1503 error = -EFSCORRUPTED; 1504 goto out_error; 1505 } 1506 1507 ext.rc_refcount = 0; 1508 trace_xfs_refcount_modify_extent(cur->bc_mp, 1509 cur->bc_ag.pag->pag_agno, &ext); 1510 error = xfs_refcount_delete(cur, &found_rec); 1511 if (error) 1512 goto out_error; 1513 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 1514 error = -EFSCORRUPTED; 1515 goto out_error; 1516 } 1517 break; 1518 default: 1519 ASSERT(0); 1520 } 1521 1522 return error; 1523 out_error: 1524 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1525 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 1526 return error; 1527 } 1528 1529 /* 1530 * Add or remove refcount btree entries for CoW reservations. 1531 */ 1532 STATIC int 1533 xfs_refcount_adjust_cow( 1534 struct xfs_btree_cur *cur, 1535 xfs_agblock_t agbno, 1536 xfs_extlen_t aglen, 1537 enum xfs_refc_adjust_op adj) 1538 { 1539 bool shape_changed; 1540 int error; 1541 1542 agbno += XFS_REFC_COW_START; 1543 1544 /* 1545 * Ensure that no rcextents cross the boundary of the adjustment range. 1546 */ 1547 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1548 if (error) 1549 goto out_error; 1550 1551 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1552 if (error) 1553 goto out_error; 1554 1555 /* 1556 * Try to merge with the left or right extents of the range. 1557 */ 1558 error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, 1559 XFS_FIND_RCEXT_COW, &shape_changed); 1560 if (error) 1561 goto out_error; 1562 1563 /* Now that we've taken care of the ends, adjust the middle extents */ 1564 error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj); 1565 if (error) 1566 goto out_error; 1567 1568 return 0; 1569 1570 out_error: 1571 trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno, 1572 error, _RET_IP_); 1573 return error; 1574 } 1575 1576 /* 1577 * Record a CoW allocation in the refcount btree. 1578 */ 1579 STATIC int 1580 __xfs_refcount_cow_alloc( 1581 struct xfs_btree_cur *rcur, 1582 xfs_agblock_t agbno, 1583 xfs_extlen_t aglen) 1584 { 1585 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, 1586 agbno, aglen); 1587 1588 /* Add refcount btree reservation */ 1589 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1590 XFS_REFCOUNT_ADJUST_COW_ALLOC); 1591 } 1592 1593 /* 1594 * Remove a CoW allocation from the refcount btree. 1595 */ 1596 STATIC int 1597 __xfs_refcount_cow_free( 1598 struct xfs_btree_cur *rcur, 1599 xfs_agblock_t agbno, 1600 xfs_extlen_t aglen) 1601 { 1602 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno, 1603 agbno, aglen); 1604 1605 /* Remove refcount btree reservation */ 1606 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1607 XFS_REFCOUNT_ADJUST_COW_FREE); 1608 } 1609 1610 /* Record a CoW staging extent in the refcount btree. */ 1611 void 1612 xfs_refcount_alloc_cow_extent( 1613 struct xfs_trans *tp, 1614 xfs_fsblock_t fsb, 1615 xfs_extlen_t len) 1616 { 1617 struct xfs_mount *mp = tp->t_mountp; 1618 1619 if (!xfs_has_reflink(mp)) 1620 return; 1621 1622 __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len); 1623 1624 /* Add rmap entry */ 1625 xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), 1626 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1627 } 1628 1629 /* Forget a CoW staging event in the refcount btree. */ 1630 void 1631 xfs_refcount_free_cow_extent( 1632 struct xfs_trans *tp, 1633 xfs_fsblock_t fsb, 1634 xfs_extlen_t len) 1635 { 1636 struct xfs_mount *mp = tp->t_mountp; 1637 1638 if (!xfs_has_reflink(mp)) 1639 return; 1640 1641 /* Remove rmap entry */ 1642 xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb), 1643 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1644 __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len); 1645 } 1646 1647 struct xfs_refcount_recovery { 1648 struct list_head rr_list; 1649 struct xfs_refcount_irec rr_rrec; 1650 }; 1651 1652 /* Stuff an extent on the recovery list. */ 1653 STATIC int 1654 xfs_refcount_recover_extent( 1655 struct xfs_btree_cur *cur, 1656 const union xfs_btree_rec *rec, 1657 void *priv) 1658 { 1659 struct list_head *debris = priv; 1660 struct xfs_refcount_recovery *rr; 1661 1662 if (XFS_IS_CORRUPT(cur->bc_mp, 1663 be32_to_cpu(rec->refc.rc_refcount) != 1)) 1664 return -EFSCORRUPTED; 1665 1666 rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0); 1667 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); 1668 list_add_tail(&rr->rr_list, debris); 1669 1670 return 0; 1671 } 1672 1673 /* Find and remove leftover CoW reservations. */ 1674 int 1675 xfs_refcount_recover_cow_leftovers( 1676 struct xfs_mount *mp, 1677 struct xfs_perag *pag) 1678 { 1679 struct xfs_trans *tp; 1680 struct xfs_btree_cur *cur; 1681 struct xfs_buf *agbp; 1682 struct xfs_refcount_recovery *rr, *n; 1683 struct list_head debris; 1684 union xfs_btree_irec low; 1685 union xfs_btree_irec high; 1686 xfs_fsblock_t fsb; 1687 xfs_agblock_t agbno; 1688 int error; 1689 1690 if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) 1691 return -EOPNOTSUPP; 1692 1693 INIT_LIST_HEAD(&debris); 1694 1695 /* 1696 * In this first part, we use an empty transaction to gather up 1697 * all the leftover CoW extents so that we can subsequently 1698 * delete them. The empty transaction is used to avoid 1699 * a buffer lock deadlock if there happens to be a loop in the 1700 * refcountbt because we're allowed to re-grab a buffer that is 1701 * already attached to our transaction. When we're done 1702 * recording the CoW debris we cancel the (empty) transaction 1703 * and everything goes away cleanly. 1704 */ 1705 error = xfs_trans_alloc_empty(mp, &tp); 1706 if (error) 1707 return error; 1708 1709 error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); 1710 if (error) 1711 goto out_trans; 1712 cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); 1713 1714 /* Find all the leftover CoW staging extents. */ 1715 memset(&low, 0, sizeof(low)); 1716 memset(&high, 0, sizeof(high)); 1717 low.rc.rc_startblock = XFS_REFC_COW_START; 1718 high.rc.rc_startblock = -1U; 1719 error = xfs_btree_query_range(cur, &low, &high, 1720 xfs_refcount_recover_extent, &debris); 1721 xfs_btree_del_cursor(cur, error); 1722 xfs_trans_brelse(tp, agbp); 1723 xfs_trans_cancel(tp); 1724 if (error) 1725 goto out_free; 1726 1727 /* Now iterate the list to free the leftovers */ 1728 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1729 /* Set up transaction. */ 1730 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); 1731 if (error) 1732 goto out_free; 1733 1734 trace_xfs_refcount_recover_extent(mp, pag->pag_agno, 1735 &rr->rr_rrec); 1736 1737 /* Free the orphan record */ 1738 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; 1739 fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno); 1740 xfs_refcount_free_cow_extent(tp, fsb, 1741 rr->rr_rrec.rc_blockcount); 1742 1743 /* Free the block. */ 1744 xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); 1745 1746 error = xfs_trans_commit(tp); 1747 if (error) 1748 goto out_free; 1749 1750 list_del(&rr->rr_list); 1751 kmem_free(rr); 1752 } 1753 1754 return error; 1755 out_trans: 1756 xfs_trans_cancel(tp); 1757 out_free: 1758 /* Free the leftover list */ 1759 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1760 list_del(&rr->rr_list); 1761 kmem_free(rr); 1762 } 1763 return error; 1764 } 1765 1766 /* Is there a record covering a given extent? */ 1767 int 1768 xfs_refcount_has_record( 1769 struct xfs_btree_cur *cur, 1770 xfs_agblock_t bno, 1771 xfs_extlen_t len, 1772 bool *exists) 1773 { 1774 union xfs_btree_irec low; 1775 union xfs_btree_irec high; 1776 1777 memset(&low, 0, sizeof(low)); 1778 low.rc.rc_startblock = bno; 1779 memset(&high, 0xFF, sizeof(high)); 1780 high.rc.rc_startblock = bno + len - 1; 1781 1782 return xfs_btree_has_record(cur, &low, &high, exists); 1783 } 1784 1785 int __init 1786 xfs_refcount_intent_init_cache(void) 1787 { 1788 xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", 1789 sizeof(struct xfs_refcount_intent), 1790 0, 0, NULL); 1791 1792 return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM; 1793 } 1794 1795 void 1796 xfs_refcount_intent_destroy_cache(void) 1797 { 1798 kmem_cache_destroy(xfs_refcount_intent_cache); 1799 xfs_refcount_intent_cache = NULL; 1800 } 1801