// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"

struct kmem_cache	*xfs_refcount_intent_cache;

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_refcount_irec	*irec)
{
	uint32_t			start;

	start = be32_to_cpu(rec->refc.rc_startblock);
	if (start & XFS_REFC_COWFLAG) {
		start &= ~XFS_REFC_COWFLAG;
		irec->rc_domain = XFS_REFC_DOMAIN_COW;
	} else {
		irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
	}

	irec->rc_startblock = start;
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Get the data from the pointed-to record.
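 * The record is converted to in-core form and sanity checked (extent range,
 * domain, and refcount bounds) before being handed back; a record that fails
 * those checks is reported as -EFSCORRUPTED.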
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_perag		*pag = cur->bc_ag.pag;
	union xfs_btree_rec		*rec;
	int				error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	xfs_refcount_btrec_to_irec(rec, irec);
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
		goto out_bad_rec;

	if (!xfs_refcount_check_domain(irec))
		goto out_bad_rec;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
		goto out_bad_rec;

	if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
		goto out_bad_rec;

	trace_xfs_refcount_get(cur->bc_mp, pag->pag_agno, irec);
	return 0;

out_bad_rec:
	xfs_warn(mp,
		"Refcount BTree record corruption in AG %d detected!",
		pag->pag_agno);
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	return -EFSCORRUPTED;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	uint32_t		start;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);

	start = xfs_refcount_encode_startblock(irec->rc_startblock,
			irec->rc_domain);
	rec.refc.rc_startblock = cpu_to_be32(start);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);

	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);

	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	cur->bc_rec.rc.rc_domain = irec->rc_domain;

	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
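 * On success the cursor is left at the position found by a GE lookup of the
 * record that was just removed.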
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int			found_rec;
	int			error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
	error = xfs_btree_delete(cur, i);
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (error)
		goto out_error;
	error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
			&found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *      ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.
 * If we were incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks like this:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (rcext.rc_domain != domain)
		return 0;
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, left, center, right);

	ASSERT(left->rc_domain == center->rc_domain);
	ASSERT(right->rc_domain == center->rc_domain);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
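	 * (A synthesized center extent has rc_refcount == 1, which is how
	 * the two cases are told apart below.)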
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_domain,
			center->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, left, cleft);

	ASSERT(left->rc_domain == cleft->rc_domain);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
				cleft->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, cright, right);

	ASSERT(right->rc_domain == cright->rc_domain);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_domain,
				cright->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_domain,
			right->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_domain != domain)
		return 0;
	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
			cleft->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
		cleft->rc_domain = domain;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	if (tmp.rc_domain != domain)
		return 0;
	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		if (tmp.rc_domain != domain)
			goto not_found;

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
			cright->rc_domain = domain;
		}
	} else {
not_found:
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
		cright->rc_domain = domain;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
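 * On return, *agbno and *aglen are updated to exclude any blocks that were
 * absorbed by a merge, and *shape_changed reports whether the tree changed
 * shape.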
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
			*agbno, *aglen);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
			*agbno, *aglen);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return 0;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	/*
	 * Worst case estimate: full splits of the free space and rmap btrees
	 * to handle each of the shape changes to the refcount btree.
	 */
	overhead = xfs_allocfree_block_count(cur->bc_mp,
				cur->bc_ag.refc.shape_changes);
	overhead += cur->bc_mp->m_refc_maxlevels;
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
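	 * Returning false here makes the caller stop early so that the
	 * remaining work is requeued as a new deferred refcount intent.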
	 */
	if (cur->bc_ag.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_ag.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
		cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
			&found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
			ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;

			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_ag.pag->pag_agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			cur->bc_ag.refc.nr_ops++;
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						   found_tmp != 1)) {
					error = -EFSCORRUPTED;
					goto out_error;
				}
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_ag.pag->pag_agno,
						tmp.rc_startblock);
				xfs_free_extent_later(cur->bc_tp, fsbno,
						tmp.rc_blockcount, NULL);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			/* Stop if there's nothing left to modify */
			if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
				break;

			/* Move the cursor to the start of ext. */
			error = xfs_refcount_lookup_ge(cur,
					XFS_REFC_DOMAIN_SHARED, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/*
		 * A previous step trimmed agbno/aglen such that the end of the
		 * range would not be in the middle of the record.  If this is
		 * no longer the case, something is seriously wrong with the
		 * btree.  Make sure we never feed the synthesized record into
		 * the processing loop below.
		 */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
		    XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		cur->bc_ag.refc.nr_ops++;
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				error = -EFSCORRUPTED;
				goto out_error;
			}
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_ag.pag->pag_agno,
					ext.rc_startblock);
			xfs_free_extent_later(cur->bc_tp, fsbno,
					ext.rc_blockcount, NULL);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
			new_agbno, new_aglen, adj, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_ag.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one.
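 * This tears down the btree cursor and, if the operation failed, also
 * releases the AGF buffer that the cursor was holding.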
 */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_ag.agbp;
	xfs_btree_del_cursor(rcur, error);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/*
 * Set up a continuation of a deferred refcount operation by updating the
 * intent.  Checks to make sure we're not going to run off the end of the AG.
 */
static inline int
xfs_refcount_continue_op(
	struct xfs_btree_cur		*cur,
	xfs_fsblock_t			startblock,
	xfs_agblock_t			new_agbno,
	xfs_extlen_t			new_len,
	xfs_fsblock_t			*new_fsbno)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_perag		*pag = cur->bc_ag.pag;

	if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len)))
		return -EFSCORRUPTED;

	*new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);

	ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len));
	ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno));

	return 0;
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;
	struct xfs_perag		*pag;

	pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock));
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) {
		error = -EIO;
		goto out_drop;
	}

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
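	 * The nr_ops and shape_changes counters carry over to the new cursor
	 * so that the transaction space checks stay accurate.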
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_ag.pag != pag) {
		nr_ops = rcur->bc_ag.refc.nr_ops;
		shape_changes = rcur->bc_ag.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING,
				&agbp);
		if (error)
			goto out_drop;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
		rcur->bc_ag.refc.nr_ops = nr_ops;
		rcur->bc_ag.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_INCREASE);
		if (error)
			goto out_drop;
		if (*new_len > 0)
			error = xfs_refcount_continue_op(rcur, startblock,
					new_agbno, *new_len, new_fsb);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_DECREASE);
		if (error)
			goto out_drop;
		if (*new_len > 0)
			error = xfs_refcount_continue_op(rcur, startblock,
					new_agbno, *new_len, new_fsb);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type,
				bno, blockcount, new_agbno, *new_len);
out_drop:
	xfs_perag_put(pag);
	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			blockcount);

	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.
 * If find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*fbno,
	xfs_extlen_t		*flen,
	bool			find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
			&have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
		goto done;

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
			goto done;
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
		    tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for a long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
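 * These records are kept in the CoW domain of the btree so that they can
 * never be confused with, or merged into, the shared-extent records.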
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	enum xfs_refc_adjust_op		adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
			&found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
				ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
		ext.rc_domain = XFS_REFC_DOMAIN_COW;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				   agbno + aglen > ext.rc_startblock)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		tmp.rc_domain = XFS_REFC_DOMAIN_COW;

		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
			&aglen, adj, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount		*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	/* Remove rmap entry */
	xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   be32_to_cpu(rec->refc.rc_refcount) != 1))
		return -EFSCORRUPTED;

	rr = kmalloc(sizeof(struct xfs_refcount_recovery),
			GFP_KERNEL | __GFP_NOFAIL);
	INIT_LIST_HEAD(&rr->rr_list);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
		kfree(rr);
		return -EFSCORRUPTED;
	}

	list_add_tail(&rr->rr_list, debris);
	return 0;
}

/* Find and remove leftover CoW reservations.
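 * Called at mount time to free any CoW staging extents left over from an
 * unclean shutdown.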
 */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	struct xfs_perag		*pag)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	xfs_fsblock_t			fsb;
	int				error;

	/* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
	BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
	if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
	if (error)
		goto out_trans;
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
				&rr->rr_rrec);

		/* Free the orphan record */
		fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
				rr->rr_rrec.rc_startblock);
		xfs_refcount_free_cow_extent(tp, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kfree(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kfree(rr);
	}
	return error;
}

/* Is there a record covering a given extent?
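 * Sets *exists if any refcount record in the given domain overlaps the
 * range [bno, bno + len).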
 */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;
	low.rc.rc_domain = high.rc.rc_domain = domain;

	return xfs_btree_has_record(cur, &low, &high, exists);
}

int __init
xfs_refcount_intent_init_cache(void)
{
	xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
			sizeof(struct xfs_refcount_intent),
			0, 0, NULL);

	return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_refcount_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_refcount_intent_cache);
	xfs_refcount_intent_cache = NULL;
}