// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"

/* Slab cache for deferred refcount update intent items. */
struct kmem_cache	*xfs_refcount_intent_cache;

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 *
 * @cur:    refcount btree cursor
 * @domain: whether to search the shared or CoW-staging keyspace
 * @bno:    AG block number to position the cursor at
 * @stat:   set to 1 if a record was found, 0 otherwise
 *
 * Note that only the startblock of the cursor's search key is meaningful;
 * rc_blockcount is cleared before the lookup.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			xfs_refcount_encode_startblock(bno, domain),
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	cur->bc_rec.rc.rc_domain = domain;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
65 */ 66 int 67 xfs_refcount_lookup_ge( 68 struct xfs_btree_cur *cur, 69 enum xfs_refc_domain domain, 70 xfs_agblock_t bno, 71 int *stat) 72 { 73 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, 74 xfs_refcount_encode_startblock(bno, domain), 75 XFS_LOOKUP_GE); 76 cur->bc_rec.rc.rc_startblock = bno; 77 cur->bc_rec.rc.rc_blockcount = 0; 78 cur->bc_rec.rc.rc_domain = domain; 79 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 80 } 81 82 /* 83 * Look up the first record equal to [bno, len] in the btree 84 * given by cur. 85 */ 86 int 87 xfs_refcount_lookup_eq( 88 struct xfs_btree_cur *cur, 89 enum xfs_refc_domain domain, 90 xfs_agblock_t bno, 91 int *stat) 92 { 93 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno, 94 xfs_refcount_encode_startblock(bno, domain), 95 XFS_LOOKUP_LE); 96 cur->bc_rec.rc.rc_startblock = bno; 97 cur->bc_rec.rc.rc_blockcount = 0; 98 cur->bc_rec.rc.rc_domain = domain; 99 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 100 } 101 102 /* Convert on-disk record to in-core format. */ 103 void 104 xfs_refcount_btrec_to_irec( 105 const union xfs_btree_rec *rec, 106 struct xfs_refcount_irec *irec) 107 { 108 uint32_t start; 109 110 start = be32_to_cpu(rec->refc.rc_startblock); 111 if (start & XFS_REFC_COW_START) { 112 start &= ~XFS_REFC_COW_START; 113 irec->rc_domain = XFS_REFC_DOMAIN_COW; 114 } else { 115 irec->rc_domain = XFS_REFC_DOMAIN_SHARED; 116 } 117 118 irec->rc_startblock = start; 119 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 120 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 121 } 122 123 /* 124 * Get the data from the pointed-to record. 
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_perag		*pag = cur->bc_ag.pag;
	union xfs_btree_rec		*rec;
	int				error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !*stat)
		return error;

	/* Decode, then sanity-check every field before handing it back. */
	xfs_refcount_btrec_to_irec(rec, irec);
	if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
		goto out_bad_rec;

	if (!xfs_refcount_check_domain(irec))
		goto out_bad_rec;

	/* check for valid extent range, including overflow */
	if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
		goto out_bad_rec;

	if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
		goto out_bad_rec;

	trace_xfs_refcount_get(cur->bc_mp, pag->pag_agno, irec);
	return 0;

out_bad_rec:
	/* Corrupt record: complain loudly and fail the lookup. */
	xfs_warn(mp,
		"Refcount BTree record corruption in AG %d detected!",
		pag->pag_agno);
	xfs_warn(mp,
		"Start block 0x%x, block count 0x%x, references 0x%x",
		irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
	return -EFSCORRUPTED;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	uint32_t		start;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);

	/* Re-encode the domain bit into the on-disk startblock. */
	start = xfs_refcount_encode_startblock(irec->rc_startblock,
			irec->rc_domain);
	rec.refc.rc_startblock = cpu_to_be32(start);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);

	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 *
 * @i is set to 1 on a successful insert; anything else is reported as
 * corruption.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);

	/* Load the incore record into the cursor and insert it. */
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	cur->bc_rec.rc.rc_domain = irec->rc_domain;

	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
237 */ 238 STATIC int 239 xfs_refcount_delete( 240 struct xfs_btree_cur *cur, 241 int *i) 242 { 243 struct xfs_refcount_irec irec; 244 int found_rec; 245 int error; 246 247 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 248 if (error) 249 goto out_error; 250 if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { 251 error = -EFSCORRUPTED; 252 goto out_error; 253 } 254 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); 255 error = xfs_btree_delete(cur, i); 256 if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { 257 error = -EFSCORRUPTED; 258 goto out_error; 259 } 260 if (error) 261 goto out_error; 262 error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock, 263 &found_rec); 264 out_error: 265 if (error) 266 trace_xfs_refcount_delete_error(cur->bc_mp, 267 cur->bc_ag.pag->pag_agno, error, _RET_IP_); 268 return error; 269 } 270 271 /* 272 * Adjusting the Reference Count 273 * 274 * As stated elsewhere, the reference count btree (refcbt) stores 275 * >1 reference counts for extents of physical blocks. In this 276 * operation, we're either raising or lowering the reference count of 277 * some subrange stored in the tree: 278 * 279 * <------ adjustment range ------> 280 * ----+ +---+-----+ +--+--------+--------- 281 * 2 | | 3 | 4 | |17| 55 | 10 282 * ----+ +---+-----+ +--+--------+--------- 283 * X axis is physical blocks number; 284 * reference counts are the numbers inside the rectangles 285 * 286 * The first thing we need to do is to ensure that there are no 287 * refcount extents crossing either boundary of the range to be 288 * adjusted. 
For any extent that does cross a boundary, split it into 289 * two extents so that we can increment the refcount of one of the 290 * pieces later: 291 * 292 * <------ adjustment range ------> 293 * ----+ +---+-----+ +--+--------+----+---- 294 * 2 | | 3 | 2 | |17| 55 | 10 | 10 295 * ----+ +---+-----+ +--+--------+----+---- 296 * 297 * For this next step, let's assume that all the physical blocks in 298 * the adjustment range are mapped to a file and are therefore in use 299 * at least once. Therefore, we can infer that any gap in the 300 * refcount tree within the adjustment range represents a physical 301 * extent with refcount == 1: 302 * 303 * <------ adjustment range ------> 304 * ----+---+---+-----+-+--+--------+----+---- 305 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 306 * ----+---+---+-----+-+--+--------+----+---- 307 * ^ 308 * 309 * For each extent that falls within the interval range, figure out 310 * which extent is to the left or the right of that extent. Now we 311 * have a left, current, and right extent. If the new reference count 312 * of the center extent enables us to merge left, center, and right 313 * into one record covering all three, do so. If the center extent is 314 * at the left end of the range, abuts the left extent, and its new 315 * reference count matches the left extent's record, then merge them. 316 * If the center extent is at the right end of the range, abuts the 317 * right extent, and the reference counts match, merge those. In the 318 * example, we can left merge (assuming an increment operation): 319 * 320 * <------ adjustment range ------> 321 * --------+---+-----+-+--+--------+----+---- 322 * 2 | 3 | 2 |1|17| 55 | 10 | 10 323 * --------+---+-----+-+--+--------+----+---- 324 * ^ 325 * 326 * For all other extents within the range, adjust the reference count 327 * or delete it if the refcount falls below 2. 
If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.  On return, *shape_changed
 * is true iff a record was actually split (i.e. the btree shape changed).
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	enum xfs_refc_domain		domain,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	/* No split needed if the record starts at or ends before agbno. */
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.  On success the left record
 * is widened to cover all three and *aglen is zeroed, signalling that the
 * whole adjustment range has been handled by the merge.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_domain,
			center->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* rc_refcount > 1 means center was a real record, so right remains. */
	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.  On success the left record absorbs cleft
 * and agbno/aglen are advanced past the merged blocks.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, left, cleft);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
				cleft->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_domain,
			left->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.  On success the right record absorbs
 * cright and *aglen shrinks accordingly (agbno is unchanged because the
 * merge happens at the far end of the range).
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, cright, right);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_domain,
				cright->rc_startblock, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_domain,
			right->rc_startblock, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft). This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	enum xfs_refc_domain		domain;
	int				error;
	int				found_rec;

	if (flags & XFS_FIND_RCEXT_SHARED)
		domain = XFS_REFC_DOMAIN_SHARED;
	else
		domain = XFS_REFC_DOMAIN_COW;

	/* NULLAGBLOCK marks "no extent found" for the caller. */
	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* Only a candidate if it abuts agbno and is mergeable in this mode. */
	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
			cleft->rc_domain = domain;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
		cleft->rc_domain = domain;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright). This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	enum xfs_refc_domain		domain;
	int				error;
	int				found_rec;

	if (flags & XFS_FIND_RCEXT_SHARED)
		domain = XFS_REFC_DOMAIN_SHARED;
	else
		domain = XFS_REFC_DOMAIN_COW;

	/* NULLAGBLOCK marks "no extent found" for the caller. */
	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* Only a candidate if it abuts the range end and is mergeable. */
	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
			cright->rc_domain = domain;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
		cright->rc_domain = domain;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid?  (find_*_extents uses NULLAGBLOCK as "not found".) */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	/* error is 0 here unless the left-merge path set it. */
	return error;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	/*
	 * Worst case estimate: full splits of the free space and rmap btrees
	 * to handle each of the shape changes to the refcount btree.
	 */
	overhead = xfs_allocfree_block_count(cur->bc_mp,
				cur->bc_ag.refc.shape_changes);
	overhead += cur->bc_mp->m_refc_maxlevels;
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_ag.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	/* First op in this transaction is always allowed. */
	if (cur->bc_ag.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return  cur->bc_tp->t_log_res - overhead >
		cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
			&found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			/*
			 * No more records: synthesize a sentinel extent at
			 * the end of the AG so the hole logic below covers
			 * the remainder of the range.
			 */
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
			ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;

			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_ag.pag->pag_agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			cur->bc_ag.refc.nr_ops++;
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				if (XFS_IS_CORRUPT(cur->bc_mp,
						   found_tmp != 1)) {
					error = -EFSCORRUPTED;
					goto out_error;
				}
			} else {
				/* Refcount dropped to zero: free the blocks. */
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_ag.pag->pag_agno,
						tmp.rc_startblock);
				xfs_free_extent_later(cur->bc_tp, fsbno,
						  tmp.rc_blockcount, NULL);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			/* Stop if there's nothing left to modify */
			if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
				break;

			/* Move the cursor to the start of ext. */
			error = xfs_refcount_lookup_ge(cur,
					XFS_REFC_DOMAIN_SHARED, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/*
		 * A previous step trimmed agbno/aglen such that the end of the
		 * range would not be in the middle of the record.  If this is
		 * no longer the case, something is seriously wrong with the
		 * btree.  Make sure we never feed the synthesized record into
		 * the processing loop below.
		 */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
		    XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		cur->bc_ag.refc.nr_ops++;
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
		} else if (ext.rc_refcount == 1) {
			/* Refcount fell to 1: the record is no longer needed. */
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
				error = -EFSCORRUPTED;
				goto out_error;
			}
			/* delete repositioned the cursor; skip the increment */
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_ag.pag->pag_agno,
					ext.rc_startblock);
			xfs_free_extent_later(cur->bc_tp, fsbno,
					ext.rc_blockcount, NULL);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Adjust the reference count of a range of AG blocks.
 *
 * On return, *new_agbno/*new_aglen describe the portion of the range that
 * still needs adjusting if we ran out of transaction space (zero length
 * means everything was done).
 */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_ag.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_ag.agbp;
	xfs_btree_del_cursor(rcur, error);
	/* On error the AGF buffer is not joined to a dirty transaction;
	 * release it explicitly. */
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/*
 * Set up a continuation of a deferred refcount operation by updating the
 * intent.  Checks to make sure we're not going to run off the end of the AG.
 */
static inline int
xfs_refcount_continue_op(
	struct xfs_btree_cur		*cur,
	xfs_fsblock_t			startblock,
	xfs_agblock_t			new_agbno,
	xfs_extlen_t			new_len,
	xfs_fsblock_t			*new_fsbno)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_perag		*pag = cur->bc_ag.pag;

	/* The remainder must still be a valid extent within this AG. */
	if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len)))
		return -EFSCORRUPTED;

	*new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);

	ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len));
	ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno));

	return 0;
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 *
 * If the operation could not be completed in this transaction, *new_fsb
 * and *new_len describe the unfinished remainder for the caller to requeue.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;
	struct xfs_perag		*pag;

	pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock));
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	/* Error injection point for testing log recovery of refcount ops. */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) {
		error = -EIO;
		goto out_drop;
	}

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.  Carry the op/shape counters over
	 * to the new cursor so the transaction-space accounting survives
	 * the AG switch.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_ag.pag != pag) {
		nr_ops = rcur->bc_ag.refc.nr_ops;
		shape_changes = rcur->bc_ag.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING,
				&agbp);
		if (error)
			goto out_drop;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
		rcur->bc_ag.refc.nr_ops = nr_ops;
		rcur->bc_ag.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_INCREASE);
		if (error)
			goto out_drop;
		if (*new_len > 0)
			error = xfs_refcount_continue_op(rcur, startblock,
					new_agbno, *new_len, new_fsb);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
				new_len, XFS_REFCOUNT_ADJUST_DECREASE);
		if (error)
			goto out_drop;
		if (*new_len > 0)
			error = xfs_refcount_continue_op(rcur, startblock,
					new_agbno, *new_len, new_fsb);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		/* CoW staging ops always complete in one shot. */
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type,
				bno, blockcount, new_agbno, *new_len);
out_drop:
	xfs_perag_put(pag);
	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static void
__xfs_refcount_add(
	struct xfs_trans		*tp,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(tp->t_mountp,
			XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
			type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
			blockcount);

	/* __GFP_NOFAIL: intent allocation must not fail mid-transaction. */
	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 * No-op on filesystems without the reflink feature.
 */
void
xfs_refcount_increase_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 * No-op on filesystems without the reflink feature.
 */
void
xfs_refcount_decrease_extent(
	struct xfs_trans		*tp,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_has_reflink(tp->t_mountp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
			PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	xfs_agblock_t			*fbno,
	xfs_extlen_t			*flen,
	bool				find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
			&have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto out_error;
	}

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent!  Clip it to our range. */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/*
	 * Otherwise, find the end of this shared extent by walking
	 * forward while records stay contiguous with what we have.
	 */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	enum xfs_refc_adjust_op		adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
			&found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		/*
		 * Nothing found; synthesize a sentinel record just past the
		 * end of the AG so the overlap checks below never trip.
		 */
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
		ext.rc_domain = XFS_REFC_DOMAIN_COW;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		if (XFS_IS_CORRUPT(cur->bc_mp,
				   agbno + aglen > ext.rc_startblock)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		tmp.rc_domain = XFS_REFC_DOMAIN_COW;

		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/*
		 * Removing a CoW reservation, there should be one extent
		 * that exactly matches [agbno, aglen) with refcount 1.
		 */
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_ag.pag->pag_agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
			error = -EFSCORRUPTED;
			goto out_error;
		}
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_ag.pag->pag_agno, error, _RET_IP_);
	return error;
}

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
			agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree.
 */
void
xfs_refcount_alloc_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);

	/* Add rmap entry */
	xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/*
 * Forget a CoW staging event in the refcount btree.  Note the mirror-image
 * ordering vs. the alloc path: the rmap entry goes away first.
 */
void
xfs_refcount_free_cow_extent(
	struct xfs_trans		*tp,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!xfs_has_reflink(mp))
		return;

	/* Remove rmap entry */
	xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}

/* One leftover CoW staging extent queued for freeing during recovery. */
struct xfs_refcount_recovery {
	struct list_head		rr_list;	/* debris list linkage */
	struct xfs_refcount_irec	rr_rrec;	/* the orphan record */
};

/*
 * Stuff an extent on the recovery list.  Called back for each record found
 * by the range query in xfs_refcount_recover_cow_leftovers; priv is the
 * debris list head.  Staging extents must have refcount == 1.
 */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (XFS_IS_CORRUPT(cur->bc_mp,
			   be32_to_cpu(rec->refc.rc_refcount) != 1))
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	struct xfs_perag		*pag)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	xfs_fsblock_t			fsb;
	int				error;

	/*
	 * AG block numbers must not collide with the XFS_REFC_COW_START bit
	 * used to encode the CoW domain in on-disk startblocks.
	 */
	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
	if (error)
		goto out_trans;
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);
	if (error)
		goto out_free;

	/*
	 * Now iterate the list to free the leftovers, committing one
	 * real transaction per record.
	 */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
				&rr->rr_rrec);

		/* Free the orphan record */
		fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
				rr->rr_rrec.rc_startblock);
		xfs_refcount_free_cow_extent(tp, fsb,
				rr->rr_rrec.rc_blockcount);

		/* Free the block. */
		xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;
}

/*
 * Is there a record covering a given extent?  Sets *exists if any record
 * in the given domain overlaps [bno, bno + len).
 */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	enum xfs_refc_domain	domain,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;
	low.rc.rc_domain = high.rc.rc_domain = domain;

	return xfs_btree_has_record(cur, &low, &high, exists);
}

/* Create the slab cache for refcount intent items. */
int __init
xfs_refcount_intent_init_cache(void)
{
	xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
			sizeof(struct xfs_refcount_intent),
			0, 0, NULL);

	return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
}

/* Destroy the slab cache for refcount intent items. */
void
xfs_refcount_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_refcount_intent_cache);
	xfs_refcount_intent_cache = NULL;
}