/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen,
		struct xfs_defer_ops *dfops);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen,
		struct xfs_defer_ops *dfops);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Look up the first record equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_eq(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_EQ);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/* Convert on-disk record to in-core format. */
void
xfs_refcount_btrec_to_irec(
	union xfs_btree_rec		*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (!error && *stat == 1) {
		xfs_refcount_btrec_to_irec(rec, irec);
		trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno,
				irec);
	}
	return error;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
	rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);

out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int				found_rec;
	int				error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
	error = xfs_btree_delete(cur, i);
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
	if (error)
		goto out_error;
	error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   | 9  | 10
 * ----+   +---+       +--+--------+----+----
 *        DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */
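
/*
 * Illustrative sketch only (this is not a caller that exists in this
 * file): the procedure described above is split across the helpers
 * that follow, and the real entry point that strings them together is
 * xfs_refcount_adjust() further down.  Roughly:
 *
 *	xfs_refcount_split_extent(cur, agbno, &shape_changed);
 *	xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
 *	xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
 *			XFS_FIND_RCEXT_SHARED, &shape_changed);
 *	xfs_refcount_adjust_extents(cur, &agbno, &aglen, adj, dfops, oinfo);
 */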

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_private.a.agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
389 */ 390 error = xfs_refcount_lookup_ge(cur, center->rc_startblock, 391 &found_rec); 392 if (error) 393 goto out_error; 394 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 395 396 error = xfs_refcount_delete(cur, &found_rec); 397 if (error) 398 goto out_error; 399 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 400 401 if (center->rc_refcount > 1) { 402 error = xfs_refcount_delete(cur, &found_rec); 403 if (error) 404 goto out_error; 405 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 406 out_error); 407 } 408 409 /* Enlarge the left extent. */ 410 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 411 &found_rec); 412 if (error) 413 goto out_error; 414 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 415 416 left->rc_blockcount = extlen; 417 error = xfs_refcount_update(cur, left); 418 if (error) 419 goto out_error; 420 421 *aglen = 0; 422 return error; 423 424 out_error: 425 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 426 cur->bc_private.a.agno, error, _RET_IP_); 427 return error; 428 } 429 430 /* 431 * Merge with the left extent. 432 */ 433 STATIC int 434 xfs_refcount_merge_left_extent( 435 struct xfs_btree_cur *cur, 436 struct xfs_refcount_irec *left, 437 struct xfs_refcount_irec *cleft, 438 xfs_agblock_t *agbno, 439 xfs_extlen_t *aglen) 440 { 441 int error; 442 int found_rec; 443 444 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 445 cur->bc_private.a.agno, left, cleft); 446 447 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ 448 if (cleft->rc_refcount > 1) { 449 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, 450 &found_rec); 451 if (error) 452 goto out_error; 453 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 454 out_error); 455 456 error = xfs_refcount_delete(cur, &found_rec); 457 if (error) 458 goto out_error; 459 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 460 out_error); 461 } 462 463 /* Enlarge the left extent. */ 464 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 465 &found_rec); 466 if (error) 467 goto out_error; 468 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 469 470 left->rc_blockcount += cleft->rc_blockcount; 471 error = xfs_refcount_update(cur, left); 472 if (error) 473 goto out_error; 474 475 *agbno += cleft->rc_blockcount; 476 *aglen -= cleft->rc_blockcount; 477 return error; 478 479 out_error: 480 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 481 cur->bc_private.a.agno, error, _RET_IP_); 482 return error; 483 } 484 485 /* 486 * Merge with the right extent. 487 */ 488 STATIC int 489 xfs_refcount_merge_right_extent( 490 struct xfs_btree_cur *cur, 491 struct xfs_refcount_irec *right, 492 struct xfs_refcount_irec *cright, 493 xfs_extlen_t *aglen) 494 { 495 int error; 496 int found_rec; 497 498 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 499 cur->bc_private.a.agno, cright, right); 500 501 /* 502 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 503 * remove it. 504 */ 505 if (cright->rc_refcount > 1) { 506 error = xfs_refcount_lookup_le(cur, cright->rc_startblock, 507 &found_rec); 508 if (error) 509 goto out_error; 510 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 511 out_error); 512 513 error = xfs_refcount_delete(cur, &found_rec); 514 if (error) 515 goto out_error; 516 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 517 out_error); 518 } 519 520 /* Enlarge the right extent. 
	error = xfs_refcount_lookup_le(cur, right->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return error;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	overhead = cur->bc_private.a.priv.refc.shape_changes *
			xfs_allocfree_log_count(cur->bc_mp, 1);
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_private.a.priv.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_private.a.agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
						found_tmp == 1, out_error);
				cur->bc_private.a.priv.refc.nr_ops++;
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_private.a.agno,
						tmp.rc_startblock);
				xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
						tmp.rc_blockcount, oinfo);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			error = xfs_refcount_lookup_ge(cur, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/* Stop if there's nothing left to modify */
		if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
			break;

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
			cur->bc_private.a.priv.refc.nr_ops++;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
					found_rec == 1, out_error);
			cur->bc_private.a.priv.refc.nr_ops++;
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_private.a.agno,
					ext.rc_startblock);
			xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
					ext.rc_blockcount, oinfo);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops,
	struct xfs_owner_info	*oinfo)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_private.a.priv.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
			adj, dfops, oinfo);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_private.a.agbp;
	xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_ops		*dfops,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agnumber_t			agno;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	agno = XFS_FSB_TO_AGNO(mp, startblock);
	ASSERT(agno != NULLAGNUMBER);
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_private.a.agno != agno) {
		nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
		shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;
		if (!agbp)
			return -EFSCORRUPTED;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops);
		if (!rcur) {
			error = -ENOMEM;
			goto out_cur;
		}
		rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
		rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
			new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
			new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, agno, type,
				bno, blockcount, new_agbno, *new_len);
	return error;

out_cur:
	xfs_trans_brelse(tp, agbp);

	return error;
}

/*
 * Record a refcount intent for later processing.
 */
1160 */ 1161 static int 1162 __xfs_refcount_add( 1163 struct xfs_mount *mp, 1164 struct xfs_defer_ops *dfops, 1165 enum xfs_refcount_intent_type type, 1166 xfs_fsblock_t startblock, 1167 xfs_extlen_t blockcount) 1168 { 1169 struct xfs_refcount_intent *ri; 1170 1171 trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock), 1172 type, XFS_FSB_TO_AGBNO(mp, startblock), 1173 blockcount); 1174 1175 ri = kmem_alloc(sizeof(struct xfs_refcount_intent), 1176 KM_SLEEP | KM_NOFS); 1177 INIT_LIST_HEAD(&ri->ri_list); 1178 ri->ri_type = type; 1179 ri->ri_startblock = startblock; 1180 ri->ri_blockcount = blockcount; 1181 1182 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); 1183 return 0; 1184 } 1185 1186 /* 1187 * Increase the reference count of the blocks backing a file's extent. 1188 */ 1189 int 1190 xfs_refcount_increase_extent( 1191 struct xfs_mount *mp, 1192 struct xfs_defer_ops *dfops, 1193 struct xfs_bmbt_irec *PREV) 1194 { 1195 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1196 return 0; 1197 1198 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE, 1199 PREV->br_startblock, PREV->br_blockcount); 1200 } 1201 1202 /* 1203 * Decrease the reference count of the blocks backing a file's extent. 1204 */ 1205 int 1206 xfs_refcount_decrease_extent( 1207 struct xfs_mount *mp, 1208 struct xfs_defer_ops *dfops, 1209 struct xfs_bmbt_irec *PREV) 1210 { 1211 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1212 return 0; 1213 1214 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE, 1215 PREV->br_startblock, PREV->br_blockcount); 1216 } 1217 1218 /* 1219 * Given an AG extent, find the lowest-numbered run of shared blocks 1220 * within that range and return the range in fbno/flen. If 1221 * find_end_of_shared is set, return the longest contiguous extent of 1222 * shared blocks; if not, just return the first extent we find. If no 1223 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK 1224 * and 0, respectively. 1225 */ 1226 int 1227 xfs_refcount_find_shared( 1228 struct xfs_btree_cur *cur, 1229 xfs_agblock_t agbno, 1230 xfs_extlen_t aglen, 1231 xfs_agblock_t *fbno, 1232 xfs_extlen_t *flen, 1233 bool find_end_of_shared) 1234 { 1235 struct xfs_refcount_irec tmp; 1236 int i; 1237 int have; 1238 int error; 1239 1240 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno, 1241 agbno, aglen); 1242 1243 /* By default, skip the whole range */ 1244 *fbno = NULLAGBLOCK; 1245 *flen = 0; 1246 1247 /* Try to find a refcount extent that crosses the start */ 1248 error = xfs_refcount_lookup_le(cur, agbno, &have); 1249 if (error) 1250 goto out_error; 1251 if (!have) { 1252 /* No left extent, look at the next one */ 1253 error = xfs_btree_increment(cur, 0, &have); 1254 if (error) 1255 goto out_error; 1256 if (!have) 1257 goto done; 1258 } 1259 error = xfs_refcount_get_rec(cur, &tmp, &i); 1260 if (error) 1261 goto out_error; 1262 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1263 1264 /* If the extent ends before the start, look at the next one */ 1265 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { 1266 error = xfs_btree_increment(cur, 0, &have); 1267 if (error) 1268 goto out_error; 1269 if (!have) 1270 goto done; 1271 error = xfs_refcount_get_rec(cur, &tmp, &i); 1272 if (error) 1273 goto out_error; 1274 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1275 } 1276 1277 /* If the extent starts after the range we want, bail out */ 1278 if (tmp.rc_startblock >= agbno + aglen) 1279 goto done; 1280 1281 /* We found the start of a shared extent! 
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_private.a.agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for a long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
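
/*
 * For illustration only (a sketch, not data used by the code): a CoW
 * staging extent covering agbno 100 for 8 blocks would be stored as a
 * refcount record roughly like
 *
 *	{ .rc_startblock = 100 + XFS_REFC_COW_START,
 *	  .rc_blockcount = 8,
 *	  .rc_refcount   = 1 }
 *
 * Offsetting the start block by XFS_REFC_COW_START pushes staging
 * records beyond any real AG block number (see the sb_agblocks check
 * in xfs_refcount_recover_cow_leftovers()), so they sort after all
 * shared-extent records and mount-time recovery only has to scan the
 * keyspace from XFS_REFC_COW_START upward.
 */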
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
				XFS_REFC_COW_START;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock >= agbno + aglen, out_error);

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_tmp == 1, out_error);
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock == agbno, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_blockcount == aglen, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_refcount == 1, out_error);

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_rec == 1, out_error);
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	agbno += XFS_REFC_COW_START;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	struct xfs_defer_ops	*dfops)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	struct xfs_defer_ops	*dfops)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
int
xfs_refcount_alloc_cow_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	int				error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
			fsb, len);
	if (error)
		return error;

	/* Add rmap entry */
	return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
int
xfs_refcount_free_cow_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	int				error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	/* Remove rmap entry */
	error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	if (error)
		return error;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
			fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	union xfs_btree_rec		*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (be32_to_cpu(rec->refc.rc_refcount) != 1)
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	struct xfs_defer_ops		dfops;
	xfs_fsblock_t			fsb;
	xfs_agblock_t			agbno;
	int				error;

	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
	if (error)
		goto out_trans;
	if (!agbp) {
		error = -ENOMEM;
		goto out_trans;
	}
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_startblock = XFS_REFC_COW_START;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	if (error)
		goto out_cursor;
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);

		/* Free the orphan record */
		xfs_defer_init(&dfops, &fsb);
		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
		error = xfs_refcount_free_cow_extent(mp, &dfops, fsb,
				rr->rr_rrec.rc_blockcount);
		if (error)
			goto out_defer;

		/* Free the block. */
		xfs_bmap_add_free(mp, &dfops, fsb,
				rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_defer_finish(&tp, &dfops);
		if (error)
			goto out_defer;

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_defer:
	xfs_defer_cancel(&dfops);
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;

out_cursor:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	xfs_trans_brelse(tp, agbp);
	goto out_trans;
}

/* Is there a record covering a given extent? */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}