1 /* 2 * Copyright (C) 2016 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_log_format.h" 25 #include "xfs_trans_resv.h" 26 #include "xfs_sb.h" 27 #include "xfs_mount.h" 28 #include "xfs_defer.h" 29 #include "xfs_btree.h" 30 #include "xfs_bmap.h" 31 #include "xfs_refcount_btree.h" 32 #include "xfs_alloc.h" 33 #include "xfs_error.h" 34 #include "xfs_trace.h" 35 #include "xfs_cksum.h" 36 #include "xfs_trans.h" 37 #include "xfs_bit.h" 38 #include "xfs_refcount.h" 39 #include "xfs_rmap.h" 40 41 /* Allowable refcount adjustment amounts. */ 42 enum xfs_refc_adjust_op { 43 XFS_REFCOUNT_ADJUST_INCREASE = 1, 44 XFS_REFCOUNT_ADJUST_DECREASE = -1, 45 XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, 46 XFS_REFCOUNT_ADJUST_COW_FREE = -1, 47 }; 48 49 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, 50 xfs_agblock_t agbno, xfs_extlen_t aglen, 51 struct xfs_defer_ops *dfops); 52 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, 53 xfs_agblock_t agbno, xfs_extlen_t aglen, 54 struct xfs_defer_ops *dfops); 55 56 /* 57 * Look up the first record less than or equal to [bno, len] in the btree 58 * given by cur. 59 */ 60 int 61 xfs_refcount_lookup_le( 62 struct xfs_btree_cur *cur, 63 xfs_agblock_t bno, 64 int *stat) 65 { 66 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, 67 XFS_LOOKUP_LE); 68 cur->bc_rec.rc.rc_startblock = bno; 69 cur->bc_rec.rc.rc_blockcount = 0; 70 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 71 } 72 73 /* 74 * Look up the first record greater than or equal to [bno, len] in the btree 75 * given by cur. 76 */ 77 int 78 xfs_refcount_lookup_ge( 79 struct xfs_btree_cur *cur, 80 xfs_agblock_t bno, 81 int *stat) 82 { 83 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, 84 XFS_LOOKUP_GE); 85 cur->bc_rec.rc.rc_startblock = bno; 86 cur->bc_rec.rc.rc_blockcount = 0; 87 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 88 } 89 90 /* Convert on-disk record to in-core format. */ 91 static inline void 92 xfs_refcount_btrec_to_irec( 93 union xfs_btree_rec *rec, 94 struct xfs_refcount_irec *irec) 95 { 96 irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); 97 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 98 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 99 } 100 101 /* 102 * Get the data from the pointed-to record. 
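 * The on-disk record is three big-endian 32-bit fields (startblock,
 * blockcount, refcount); xfs_refcount_btrec_to_irec() above converts
 * it into the in-core struct xfs_refcount_irec before it is handed
 * back to the caller.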
103 */ 104 int 105 xfs_refcount_get_rec( 106 struct xfs_btree_cur *cur, 107 struct xfs_refcount_irec *irec, 108 int *stat) 109 { 110 union xfs_btree_rec *rec; 111 int error; 112 113 error = xfs_btree_get_rec(cur, &rec, stat); 114 if (!error && *stat == 1) { 115 xfs_refcount_btrec_to_irec(rec, irec); 116 trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, 117 irec); 118 } 119 return error; 120 } 121 122 /* 123 * Update the record referred to by cur to the value given 124 * by [bno, len, refcount]. 125 * This either works (return 0) or gets an EFSCORRUPTED error. 126 */ 127 STATIC int 128 xfs_refcount_update( 129 struct xfs_btree_cur *cur, 130 struct xfs_refcount_irec *irec) 131 { 132 union xfs_btree_rec rec; 133 int error; 134 135 trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec); 136 rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); 137 rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); 138 rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); 139 error = xfs_btree_update(cur, &rec); 140 if (error) 141 trace_xfs_refcount_update_error(cur->bc_mp, 142 cur->bc_private.a.agno, error, _RET_IP_); 143 return error; 144 } 145 146 /* 147 * Insert the record referred to by cur to the value given 148 * by [bno, len, refcount]. 149 * This either works (return 0) or gets an EFSCORRUPTED error. 150 */ 151 STATIC int 152 xfs_refcount_insert( 153 struct xfs_btree_cur *cur, 154 struct xfs_refcount_irec *irec, 155 int *i) 156 { 157 int error; 158 159 trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec); 160 cur->bc_rec.rc.rc_startblock = irec->rc_startblock; 161 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; 162 cur->bc_rec.rc.rc_refcount = irec->rc_refcount; 163 error = xfs_btree_insert(cur, i); 164 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); 165 out_error: 166 if (error) 167 trace_xfs_refcount_insert_error(cur->bc_mp, 168 cur->bc_private.a.agno, error, _RET_IP_); 169 return error; 170 } 171 172 /* 173 * Remove the record referred to by cur, then set the pointer to the spot 174 * where the record could be re-inserted, in case we want to increment or 175 * decrement the cursor. 176 * This either works (return 0) or gets an EFSCORRUPTED error. 177 */ 178 STATIC int 179 xfs_refcount_delete( 180 struct xfs_btree_cur *cur, 181 int *i) 182 { 183 struct xfs_refcount_irec irec; 184 int found_rec; 185 int error; 186 187 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 188 if (error) 189 goto out_error; 190 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 191 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec); 192 error = xfs_btree_delete(cur, i); 193 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); 194 if (error) 195 goto out_error; 196 error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); 197 out_error: 198 if (error) 199 trace_xfs_refcount_delete_error(cur->bc_mp, 200 cur->bc_private.a.agno, error, _RET_IP_); 201 return error; 202 } 203 204 /* 205 * Adjusting the Reference Count 206 * 207 * As stated elsewhere, the reference count btree (refcbt) stores 208 * >1 reference counts for extents of physical blocks. 
In this 209 * operation, we're either raising or lowering the reference count of 210 * some subrange stored in the tree: 211 * 212 * <------ adjustment range ------> 213 * ----+ +---+-----+ +--+--------+--------- 214 * 2 | | 3 | 4 | |17| 55 | 10 215 * ----+ +---+-----+ +--+--------+--------- 216 * X axis is physical blocks number; 217 * reference counts are the numbers inside the rectangles 218 * 219 * The first thing we need to do is to ensure that there are no 220 * refcount extents crossing either boundary of the range to be 221 * adjusted. For any extent that does cross a boundary, split it into 222 * two extents so that we can increment the refcount of one of the 223 * pieces later: 224 * 225 * <------ adjustment range ------> 226 * ----+ +---+-----+ +--+--------+----+---- 227 * 2 | | 3 | 2 | |17| 55 | 10 | 10 228 * ----+ +---+-----+ +--+--------+----+---- 229 * 230 * For this next step, let's assume that all the physical blocks in 231 * the adjustment range are mapped to a file and are therefore in use 232 * at least once. Therefore, we can infer that any gap in the 233 * refcount tree within the adjustment range represents a physical 234 * extent with refcount == 1: 235 * 236 * <------ adjustment range ------> 237 * ----+---+---+-----+-+--+--------+----+---- 238 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 239 * ----+---+---+-----+-+--+--------+----+---- 240 * ^ 241 * 242 * For each extent that falls within the interval range, figure out 243 * which extent is to the left or the right of that extent. Now we 244 * have a left, current, and right extent. If the new reference count 245 * of the center extent enables us to merge left, center, and right 246 * into one record covering all three, do so. If the center extent is 247 * at the left end of the range, abuts the left extent, and its new 248 * reference count matches the left extent's record, then merge them. 249 * If the center extent is at the right end of the range, abuts the 250 * right extent, and the reference counts match, merge those. In the 251 * example, we can left merge (assuming an increment operation): 252 * 253 * <------ adjustment range ------> 254 * --------+---+-----+-+--+--------+----+---- 255 * 2 | 3 | 2 |1|17| 55 | 10 | 10 256 * --------+---+-----+-+--+--------+----+---- 257 * ^ 258 * 259 * For all other extents within the range, adjust the reference count 260 * or delete it if the refcount falls below 2. If we were 261 * incrementing, the end result looks like this: 262 * 263 * <------ adjustment range ------> 264 * --------+---+-----+-+--+--------+----+---- 265 * 2 | 4 | 3 |2|18| 56 | 11 | 10 266 * --------+---+-----+-+--+--------+----+---- 267 * 268 * The result of a decrement operation looks as such: 269 * 270 * <------ adjustment range ------> 271 * ----+ +---+ +--+--------+----+---- 272 * 2 | | 2 | |16| 54 | 9 | 10 273 * ----+ +---+ +--+--------+----+---- 274 * DDDD 111111DD 275 * 276 * The blocks marked "D" are freed; the blocks marked "1" are only 277 * referenced once and therefore the record is removed from the 278 * refcount btree. 279 */ 280 281 /* Next block after this extent. */ 282 static inline xfs_agblock_t 283 xfs_refc_next( 284 struct xfs_refcount_irec *rc) 285 { 286 return rc->rc_startblock + rc->rc_blockcount; 287 } 288 289 /* 290 * Split a refcount extent that crosses agbno. 
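 *
 * For example, splitting the record [startblock 10, blockcount 10,
 * refcount 3] at agbno 14 shortens the existing record in place to the
 * right-hand piece [14, 6, 3] and inserts a new left-hand piece
 * [10, 4, 3]; a subsequent refcount adjustment can then touch one piece
 * without disturbing the other.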
291 */ 292 STATIC int 293 xfs_refcount_split_extent( 294 struct xfs_btree_cur *cur, 295 xfs_agblock_t agbno, 296 bool *shape_changed) 297 { 298 struct xfs_refcount_irec rcext, tmp; 299 int found_rec; 300 int error; 301 302 *shape_changed = false; 303 error = xfs_refcount_lookup_le(cur, agbno, &found_rec); 304 if (error) 305 goto out_error; 306 if (!found_rec) 307 return 0; 308 309 error = xfs_refcount_get_rec(cur, &rcext, &found_rec); 310 if (error) 311 goto out_error; 312 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 313 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) 314 return 0; 315 316 *shape_changed = true; 317 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno, 318 &rcext, agbno); 319 320 /* Establish the right extent. */ 321 tmp = rcext; 322 tmp.rc_startblock = agbno; 323 tmp.rc_blockcount -= (agbno - rcext.rc_startblock); 324 error = xfs_refcount_update(cur, &tmp); 325 if (error) 326 goto out_error; 327 328 /* Insert the left extent. */ 329 tmp = rcext; 330 tmp.rc_blockcount = agbno - rcext.rc_startblock; 331 error = xfs_refcount_insert(cur, &tmp, &found_rec); 332 if (error) 333 goto out_error; 334 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 335 return error; 336 337 out_error: 338 trace_xfs_refcount_split_extent_error(cur->bc_mp, 339 cur->bc_private.a.agno, error, _RET_IP_); 340 return error; 341 } 342 343 /* 344 * Merge the left, center, and right extents. 345 */ 346 STATIC int 347 xfs_refcount_merge_center_extents( 348 struct xfs_btree_cur *cur, 349 struct xfs_refcount_irec *left, 350 struct xfs_refcount_irec *center, 351 struct xfs_refcount_irec *right, 352 unsigned long long extlen, 353 xfs_agblock_t *agbno, 354 xfs_extlen_t *aglen) 355 { 356 int error; 357 int found_rec; 358 359 trace_xfs_refcount_merge_center_extents(cur->bc_mp, 360 cur->bc_private.a.agno, left, center, right); 361 362 /* 363 * Make sure the center and right extents are not in the btree. 364 * If the center extent was synthesized, the first delete call 365 * removes the right extent and we skip the second deletion. 366 * If center and right were in the btree, then the first delete 367 * call removes the center and the second one removes the right 368 * extent. 369 */ 370 error = xfs_refcount_lookup_ge(cur, center->rc_startblock, 371 &found_rec); 372 if (error) 373 goto out_error; 374 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 375 376 error = xfs_refcount_delete(cur, &found_rec); 377 if (error) 378 goto out_error; 379 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 380 381 if (center->rc_refcount > 1) { 382 error = xfs_refcount_delete(cur, &found_rec); 383 if (error) 384 goto out_error; 385 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 386 out_error); 387 } 388 389 /* Enlarge the left extent. */ 390 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 391 &found_rec); 392 if (error) 393 goto out_error; 394 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 395 396 left->rc_blockcount = extlen; 397 error = xfs_refcount_update(cur, left); 398 if (error) 399 goto out_error; 400 401 *aglen = 0; 402 return error; 403 404 out_error: 405 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 406 cur->bc_private.a.agno, error, _RET_IP_); 407 return error; 408 } 409 410 /* 411 * Merge with the left extent. 
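 *
 * For example, when incrementing with left = [10, 5, 3] and
 * cleft = [15, 4, 2], left's refcount already matches cleft's
 * post-adjustment value, so the cleft record is deleted, left is
 * rewritten as [10, 9, 3], and agbno/aglen are advanced past the four
 * merged blocks so that the adjustment loop never touches them.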
412 */ 413 STATIC int 414 xfs_refcount_merge_left_extent( 415 struct xfs_btree_cur *cur, 416 struct xfs_refcount_irec *left, 417 struct xfs_refcount_irec *cleft, 418 xfs_agblock_t *agbno, 419 xfs_extlen_t *aglen) 420 { 421 int error; 422 int found_rec; 423 424 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 425 cur->bc_private.a.agno, left, cleft); 426 427 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ 428 if (cleft->rc_refcount > 1) { 429 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, 430 &found_rec); 431 if (error) 432 goto out_error; 433 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 434 out_error); 435 436 error = xfs_refcount_delete(cur, &found_rec); 437 if (error) 438 goto out_error; 439 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 440 out_error); 441 } 442 443 /* Enlarge the left extent. */ 444 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 445 &found_rec); 446 if (error) 447 goto out_error; 448 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 449 450 left->rc_blockcount += cleft->rc_blockcount; 451 error = xfs_refcount_update(cur, left); 452 if (error) 453 goto out_error; 454 455 *agbno += cleft->rc_blockcount; 456 *aglen -= cleft->rc_blockcount; 457 return error; 458 459 out_error: 460 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 461 cur->bc_private.a.agno, error, _RET_IP_); 462 return error; 463 } 464 465 /* 466 * Merge with the right extent. 467 */ 468 STATIC int 469 xfs_refcount_merge_right_extent( 470 struct xfs_btree_cur *cur, 471 struct xfs_refcount_irec *right, 472 struct xfs_refcount_irec *cright, 473 xfs_agblock_t *agbno, 474 xfs_extlen_t *aglen) 475 { 476 int error; 477 int found_rec; 478 479 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 480 cur->bc_private.a.agno, cright, right); 481 482 /* 483 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 484 * remove it. 485 */ 486 if (cright->rc_refcount > 1) { 487 error = xfs_refcount_lookup_le(cur, cright->rc_startblock, 488 &found_rec); 489 if (error) 490 goto out_error; 491 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 492 out_error); 493 494 error = xfs_refcount_delete(cur, &found_rec); 495 if (error) 496 goto out_error; 497 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 498 out_error); 499 } 500 501 /* Enlarge the right extent. */ 502 error = xfs_refcount_lookup_le(cur, right->rc_startblock, 503 &found_rec); 504 if (error) 505 goto out_error; 506 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 507 508 right->rc_startblock -= cright->rc_blockcount; 509 right->rc_blockcount += cright->rc_blockcount; 510 error = xfs_refcount_update(cur, right); 511 if (error) 512 goto out_error; 513 514 *aglen -= cright->rc_blockcount; 515 return error; 516 517 out_error: 518 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 519 cur->bc_private.a.agno, error, _RET_IP_); 520 return error; 521 } 522 523 #define XFS_FIND_RCEXT_SHARED 1 524 #define XFS_FIND_RCEXT_COW 2 525 /* 526 * Find the left extent and the one after it (cleft). This function assumes 527 * that we've already split any extent crossing agbno. 
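 *
 * "left" is the record that ends exactly at agbno; "cleft" is the
 * record starting at agbno, or, if there is a hole there, a synthesized
 * extent with refcount == 1 covering the start of the hole.  If there
 * is no suitable left record (it doesn't abut agbno, or its refcount
 * doesn't match the SHARED/COW flag), both are returned with
 * rc_startblock == NULLAGBLOCK.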
528 */ 529 STATIC int 530 xfs_refcount_find_left_extents( 531 struct xfs_btree_cur *cur, 532 struct xfs_refcount_irec *left, 533 struct xfs_refcount_irec *cleft, 534 xfs_agblock_t agbno, 535 xfs_extlen_t aglen, 536 int flags) 537 { 538 struct xfs_refcount_irec tmp; 539 int error; 540 int found_rec; 541 542 left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; 543 error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); 544 if (error) 545 goto out_error; 546 if (!found_rec) 547 return 0; 548 549 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 550 if (error) 551 goto out_error; 552 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 553 554 if (xfs_refc_next(&tmp) != agbno) 555 return 0; 556 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 557 return 0; 558 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 559 return 0; 560 /* We have a left extent; retrieve (or invent) the next right one */ 561 *left = tmp; 562 563 error = xfs_btree_increment(cur, 0, &found_rec); 564 if (error) 565 goto out_error; 566 if (found_rec) { 567 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 568 if (error) 569 goto out_error; 570 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 571 out_error); 572 573 /* if tmp starts at the end of our range, just use that */ 574 if (tmp.rc_startblock == agbno) 575 *cleft = tmp; 576 else { 577 /* 578 * There's a gap in the refcntbt at the start of the 579 * range we're interested in (refcount == 1) so 580 * synthesize the implied extent and pass it back. 581 * We assume here that the agbno/aglen range was 582 * passed in from a data fork extent mapping and 583 * therefore is allocated to exactly one owner. 584 */ 585 cleft->rc_startblock = agbno; 586 cleft->rc_blockcount = min(aglen, 587 tmp.rc_startblock - agbno); 588 cleft->rc_refcount = 1; 589 } 590 } else { 591 /* 592 * No extents, so pretend that there's one covering the whole 593 * range. 594 */ 595 cleft->rc_startblock = agbno; 596 cleft->rc_blockcount = aglen; 597 cleft->rc_refcount = 1; 598 } 599 trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno, 600 left, cleft, agbno); 601 return error; 602 603 out_error: 604 trace_xfs_refcount_find_left_extent_error(cur->bc_mp, 605 cur->bc_private.a.agno, error, _RET_IP_); 606 return error; 607 } 608 609 /* 610 * Find the right extent and the one before it (cright). This function 611 * assumes that we've already split any extents crossing agbno + aglen. 
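 *
 * This is the mirror image of _find_left_extents: "right" must start
 * exactly at agbno + aglen, and "cright" is the real or synthesized
 * (refcount == 1) extent that ends there.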
612 */ 613 STATIC int 614 xfs_refcount_find_right_extents( 615 struct xfs_btree_cur *cur, 616 struct xfs_refcount_irec *right, 617 struct xfs_refcount_irec *cright, 618 xfs_agblock_t agbno, 619 xfs_extlen_t aglen, 620 int flags) 621 { 622 struct xfs_refcount_irec tmp; 623 int error; 624 int found_rec; 625 626 right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; 627 error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); 628 if (error) 629 goto out_error; 630 if (!found_rec) 631 return 0; 632 633 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 634 if (error) 635 goto out_error; 636 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 637 638 if (tmp.rc_startblock != agbno + aglen) 639 return 0; 640 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 641 return 0; 642 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 643 return 0; 644 /* We have a right extent; retrieve (or invent) the next left one */ 645 *right = tmp; 646 647 error = xfs_btree_decrement(cur, 0, &found_rec); 648 if (error) 649 goto out_error; 650 if (found_rec) { 651 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 652 if (error) 653 goto out_error; 654 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 655 out_error); 656 657 /* if tmp ends at the end of our range, just use that */ 658 if (xfs_refc_next(&tmp) == agbno + aglen) 659 *cright = tmp; 660 else { 661 /* 662 * There's a gap in the refcntbt at the end of the 663 * range we're interested in (refcount == 1) so 664 * create the implied extent and pass it back. 665 * We assume here that the agbno/aglen range was 666 * passed in from a data fork extent mapping and 667 * therefore is allocated to exactly one owner. 668 */ 669 cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); 670 cright->rc_blockcount = right->rc_startblock - 671 cright->rc_startblock; 672 cright->rc_refcount = 1; 673 } 674 } else { 675 /* 676 * No extents, so pretend that there's one covering the whole 677 * range. 678 */ 679 cright->rc_startblock = agbno; 680 cright->rc_blockcount = aglen; 681 cright->rc_refcount = 1; 682 } 683 trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno, 684 cright, right, agbno + aglen); 685 return error; 686 687 out_error: 688 trace_xfs_refcount_find_right_extent_error(cur->bc_mp, 689 cur->bc_private.a.agno, error, _RET_IP_); 690 return error; 691 } 692 693 /* Is this extent valid? */ 694 static inline bool 695 xfs_refc_valid( 696 struct xfs_refcount_irec *rc) 697 { 698 return rc->rc_startblock != NULLAGBLOCK; 699 } 700 701 /* 702 * Try to merge with any extents on the boundaries of the adjustment range. 703 */ 704 STATIC int 705 xfs_refcount_merge_extents( 706 struct xfs_btree_cur *cur, 707 xfs_agblock_t *agbno, 708 xfs_extlen_t *aglen, 709 enum xfs_refc_adjust_op adjust, 710 int flags, 711 bool *shape_changed) 712 { 713 struct xfs_refcount_irec left = {0}, cleft = {0}; 714 struct xfs_refcount_irec cright = {0}, right = {0}; 715 int error; 716 unsigned long long ulen; 717 bool cequal; 718 719 *shape_changed = false; 720 /* 721 * Find the extent just below agbno [left], just above agbno [cleft], 722 * just below (agbno + aglen) [cright], and just above (agbno + aglen) 723 * [right]. 724 */ 725 error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, 726 *aglen, flags); 727 if (error) 728 return error; 729 error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, 730 *aglen, flags); 731 if (error) 732 return error; 733 734 /* No left or right extent to merge; exit. 
*/ 735 if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) 736 return 0; 737 738 cequal = (cleft.rc_startblock == cright.rc_startblock) && 739 (cleft.rc_blockcount == cright.rc_blockcount); 740 741 /* Try to merge left, cleft, and right. cleft must == cright. */ 742 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + 743 right.rc_blockcount; 744 if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && 745 xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && 746 left.rc_refcount == cleft.rc_refcount + adjust && 747 right.rc_refcount == cleft.rc_refcount + adjust && 748 ulen < MAXREFCEXTLEN) { 749 *shape_changed = true; 750 return xfs_refcount_merge_center_extents(cur, &left, &cleft, 751 &right, ulen, agbno, aglen); 752 } 753 754 /* Try to merge left and cleft. */ 755 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; 756 if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && 757 left.rc_refcount == cleft.rc_refcount + adjust && 758 ulen < MAXREFCEXTLEN) { 759 *shape_changed = true; 760 error = xfs_refcount_merge_left_extent(cur, &left, &cleft, 761 agbno, aglen); 762 if (error) 763 return error; 764 765 /* 766 * If we just merged left + cleft and cleft == cright, 767 * we no longer have a cright to merge with right. We're done. 768 */ 769 if (cequal) 770 return 0; 771 } 772 773 /* Try to merge cright and right. */ 774 ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; 775 if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && 776 right.rc_refcount == cright.rc_refcount + adjust && 777 ulen < MAXREFCEXTLEN) { 778 *shape_changed = true; 779 return xfs_refcount_merge_right_extent(cur, &right, &cright, 780 agbno, aglen); 781 } 782 783 return error; 784 } 785 786 /* 787 * While we're adjusting the refcounts records of an extent, we have 788 * to keep an eye on the number of extents we're dirtying -- run too 789 * many in a single transaction and we'll exceed the transaction's 790 * reservation and crash the fs. Each record adds 12 bytes to the 791 * log (plus any key updates) so we'll conservatively assume 24 bytes 792 * per record. We must also leave space for btree splits on both ends 793 * of the range and space for the CUD and a new CUI. 794 * 795 * XXX: This is a pretty hand-wavy estimate. The penalty for guessing 796 * true incorrectly is a shutdown FS; the penalty for guessing false 797 * incorrectly is more transaction rolls than might be necessary. 798 * Be conservative here. 799 */ 800 static bool 801 xfs_refcount_still_have_space( 802 struct xfs_btree_cur *cur) 803 { 804 unsigned long overhead; 805 806 overhead = cur->bc_private.a.priv.refc.shape_changes * 807 xfs_allocfree_log_count(cur->bc_mp, 1); 808 overhead *= cur->bc_mp->m_sb.sb_blocksize; 809 810 /* 811 * Only allow 2 refcount extent updates per transaction if the 812 * refcount continue update "error" has been injected. 813 */ 814 if (cur->bc_private.a.priv.refc.nr_ops > 2 && 815 XFS_TEST_ERROR(false, cur->bc_mp, 816 XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE, 817 XFS_RANDOM_REFCOUNT_CONTINUE_UPDATE)) 818 return false; 819 820 if (cur->bc_private.a.priv.refc.nr_ops == 0) 821 return true; 822 else if (overhead > cur->bc_tp->t_log_res) 823 return false; 824 return cur->bc_tp->t_log_res - overhead > 825 cur->bc_private.a.priv.refc.nr_ops * 32; 826 } 827 828 /* 829 * Adjust the refcounts of middle extents. At this point we should have 830 * split extents that crossed the adjustment range; merged with adjacent 831 * extents; and updated agbno/aglen to reflect the merges. 
Therefore, 832 * all we have to do is update the extents inside [agbno, agbno + aglen]. 833 */ 834 STATIC int 835 xfs_refcount_adjust_extents( 836 struct xfs_btree_cur *cur, 837 xfs_agblock_t *agbno, 838 xfs_extlen_t *aglen, 839 enum xfs_refc_adjust_op adj, 840 struct xfs_defer_ops *dfops, 841 struct xfs_owner_info *oinfo) 842 { 843 struct xfs_refcount_irec ext, tmp; 844 int error; 845 int found_rec, found_tmp; 846 xfs_fsblock_t fsbno; 847 848 /* Merging did all the work already. */ 849 if (*aglen == 0) 850 return 0; 851 852 error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); 853 if (error) 854 goto out_error; 855 856 while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { 857 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 858 if (error) 859 goto out_error; 860 if (!found_rec) { 861 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; 862 ext.rc_blockcount = 0; 863 ext.rc_refcount = 0; 864 } 865 866 /* 867 * Deal with a hole in the refcount tree; if a file maps to 868 * these blocks and there's no refcountbt record, pretend that 869 * there is one with refcount == 1. 870 */ 871 if (ext.rc_startblock != *agbno) { 872 tmp.rc_startblock = *agbno; 873 tmp.rc_blockcount = min(*aglen, 874 ext.rc_startblock - *agbno); 875 tmp.rc_refcount = 1 + adj; 876 trace_xfs_refcount_modify_extent(cur->bc_mp, 877 cur->bc_private.a.agno, &tmp); 878 879 /* 880 * Either cover the hole (increment) or 881 * delete the range (decrement). 882 */ 883 if (tmp.rc_refcount) { 884 error = xfs_refcount_insert(cur, &tmp, 885 &found_tmp); 886 if (error) 887 goto out_error; 888 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 889 found_tmp == 1, out_error); 890 cur->bc_private.a.priv.refc.nr_ops++; 891 } else { 892 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 893 cur->bc_private.a.agno, 894 tmp.rc_startblock); 895 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, 896 tmp.rc_blockcount, oinfo); 897 } 898 899 (*agbno) += tmp.rc_blockcount; 900 (*aglen) -= tmp.rc_blockcount; 901 902 error = xfs_refcount_lookup_ge(cur, *agbno, 903 &found_rec); 904 if (error) 905 goto out_error; 906 } 907 908 /* Stop if there's nothing left to modify */ 909 if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) 910 break; 911 912 /* 913 * Adjust the reference count and either update the tree 914 * (incr) or free the blocks (decr). 915 */ 916 if (ext.rc_refcount == MAXREFCOUNT) 917 goto skip; 918 ext.rc_refcount += adj; 919 trace_xfs_refcount_modify_extent(cur->bc_mp, 920 cur->bc_private.a.agno, &ext); 921 if (ext.rc_refcount > 1) { 922 error = xfs_refcount_update(cur, &ext); 923 if (error) 924 goto out_error; 925 cur->bc_private.a.priv.refc.nr_ops++; 926 } else if (ext.rc_refcount == 1) { 927 error = xfs_refcount_delete(cur, &found_rec); 928 if (error) 929 goto out_error; 930 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 931 found_rec == 1, out_error); 932 cur->bc_private.a.priv.refc.nr_ops++; 933 goto advloop; 934 } else { 935 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 936 cur->bc_private.a.agno, 937 ext.rc_startblock); 938 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, 939 ext.rc_blockcount, oinfo); 940 } 941 942 skip: 943 error = xfs_btree_increment(cur, 0, &found_rec); 944 if (error) 945 goto out_error; 946 947 advloop: 948 (*agbno) += ext.rc_blockcount; 949 (*aglen) -= ext.rc_blockcount; 950 } 951 952 return error; 953 out_error: 954 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 955 cur->bc_private.a.agno, error, _RET_IP_); 956 return error; 957 } 958 959 /* Adjust the reference count of a range of AG blocks. 
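 *
 * The adjustment runs in three steps: split any records crossing agbno
 * or agbno + aglen, try to merge the records at the boundaries with
 * their neighbors, and then adjust whatever remains in the middle.  If
 * the middle pass stops early because xfs_refcount_still_have_space()
 * says the transaction is nearly out of reservation, new_agbno and
 * new_aglen describe the portion of the range that has not been
 * adjusted yet, so the remainder can be finished in a later
 * transaction.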
*/ 960 STATIC int 961 xfs_refcount_adjust( 962 struct xfs_btree_cur *cur, 963 xfs_agblock_t agbno, 964 xfs_extlen_t aglen, 965 xfs_agblock_t *new_agbno, 966 xfs_extlen_t *new_aglen, 967 enum xfs_refc_adjust_op adj, 968 struct xfs_defer_ops *dfops, 969 struct xfs_owner_info *oinfo) 970 { 971 bool shape_changed; 972 int shape_changes = 0; 973 int error; 974 975 *new_agbno = agbno; 976 *new_aglen = aglen; 977 if (adj == XFS_REFCOUNT_ADJUST_INCREASE) 978 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno, 979 agbno, aglen); 980 else 981 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno, 982 agbno, aglen); 983 984 /* 985 * Ensure that no rcextents cross the boundary of the adjustment range. 986 */ 987 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 988 if (error) 989 goto out_error; 990 if (shape_changed) 991 shape_changes++; 992 993 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 994 if (error) 995 goto out_error; 996 if (shape_changed) 997 shape_changes++; 998 999 /* 1000 * Try to merge with the left or right extents of the range. 1001 */ 1002 error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, 1003 XFS_FIND_RCEXT_SHARED, &shape_changed); 1004 if (error) 1005 goto out_error; 1006 if (shape_changed) 1007 shape_changes++; 1008 if (shape_changes) 1009 cur->bc_private.a.priv.refc.shape_changes++; 1010 1011 /* Now that we've taken care of the ends, adjust the middle extents */ 1012 error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, 1013 adj, dfops, oinfo); 1014 if (error) 1015 goto out_error; 1016 1017 return 0; 1018 1019 out_error: 1020 trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno, 1021 error, _RET_IP_); 1022 return error; 1023 } 1024 1025 /* Clean up after calling xfs_refcount_finish_one. */ 1026 void 1027 xfs_refcount_finish_one_cleanup( 1028 struct xfs_trans *tp, 1029 struct xfs_btree_cur *rcur, 1030 int error) 1031 { 1032 struct xfs_buf *agbp; 1033 1034 if (rcur == NULL) 1035 return; 1036 agbp = rcur->bc_private.a.agbp; 1037 xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 1038 if (error) 1039 xfs_trans_brelse(tp, agbp); 1040 } 1041 1042 /* 1043 * Process one of the deferred refcount operations. We pass back the 1044 * btree cursor to maintain our lock on the btree between calls. 1045 * This saves time and eliminates a buffer deadlock between the 1046 * superblock and the AGF because we'll always grab them in the same 1047 * order. 
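 *
 * Note that when the cursor has to be torn down because the next intent
 * touches a different AG, the nr_ops and shape_changes counters are
 * carried over to the new cursor; they feed the
 * xfs_refcount_still_have_space() estimate so that a long chain of
 * deferred updates doesn't overrun the transaction reservation.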
1048 */ 1049 int 1050 xfs_refcount_finish_one( 1051 struct xfs_trans *tp, 1052 struct xfs_defer_ops *dfops, 1053 enum xfs_refcount_intent_type type, 1054 xfs_fsblock_t startblock, 1055 xfs_extlen_t blockcount, 1056 xfs_fsblock_t *new_fsb, 1057 xfs_extlen_t *new_len, 1058 struct xfs_btree_cur **pcur) 1059 { 1060 struct xfs_mount *mp = tp->t_mountp; 1061 struct xfs_btree_cur *rcur; 1062 struct xfs_buf *agbp = NULL; 1063 int error = 0; 1064 xfs_agnumber_t agno; 1065 xfs_agblock_t bno; 1066 xfs_agblock_t new_agbno; 1067 unsigned long nr_ops = 0; 1068 int shape_changes = 0; 1069 1070 agno = XFS_FSB_TO_AGNO(mp, startblock); 1071 ASSERT(agno != NULLAGNUMBER); 1072 bno = XFS_FSB_TO_AGBNO(mp, startblock); 1073 1074 trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), 1075 type, XFS_FSB_TO_AGBNO(mp, startblock), 1076 blockcount); 1077 1078 if (XFS_TEST_ERROR(false, mp, 1079 XFS_ERRTAG_REFCOUNT_FINISH_ONE, 1080 XFS_RANDOM_REFCOUNT_FINISH_ONE)) 1081 return -EIO; 1082 1083 /* 1084 * If we haven't gotten a cursor or the cursor AG doesn't match 1085 * the startblock, get one now. 1086 */ 1087 rcur = *pcur; 1088 if (rcur != NULL && rcur->bc_private.a.agno != agno) { 1089 nr_ops = rcur->bc_private.a.priv.refc.nr_ops; 1090 shape_changes = rcur->bc_private.a.priv.refc.shape_changes; 1091 xfs_refcount_finish_one_cleanup(tp, rcur, 0); 1092 rcur = NULL; 1093 *pcur = NULL; 1094 } 1095 if (rcur == NULL) { 1096 error = xfs_alloc_read_agf(tp->t_mountp, tp, agno, 1097 XFS_ALLOC_FLAG_FREEING, &agbp); 1098 if (error) 1099 return error; 1100 if (!agbp) 1101 return -EFSCORRUPTED; 1102 1103 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops); 1104 if (!rcur) { 1105 error = -ENOMEM; 1106 goto out_cur; 1107 } 1108 rcur->bc_private.a.priv.refc.nr_ops = nr_ops; 1109 rcur->bc_private.a.priv.refc.shape_changes = shape_changes; 1110 } 1111 *pcur = rcur; 1112 1113 switch (type) { 1114 case XFS_REFCOUNT_INCREASE: 1115 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1116 new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL); 1117 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1118 break; 1119 case XFS_REFCOUNT_DECREASE: 1120 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1121 new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL); 1122 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1123 break; 1124 case XFS_REFCOUNT_ALLOC_COW: 1125 *new_fsb = startblock + blockcount; 1126 *new_len = 0; 1127 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops); 1128 break; 1129 case XFS_REFCOUNT_FREE_COW: 1130 *new_fsb = startblock + blockcount; 1131 *new_len = 0; 1132 error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops); 1133 break; 1134 default: 1135 ASSERT(0); 1136 error = -EFSCORRUPTED; 1137 } 1138 if (!error && *new_len > 0) 1139 trace_xfs_refcount_finish_one_leftover(mp, agno, type, 1140 bno, blockcount, new_agbno, *new_len); 1141 return error; 1142 1143 out_cur: 1144 xfs_trans_brelse(tp, agbp); 1145 1146 return error; 1147 } 1148 1149 /* 1150 * Record a refcount intent for later processing. 
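 *
 * The intent is queued on the deferred-ops list here; it is turned into
 * actual refcount btree updates later, when the deferred operations are
 * finished and xfs_refcount_finish_one() is called for each item.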
1151 */ 1152 static int 1153 __xfs_refcount_add( 1154 struct xfs_mount *mp, 1155 struct xfs_defer_ops *dfops, 1156 enum xfs_refcount_intent_type type, 1157 xfs_fsblock_t startblock, 1158 xfs_extlen_t blockcount) 1159 { 1160 struct xfs_refcount_intent *ri; 1161 1162 trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock), 1163 type, XFS_FSB_TO_AGBNO(mp, startblock), 1164 blockcount); 1165 1166 ri = kmem_alloc(sizeof(struct xfs_refcount_intent), 1167 KM_SLEEP | KM_NOFS); 1168 INIT_LIST_HEAD(&ri->ri_list); 1169 ri->ri_type = type; 1170 ri->ri_startblock = startblock; 1171 ri->ri_blockcount = blockcount; 1172 1173 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); 1174 return 0; 1175 } 1176 1177 /* 1178 * Increase the reference count of the blocks backing a file's extent. 1179 */ 1180 int 1181 xfs_refcount_increase_extent( 1182 struct xfs_mount *mp, 1183 struct xfs_defer_ops *dfops, 1184 struct xfs_bmbt_irec *PREV) 1185 { 1186 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1187 return 0; 1188 1189 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE, 1190 PREV->br_startblock, PREV->br_blockcount); 1191 } 1192 1193 /* 1194 * Decrease the reference count of the blocks backing a file's extent. 1195 */ 1196 int 1197 xfs_refcount_decrease_extent( 1198 struct xfs_mount *mp, 1199 struct xfs_defer_ops *dfops, 1200 struct xfs_bmbt_irec *PREV) 1201 { 1202 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1203 return 0; 1204 1205 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE, 1206 PREV->br_startblock, PREV->br_blockcount); 1207 } 1208 1209 /* 1210 * Given an AG extent, find the lowest-numbered run of shared blocks 1211 * within that range and return the range in fbno/flen. If 1212 * find_end_of_shared is set, return the longest contiguous extent of 1213 * shared blocks; if not, just return the first extent we find. If no 1214 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK 1215 * and 0, respectively. 1216 */ 1217 int 1218 xfs_refcount_find_shared( 1219 struct xfs_btree_cur *cur, 1220 xfs_agblock_t agbno, 1221 xfs_extlen_t aglen, 1222 xfs_agblock_t *fbno, 1223 xfs_extlen_t *flen, 1224 bool find_end_of_shared) 1225 { 1226 struct xfs_refcount_irec tmp; 1227 int i; 1228 int have; 1229 int error; 1230 1231 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno, 1232 agbno, aglen); 1233 1234 /* By default, skip the whole range */ 1235 *fbno = NULLAGBLOCK; 1236 *flen = 0; 1237 1238 /* Try to find a refcount extent that crosses the start */ 1239 error = xfs_refcount_lookup_le(cur, agbno, &have); 1240 if (error) 1241 goto out_error; 1242 if (!have) { 1243 /* No left extent, look at the next one */ 1244 error = xfs_btree_increment(cur, 0, &have); 1245 if (error) 1246 goto out_error; 1247 if (!have) 1248 goto done; 1249 } 1250 error = xfs_refcount_get_rec(cur, &tmp, &i); 1251 if (error) 1252 goto out_error; 1253 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1254 1255 /* If the extent ends before the start, look at the next one */ 1256 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { 1257 error = xfs_btree_increment(cur, 0, &have); 1258 if (error) 1259 goto out_error; 1260 if (!have) 1261 goto done; 1262 error = xfs_refcount_get_rec(cur, &tmp, &i); 1263 if (error) 1264 goto out_error; 1265 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1266 } 1267 1268 /* If the extent starts after the range we want, bail out */ 1269 if (tmp.rc_startblock >= agbno + aglen) 1270 goto done; 1271 1272 /* We found the start of a shared extent! 
*/ 1273 if (tmp.rc_startblock < agbno) { 1274 tmp.rc_blockcount -= (agbno - tmp.rc_startblock); 1275 tmp.rc_startblock = agbno; 1276 } 1277 1278 *fbno = tmp.rc_startblock; 1279 *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); 1280 if (!find_end_of_shared) 1281 goto done; 1282 1283 /* Otherwise, find the end of this shared extent */ 1284 while (*fbno + *flen < agbno + aglen) { 1285 error = xfs_btree_increment(cur, 0, &have); 1286 if (error) 1287 goto out_error; 1288 if (!have) 1289 break; 1290 error = xfs_refcount_get_rec(cur, &tmp, &i); 1291 if (error) 1292 goto out_error; 1293 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1294 if (tmp.rc_startblock >= agbno + aglen || 1295 tmp.rc_startblock != *fbno + *flen) 1296 break; 1297 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); 1298 } 1299 1300 done: 1301 trace_xfs_refcount_find_shared_result(cur->bc_mp, 1302 cur->bc_private.a.agno, *fbno, *flen); 1303 1304 out_error: 1305 if (error) 1306 trace_xfs_refcount_find_shared_error(cur->bc_mp, 1307 cur->bc_private.a.agno, error, _RET_IP_); 1308 return error; 1309 } 1310 1311 /* 1312 * Recovering CoW Blocks After a Crash 1313 * 1314 * Due to the way that the copy on write mechanism works, there's a window of 1315 * opportunity in which we can lose track of allocated blocks during a crash. 1316 * Because CoW uses delayed allocation in the in-core CoW fork, writeback 1317 * causes blocks to be allocated and stored in the CoW fork. The blocks are 1318 * no longer in the free space btree but are not otherwise recorded anywhere 1319 * until the write completes and the blocks are mapped into the file. A crash 1320 * in between allocation and remapping results in the replacement blocks being 1321 * lost. This situation is exacerbated by the CoW extent size hint because 1322 * allocations can hang around for long time. 1323 * 1324 * However, there is a place where we can record these allocations before they 1325 * become mappings -- the reference count btree. The btree does not record 1326 * extents with refcount == 1, so we can record allocations with a refcount of 1327 * 1. Blocks being used for CoW writeout cannot be shared, so there should be 1328 * no conflict with shared block records. These mappings should be created 1329 * when we allocate blocks to the CoW fork and deleted when they're removed 1330 * from the CoW fork. 1331 * 1332 * Minor nit: records for in-progress CoW allocations and records for shared 1333 * extents must never be merged, to preserve the property that (except for CoW 1334 * allocations) there are no refcount btree entries with refcount == 1. The 1335 * only time this could potentially happen is when unsharing a block that's 1336 * adjacent to CoW allocations, so we must be careful to avoid this. 1337 * 1338 * At mount time we recover lost CoW allocations by searching the refcount 1339 * btree for these refcount == 1 mappings. These represent CoW allocations 1340 * that were in progress at the time the filesystem went down, so we can free 1341 * them to get the space back. 1342 * 1343 * This mechanism is superior to creating EFIs for unmapped CoW extents for 1344 * several reasons -- first, EFIs pin the tail of the log and would have to be 1345 * periodically relogged to avoid filling up the log. 
Second, CoW completions 1346 * will have to file an EFD and create new EFIs for whatever remains in the 1347 * CoW fork; this partially takes care of (1) but extent-size reservations 1348 * will have to periodically relog even if there's no writeout in progress. 1349 * This can happen if the CoW extent size hint is set, which you really want. 1350 * Third, EFIs cannot currently be automatically relogged into newer 1351 * transactions to advance the log tail. Fourth, stuffing the log full of 1352 * EFIs places an upper bound on the number of CoW allocations that can be 1353 * held filesystem-wide at any given time. Recording them in the refcount 1354 * btree doesn't require us to maintain any state in memory and doesn't pin 1355 * the log. 1356 */ 1357 /* 1358 * Adjust the refcounts of CoW allocations. These allocations are "magic" 1359 * in that they're not referenced anywhere else in the filesystem, so we 1360 * stash them in the refcount btree with a refcount of 1 until either file 1361 * remapping (or CoW cancellation) happens. 1362 */ 1363 STATIC int 1364 xfs_refcount_adjust_cow_extents( 1365 struct xfs_btree_cur *cur, 1366 xfs_agblock_t agbno, 1367 xfs_extlen_t aglen, 1368 enum xfs_refc_adjust_op adj, 1369 struct xfs_defer_ops *dfops, 1370 struct xfs_owner_info *oinfo) 1371 { 1372 struct xfs_refcount_irec ext, tmp; 1373 int error; 1374 int found_rec, found_tmp; 1375 1376 if (aglen == 0) 1377 return 0; 1378 1379 /* Find any overlapping refcount records */ 1380 error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); 1381 if (error) 1382 goto out_error; 1383 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 1384 if (error) 1385 goto out_error; 1386 if (!found_rec) { 1387 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + 1388 XFS_REFC_COW_START; 1389 ext.rc_blockcount = 0; 1390 ext.rc_refcount = 0; 1391 } 1392 1393 switch (adj) { 1394 case XFS_REFCOUNT_ADJUST_COW_ALLOC: 1395 /* Adding a CoW reservation, there should be nothing here. */ 1396 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1397 ext.rc_startblock >= agbno + aglen, out_error); 1398 1399 tmp.rc_startblock = agbno; 1400 tmp.rc_blockcount = aglen; 1401 tmp.rc_refcount = 1; 1402 trace_xfs_refcount_modify_extent(cur->bc_mp, 1403 cur->bc_private.a.agno, &tmp); 1404 1405 error = xfs_refcount_insert(cur, &tmp, 1406 &found_tmp); 1407 if (error) 1408 goto out_error; 1409 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1410 found_tmp == 1, out_error); 1411 break; 1412 case XFS_REFCOUNT_ADJUST_COW_FREE: 1413 /* Removing a CoW reservation, there should be one extent. */ 1414 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1415 ext.rc_startblock == agbno, out_error); 1416 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1417 ext.rc_blockcount == aglen, out_error); 1418 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1419 ext.rc_refcount == 1, out_error); 1420 1421 ext.rc_refcount = 0; 1422 trace_xfs_refcount_modify_extent(cur->bc_mp, 1423 cur->bc_private.a.agno, &ext); 1424 error = xfs_refcount_delete(cur, &found_rec); 1425 if (error) 1426 goto out_error; 1427 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1428 found_rec == 1, out_error); 1429 break; 1430 default: 1431 ASSERT(0); 1432 } 1433 1434 return error; 1435 out_error: 1436 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1437 cur->bc_private.a.agno, error, _RET_IP_); 1438 return error; 1439 } 1440 1441 /* 1442 * Add or remove refcount btree entries for CoW reservations. 
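 *
 * CoW staging records are stored at agbno + XFS_REFC_COW_START, which
 * is beyond any valid AG block number (the recovery code below bails
 * out if sb_agblocks >= XFS_REFC_COW_START), so staging records occupy
 * their own part of the keyspace and can never collide or be merged
 * with ordinary shared-extent records.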
1443 */ 1444 STATIC int 1445 xfs_refcount_adjust_cow( 1446 struct xfs_btree_cur *cur, 1447 xfs_agblock_t agbno, 1448 xfs_extlen_t aglen, 1449 enum xfs_refc_adjust_op adj, 1450 struct xfs_defer_ops *dfops) 1451 { 1452 bool shape_changed; 1453 int error; 1454 1455 agbno += XFS_REFC_COW_START; 1456 1457 /* 1458 * Ensure that no rcextents cross the boundary of the adjustment range. 1459 */ 1460 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1461 if (error) 1462 goto out_error; 1463 1464 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1465 if (error) 1466 goto out_error; 1467 1468 /* 1469 * Try to merge with the left or right extents of the range. 1470 */ 1471 error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, 1472 XFS_FIND_RCEXT_COW, &shape_changed); 1473 if (error) 1474 goto out_error; 1475 1476 /* Now that we've taken care of the ends, adjust the middle extents */ 1477 error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj, 1478 dfops, NULL); 1479 if (error) 1480 goto out_error; 1481 1482 return 0; 1483 1484 out_error: 1485 trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno, 1486 error, _RET_IP_); 1487 return error; 1488 } 1489 1490 /* 1491 * Record a CoW allocation in the refcount btree. 1492 */ 1493 STATIC int 1494 __xfs_refcount_cow_alloc( 1495 struct xfs_btree_cur *rcur, 1496 xfs_agblock_t agbno, 1497 xfs_extlen_t aglen, 1498 struct xfs_defer_ops *dfops) 1499 { 1500 int error; 1501 1502 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, 1503 agbno, aglen); 1504 1505 /* Add refcount btree reservation */ 1506 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1507 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); 1508 if (error) 1509 return error; 1510 1511 /* Add rmap entry */ 1512 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { 1513 error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops, 1514 rcur->bc_private.a.agno, 1515 agbno, aglen, XFS_RMAP_OWN_COW); 1516 if (error) 1517 return error; 1518 } 1519 1520 return error; 1521 } 1522 1523 /* 1524 * Remove a CoW allocation from the refcount btree. 1525 */ 1526 STATIC int 1527 __xfs_refcount_cow_free( 1528 struct xfs_btree_cur *rcur, 1529 xfs_agblock_t agbno, 1530 xfs_extlen_t aglen, 1531 struct xfs_defer_ops *dfops) 1532 { 1533 int error; 1534 1535 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, 1536 agbno, aglen); 1537 1538 /* Remove refcount btree reservation */ 1539 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1540 XFS_REFCOUNT_ADJUST_COW_FREE, dfops); 1541 if (error) 1542 return error; 1543 1544 /* Remove rmap entry */ 1545 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) { 1546 error = xfs_rmap_free_extent(rcur->bc_mp, dfops, 1547 rcur->bc_private.a.agno, 1548 agbno, aglen, XFS_RMAP_OWN_COW); 1549 if (error) 1550 return error; 1551 } 1552 1553 return error; 1554 } 1555 1556 /* Record a CoW staging extent in the refcount btree. */ 1557 int 1558 xfs_refcount_alloc_cow_extent( 1559 struct xfs_mount *mp, 1560 struct xfs_defer_ops *dfops, 1561 xfs_fsblock_t fsb, 1562 xfs_extlen_t len) 1563 { 1564 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1565 return 0; 1566 1567 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, 1568 fsb, len); 1569 } 1570 1571 /* Forget a CoW staging event in the refcount btree. 
*/ 1572 int 1573 xfs_refcount_free_cow_extent( 1574 struct xfs_mount *mp, 1575 struct xfs_defer_ops *dfops, 1576 xfs_fsblock_t fsb, 1577 xfs_extlen_t len) 1578 { 1579 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1580 return 0; 1581 1582 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, 1583 fsb, len); 1584 } 1585 1586 struct xfs_refcount_recovery { 1587 struct list_head rr_list; 1588 struct xfs_refcount_irec rr_rrec; 1589 }; 1590 1591 /* Stuff an extent on the recovery list. */ 1592 STATIC int 1593 xfs_refcount_recover_extent( 1594 struct xfs_btree_cur *cur, 1595 union xfs_btree_rec *rec, 1596 void *priv) 1597 { 1598 struct list_head *debris = priv; 1599 struct xfs_refcount_recovery *rr; 1600 1601 if (be32_to_cpu(rec->refc.rc_refcount) != 1) 1602 return -EFSCORRUPTED; 1603 1604 rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP); 1605 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); 1606 list_add_tail(&rr->rr_list, debris); 1607 1608 return 0; 1609 } 1610 1611 /* Find and remove leftover CoW reservations. */ 1612 int 1613 xfs_refcount_recover_cow_leftovers( 1614 struct xfs_mount *mp, 1615 xfs_agnumber_t agno) 1616 { 1617 struct xfs_trans *tp; 1618 struct xfs_btree_cur *cur; 1619 struct xfs_buf *agbp; 1620 struct xfs_refcount_recovery *rr, *n; 1621 struct list_head debris; 1622 union xfs_btree_irec low; 1623 union xfs_btree_irec high; 1624 struct xfs_defer_ops dfops; 1625 xfs_fsblock_t fsb; 1626 xfs_agblock_t agbno; 1627 int error; 1628 1629 if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) 1630 return -EOPNOTSUPP; 1631 1632 error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); 1633 if (error) 1634 return error; 1635 cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL); 1636 1637 /* Find all the leftover CoW staging extents. */ 1638 INIT_LIST_HEAD(&debris); 1639 memset(&low, 0, sizeof(low)); 1640 memset(&high, 0, sizeof(high)); 1641 low.rc.rc_startblock = XFS_REFC_COW_START; 1642 high.rc.rc_startblock = -1U; 1643 error = xfs_btree_query_range(cur, &low, &high, 1644 xfs_refcount_recover_extent, &debris); 1645 if (error) 1646 goto out_cursor; 1647 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1648 xfs_buf_relse(agbp); 1649 1650 /* Now iterate the list to free the leftovers */ 1651 list_for_each_entry(rr, &debris, rr_list) { 1652 /* Set up transaction. */ 1653 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); 1654 if (error) 1655 goto out_free; 1656 1657 trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec); 1658 1659 /* Free the orphan record */ 1660 xfs_defer_init(&dfops, &fsb); 1661 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; 1662 fsb = XFS_AGB_TO_FSB(mp, agno, agbno); 1663 error = xfs_refcount_free_cow_extent(mp, &dfops, fsb, 1664 rr->rr_rrec.rc_blockcount); 1665 if (error) 1666 goto out_defer; 1667 1668 /* Free the block. */ 1669 xfs_bmap_add_free(mp, &dfops, fsb, 1670 rr->rr_rrec.rc_blockcount, NULL); 1671 1672 error = xfs_defer_finish(&tp, &dfops, NULL); 1673 if (error) 1674 goto out_defer; 1675 1676 error = xfs_trans_commit(tp); 1677 if (error) 1678 goto out_free; 1679 } 1680 1681 out_free: 1682 /* Free the leftover list */ 1683 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1684 list_del(&rr->rr_list); 1685 kmem_free(rr); 1686 } 1687 return error; 1688 1689 out_cursor: 1690 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1691 xfs_buf_relse(agbp); 1692 goto out_free; 1693 1694 out_defer: 1695 xfs_defer_cancel(&dfops); 1696 xfs_trans_cancel(tp); 1697 goto out_free; 1698 } 1699