1 /* 2 * Copyright (C) 2016 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_log_format.h" 25 #include "xfs_trans_resv.h" 26 #include "xfs_sb.h" 27 #include "xfs_mount.h" 28 #include "xfs_defer.h" 29 #include "xfs_btree.h" 30 #include "xfs_bmap.h" 31 #include "xfs_refcount_btree.h" 32 #include "xfs_alloc.h" 33 #include "xfs_errortag.h" 34 #include "xfs_error.h" 35 #include "xfs_trace.h" 36 #include "xfs_cksum.h" 37 #include "xfs_trans.h" 38 #include "xfs_bit.h" 39 #include "xfs_refcount.h" 40 #include "xfs_rmap.h" 41 42 /* Allowable refcount adjustment amounts. */ 43 enum xfs_refc_adjust_op { 44 XFS_REFCOUNT_ADJUST_INCREASE = 1, 45 XFS_REFCOUNT_ADJUST_DECREASE = -1, 46 XFS_REFCOUNT_ADJUST_COW_ALLOC = 0, 47 XFS_REFCOUNT_ADJUST_COW_FREE = -1, 48 }; 49 50 STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur, 51 xfs_agblock_t agbno, xfs_extlen_t aglen, 52 struct xfs_defer_ops *dfops); 53 STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur, 54 xfs_agblock_t agbno, xfs_extlen_t aglen, 55 struct xfs_defer_ops *dfops); 56 57 /* 58 * Look up the first record less than or equal to [bno, len] in the btree 59 * given by cur. 60 */ 61 int 62 xfs_refcount_lookup_le( 63 struct xfs_btree_cur *cur, 64 xfs_agblock_t bno, 65 int *stat) 66 { 67 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, 68 XFS_LOOKUP_LE); 69 cur->bc_rec.rc.rc_startblock = bno; 70 cur->bc_rec.rc.rc_blockcount = 0; 71 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 72 } 73 74 /* 75 * Look up the first record greater than or equal to [bno, len] in the btree 76 * given by cur. 77 */ 78 int 79 xfs_refcount_lookup_ge( 80 struct xfs_btree_cur *cur, 81 xfs_agblock_t bno, 82 int *stat) 83 { 84 trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno, 85 XFS_LOOKUP_GE); 86 cur->bc_rec.rc.rc_startblock = bno; 87 cur->bc_rec.rc.rc_blockcount = 0; 88 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 89 } 90 91 /* Convert on-disk record to in-core format. */ 92 static inline void 93 xfs_refcount_btrec_to_irec( 94 union xfs_btree_rec *rec, 95 struct xfs_refcount_irec *irec) 96 { 97 irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock); 98 irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount); 99 irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount); 100 } 101 102 /* 103 * Get the data from the pointed-to record. 104 */ 105 int 106 xfs_refcount_get_rec( 107 struct xfs_btree_cur *cur, 108 struct xfs_refcount_irec *irec, 109 int *stat) 110 { 111 union xfs_btree_rec *rec; 112 int error; 113 114 error = xfs_btree_get_rec(cur, &rec, stat); 115 if (!error && *stat == 1) { 116 xfs_refcount_btrec_to_irec(rec, irec); 117 trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, 118 irec); 119 } 120 return error; 121 } 122 123 /* 124 * Update the record referred to by cur to the value given 125 * by [bno, len, refcount]. 126 * This either works (return 0) or gets an EFSCORRUPTED error. 127 */ 128 STATIC int 129 xfs_refcount_update( 130 struct xfs_btree_cur *cur, 131 struct xfs_refcount_irec *irec) 132 { 133 union xfs_btree_rec rec; 134 int error; 135 136 trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec); 137 rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock); 138 rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount); 139 rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount); 140 error = xfs_btree_update(cur, &rec); 141 if (error) 142 trace_xfs_refcount_update_error(cur->bc_mp, 143 cur->bc_private.a.agno, error, _RET_IP_); 144 return error; 145 } 146 147 /* 148 * Insert the record referred to by cur to the value given 149 * by [bno, len, refcount]. 150 * This either works (return 0) or gets an EFSCORRUPTED error. 151 */ 152 STATIC int 153 xfs_refcount_insert( 154 struct xfs_btree_cur *cur, 155 struct xfs_refcount_irec *irec, 156 int *i) 157 { 158 int error; 159 160 trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec); 161 cur->bc_rec.rc.rc_startblock = irec->rc_startblock; 162 cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount; 163 cur->bc_rec.rc.rc_refcount = irec->rc_refcount; 164 error = xfs_btree_insert(cur, i); 165 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); 166 out_error: 167 if (error) 168 trace_xfs_refcount_insert_error(cur->bc_mp, 169 cur->bc_private.a.agno, error, _RET_IP_); 170 return error; 171 } 172 173 /* 174 * Remove the record referred to by cur, then set the pointer to the spot 175 * where the record could be re-inserted, in case we want to increment or 176 * decrement the cursor. 177 * This either works (return 0) or gets an EFSCORRUPTED error. 178 */ 179 STATIC int 180 xfs_refcount_delete( 181 struct xfs_btree_cur *cur, 182 int *i) 183 { 184 struct xfs_refcount_irec irec; 185 int found_rec; 186 int error; 187 188 error = xfs_refcount_get_rec(cur, &irec, &found_rec); 189 if (error) 190 goto out_error; 191 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 192 trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec); 193 error = xfs_btree_delete(cur, i); 194 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error); 195 if (error) 196 goto out_error; 197 error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec); 198 out_error: 199 if (error) 200 trace_xfs_refcount_delete_error(cur->bc_mp, 201 cur->bc_private.a.agno, error, _RET_IP_); 202 return error; 203 } 204 205 /* 206 * Adjusting the Reference Count 207 * 208 * As stated elsewhere, the reference count btree (refcbt) stores 209 * >1 reference counts for extents of physical blocks. In this 210 * operation, we're either raising or lowering the reference count of 211 * some subrange stored in the tree: 212 * 213 * <------ adjustment range ------> 214 * ----+ +---+-----+ +--+--------+--------- 215 * 2 | | 3 | 4 | |17| 55 | 10 216 * ----+ +---+-----+ +--+--------+--------- 217 * X axis is physical blocks number; 218 * reference counts are the numbers inside the rectangles 219 * 220 * The first thing we need to do is to ensure that there are no 221 * refcount extents crossing either boundary of the range to be 222 * adjusted. For any extent that does cross a boundary, split it into 223 * two extents so that we can increment the refcount of one of the 224 * pieces later: 225 * 226 * <------ adjustment range ------> 227 * ----+ +---+-----+ +--+--------+----+---- 228 * 2 | | 3 | 2 | |17| 55 | 10 | 10 229 * ----+ +---+-----+ +--+--------+----+---- 230 * 231 * For this next step, let's assume that all the physical blocks in 232 * the adjustment range are mapped to a file and are therefore in use 233 * at least once. Therefore, we can infer that any gap in the 234 * refcount tree within the adjustment range represents a physical 235 * extent with refcount == 1: 236 * 237 * <------ adjustment range ------> 238 * ----+---+---+-----+-+--+--------+----+---- 239 * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10 240 * ----+---+---+-----+-+--+--------+----+---- 241 * ^ 242 * 243 * For each extent that falls within the interval range, figure out 244 * which extent is to the left or the right of that extent. Now we 245 * have a left, current, and right extent. If the new reference count 246 * of the center extent enables us to merge left, center, and right 247 * into one record covering all three, do so. If the center extent is 248 * at the left end of the range, abuts the left extent, and its new 249 * reference count matches the left extent's record, then merge them. 250 * If the center extent is at the right end of the range, abuts the 251 * right extent, and the reference counts match, merge those. In the 252 * example, we can left merge (assuming an increment operation): 253 * 254 * <------ adjustment range ------> 255 * --------+---+-----+-+--+--------+----+---- 256 * 2 | 3 | 2 |1|17| 55 | 10 | 10 257 * --------+---+-----+-+--+--------+----+---- 258 * ^ 259 * 260 * For all other extents within the range, adjust the reference count 261 * or delete it if the refcount falls below 2. If we were 262 * incrementing, the end result looks like this: 263 * 264 * <------ adjustment range ------> 265 * --------+---+-----+-+--+--------+----+---- 266 * 2 | 4 | 3 |2|18| 56 | 11 | 10 267 * --------+---+-----+-+--+--------+----+---- 268 * 269 * The result of a decrement operation looks as such: 270 * 271 * <------ adjustment range ------> 272 * ----+ +---+ +--+--------+----+---- 273 * 2 | | 2 | |16| 54 | 9 | 10 274 * ----+ +---+ +--+--------+----+---- 275 * DDDD 111111DD 276 * 277 * The blocks marked "D" are freed; the blocks marked "1" are only 278 * referenced once and therefore the record is removed from the 279 * refcount btree. 280 */ 281 282 /* Next block after this extent. */ 283 static inline xfs_agblock_t 284 xfs_refc_next( 285 struct xfs_refcount_irec *rc) 286 { 287 return rc->rc_startblock + rc->rc_blockcount; 288 } 289 290 /* 291 * Split a refcount extent that crosses agbno. 292 */ 293 STATIC int 294 xfs_refcount_split_extent( 295 struct xfs_btree_cur *cur, 296 xfs_agblock_t agbno, 297 bool *shape_changed) 298 { 299 struct xfs_refcount_irec rcext, tmp; 300 int found_rec; 301 int error; 302 303 *shape_changed = false; 304 error = xfs_refcount_lookup_le(cur, agbno, &found_rec); 305 if (error) 306 goto out_error; 307 if (!found_rec) 308 return 0; 309 310 error = xfs_refcount_get_rec(cur, &rcext, &found_rec); 311 if (error) 312 goto out_error; 313 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 314 if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno) 315 return 0; 316 317 *shape_changed = true; 318 trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno, 319 &rcext, agbno); 320 321 /* Establish the right extent. */ 322 tmp = rcext; 323 tmp.rc_startblock = agbno; 324 tmp.rc_blockcount -= (agbno - rcext.rc_startblock); 325 error = xfs_refcount_update(cur, &tmp); 326 if (error) 327 goto out_error; 328 329 /* Insert the left extent. */ 330 tmp = rcext; 331 tmp.rc_blockcount = agbno - rcext.rc_startblock; 332 error = xfs_refcount_insert(cur, &tmp, &found_rec); 333 if (error) 334 goto out_error; 335 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 336 return error; 337 338 out_error: 339 trace_xfs_refcount_split_extent_error(cur->bc_mp, 340 cur->bc_private.a.agno, error, _RET_IP_); 341 return error; 342 } 343 344 /* 345 * Merge the left, center, and right extents. 346 */ 347 STATIC int 348 xfs_refcount_merge_center_extents( 349 struct xfs_btree_cur *cur, 350 struct xfs_refcount_irec *left, 351 struct xfs_refcount_irec *center, 352 struct xfs_refcount_irec *right, 353 unsigned long long extlen, 354 xfs_agblock_t *agbno, 355 xfs_extlen_t *aglen) 356 { 357 int error; 358 int found_rec; 359 360 trace_xfs_refcount_merge_center_extents(cur->bc_mp, 361 cur->bc_private.a.agno, left, center, right); 362 363 /* 364 * Make sure the center and right extents are not in the btree. 365 * If the center extent was synthesized, the first delete call 366 * removes the right extent and we skip the second deletion. 367 * If center and right were in the btree, then the first delete 368 * call removes the center and the second one removes the right 369 * extent. 370 */ 371 error = xfs_refcount_lookup_ge(cur, center->rc_startblock, 372 &found_rec); 373 if (error) 374 goto out_error; 375 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 376 377 error = xfs_refcount_delete(cur, &found_rec); 378 if (error) 379 goto out_error; 380 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 381 382 if (center->rc_refcount > 1) { 383 error = xfs_refcount_delete(cur, &found_rec); 384 if (error) 385 goto out_error; 386 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 387 out_error); 388 } 389 390 /* Enlarge the left extent. */ 391 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 392 &found_rec); 393 if (error) 394 goto out_error; 395 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 396 397 left->rc_blockcount = extlen; 398 error = xfs_refcount_update(cur, left); 399 if (error) 400 goto out_error; 401 402 *aglen = 0; 403 return error; 404 405 out_error: 406 trace_xfs_refcount_merge_center_extents_error(cur->bc_mp, 407 cur->bc_private.a.agno, error, _RET_IP_); 408 return error; 409 } 410 411 /* 412 * Merge with the left extent. 413 */ 414 STATIC int 415 xfs_refcount_merge_left_extent( 416 struct xfs_btree_cur *cur, 417 struct xfs_refcount_irec *left, 418 struct xfs_refcount_irec *cleft, 419 xfs_agblock_t *agbno, 420 xfs_extlen_t *aglen) 421 { 422 int error; 423 int found_rec; 424 425 trace_xfs_refcount_merge_left_extent(cur->bc_mp, 426 cur->bc_private.a.agno, left, cleft); 427 428 /* If the extent at agbno (cleft) wasn't synthesized, remove it. */ 429 if (cleft->rc_refcount > 1) { 430 error = xfs_refcount_lookup_le(cur, cleft->rc_startblock, 431 &found_rec); 432 if (error) 433 goto out_error; 434 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 435 out_error); 436 437 error = xfs_refcount_delete(cur, &found_rec); 438 if (error) 439 goto out_error; 440 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 441 out_error); 442 } 443 444 /* Enlarge the left extent. */ 445 error = xfs_refcount_lookup_le(cur, left->rc_startblock, 446 &found_rec); 447 if (error) 448 goto out_error; 449 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 450 451 left->rc_blockcount += cleft->rc_blockcount; 452 error = xfs_refcount_update(cur, left); 453 if (error) 454 goto out_error; 455 456 *agbno += cleft->rc_blockcount; 457 *aglen -= cleft->rc_blockcount; 458 return error; 459 460 out_error: 461 trace_xfs_refcount_merge_left_extent_error(cur->bc_mp, 462 cur->bc_private.a.agno, error, _RET_IP_); 463 return error; 464 } 465 466 /* 467 * Merge with the right extent. 468 */ 469 STATIC int 470 xfs_refcount_merge_right_extent( 471 struct xfs_btree_cur *cur, 472 struct xfs_refcount_irec *right, 473 struct xfs_refcount_irec *cright, 474 xfs_agblock_t *agbno, 475 xfs_extlen_t *aglen) 476 { 477 int error; 478 int found_rec; 479 480 trace_xfs_refcount_merge_right_extent(cur->bc_mp, 481 cur->bc_private.a.agno, cright, right); 482 483 /* 484 * If the extent ending at agbno+aglen (cright) wasn't synthesized, 485 * remove it. 486 */ 487 if (cright->rc_refcount > 1) { 488 error = xfs_refcount_lookup_le(cur, cright->rc_startblock, 489 &found_rec); 490 if (error) 491 goto out_error; 492 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 493 out_error); 494 495 error = xfs_refcount_delete(cur, &found_rec); 496 if (error) 497 goto out_error; 498 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 499 out_error); 500 } 501 502 /* Enlarge the right extent. */ 503 error = xfs_refcount_lookup_le(cur, right->rc_startblock, 504 &found_rec); 505 if (error) 506 goto out_error; 507 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 508 509 right->rc_startblock -= cright->rc_blockcount; 510 right->rc_blockcount += cright->rc_blockcount; 511 error = xfs_refcount_update(cur, right); 512 if (error) 513 goto out_error; 514 515 *aglen -= cright->rc_blockcount; 516 return error; 517 518 out_error: 519 trace_xfs_refcount_merge_right_extent_error(cur->bc_mp, 520 cur->bc_private.a.agno, error, _RET_IP_); 521 return error; 522 } 523 524 #define XFS_FIND_RCEXT_SHARED 1 525 #define XFS_FIND_RCEXT_COW 2 526 /* 527 * Find the left extent and the one after it (cleft). This function assumes 528 * that we've already split any extent crossing agbno. 529 */ 530 STATIC int 531 xfs_refcount_find_left_extents( 532 struct xfs_btree_cur *cur, 533 struct xfs_refcount_irec *left, 534 struct xfs_refcount_irec *cleft, 535 xfs_agblock_t agbno, 536 xfs_extlen_t aglen, 537 int flags) 538 { 539 struct xfs_refcount_irec tmp; 540 int error; 541 int found_rec; 542 543 left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK; 544 error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec); 545 if (error) 546 goto out_error; 547 if (!found_rec) 548 return 0; 549 550 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 551 if (error) 552 goto out_error; 553 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 554 555 if (xfs_refc_next(&tmp) != agbno) 556 return 0; 557 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 558 return 0; 559 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 560 return 0; 561 /* We have a left extent; retrieve (or invent) the next right one */ 562 *left = tmp; 563 564 error = xfs_btree_increment(cur, 0, &found_rec); 565 if (error) 566 goto out_error; 567 if (found_rec) { 568 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 569 if (error) 570 goto out_error; 571 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 572 out_error); 573 574 /* if tmp starts at the end of our range, just use that */ 575 if (tmp.rc_startblock == agbno) 576 *cleft = tmp; 577 else { 578 /* 579 * There's a gap in the refcntbt at the start of the 580 * range we're interested in (refcount == 1) so 581 * synthesize the implied extent and pass it back. 582 * We assume here that the agbno/aglen range was 583 * passed in from a data fork extent mapping and 584 * therefore is allocated to exactly one owner. 585 */ 586 cleft->rc_startblock = agbno; 587 cleft->rc_blockcount = min(aglen, 588 tmp.rc_startblock - agbno); 589 cleft->rc_refcount = 1; 590 } 591 } else { 592 /* 593 * No extents, so pretend that there's one covering the whole 594 * range. 595 */ 596 cleft->rc_startblock = agbno; 597 cleft->rc_blockcount = aglen; 598 cleft->rc_refcount = 1; 599 } 600 trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno, 601 left, cleft, agbno); 602 return error; 603 604 out_error: 605 trace_xfs_refcount_find_left_extent_error(cur->bc_mp, 606 cur->bc_private.a.agno, error, _RET_IP_); 607 return error; 608 } 609 610 /* 611 * Find the right extent and the one before it (cright). This function 612 * assumes that we've already split any extents crossing agbno + aglen. 613 */ 614 STATIC int 615 xfs_refcount_find_right_extents( 616 struct xfs_btree_cur *cur, 617 struct xfs_refcount_irec *right, 618 struct xfs_refcount_irec *cright, 619 xfs_agblock_t agbno, 620 xfs_extlen_t aglen, 621 int flags) 622 { 623 struct xfs_refcount_irec tmp; 624 int error; 625 int found_rec; 626 627 right->rc_startblock = cright->rc_startblock = NULLAGBLOCK; 628 error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec); 629 if (error) 630 goto out_error; 631 if (!found_rec) 632 return 0; 633 634 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 635 if (error) 636 goto out_error; 637 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error); 638 639 if (tmp.rc_startblock != agbno + aglen) 640 return 0; 641 if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2) 642 return 0; 643 if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1) 644 return 0; 645 /* We have a right extent; retrieve (or invent) the next left one */ 646 *right = tmp; 647 648 error = xfs_btree_decrement(cur, 0, &found_rec); 649 if (error) 650 goto out_error; 651 if (found_rec) { 652 error = xfs_refcount_get_rec(cur, &tmp, &found_rec); 653 if (error) 654 goto out_error; 655 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, 656 out_error); 657 658 /* if tmp ends at the end of our range, just use that */ 659 if (xfs_refc_next(&tmp) == agbno + aglen) 660 *cright = tmp; 661 else { 662 /* 663 * There's a gap in the refcntbt at the end of the 664 * range we're interested in (refcount == 1) so 665 * create the implied extent and pass it back. 666 * We assume here that the agbno/aglen range was 667 * passed in from a data fork extent mapping and 668 * therefore is allocated to exactly one owner. 669 */ 670 cright->rc_startblock = max(agbno, xfs_refc_next(&tmp)); 671 cright->rc_blockcount = right->rc_startblock - 672 cright->rc_startblock; 673 cright->rc_refcount = 1; 674 } 675 } else { 676 /* 677 * No extents, so pretend that there's one covering the whole 678 * range. 679 */ 680 cright->rc_startblock = agbno; 681 cright->rc_blockcount = aglen; 682 cright->rc_refcount = 1; 683 } 684 trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno, 685 cright, right, agbno + aglen); 686 return error; 687 688 out_error: 689 trace_xfs_refcount_find_right_extent_error(cur->bc_mp, 690 cur->bc_private.a.agno, error, _RET_IP_); 691 return error; 692 } 693 694 /* Is this extent valid? */ 695 static inline bool 696 xfs_refc_valid( 697 struct xfs_refcount_irec *rc) 698 { 699 return rc->rc_startblock != NULLAGBLOCK; 700 } 701 702 /* 703 * Try to merge with any extents on the boundaries of the adjustment range. 704 */ 705 STATIC int 706 xfs_refcount_merge_extents( 707 struct xfs_btree_cur *cur, 708 xfs_agblock_t *agbno, 709 xfs_extlen_t *aglen, 710 enum xfs_refc_adjust_op adjust, 711 int flags, 712 bool *shape_changed) 713 { 714 struct xfs_refcount_irec left = {0}, cleft = {0}; 715 struct xfs_refcount_irec cright = {0}, right = {0}; 716 int error; 717 unsigned long long ulen; 718 bool cequal; 719 720 *shape_changed = false; 721 /* 722 * Find the extent just below agbno [left], just above agbno [cleft], 723 * just below (agbno + aglen) [cright], and just above (agbno + aglen) 724 * [right]. 725 */ 726 error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno, 727 *aglen, flags); 728 if (error) 729 return error; 730 error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno, 731 *aglen, flags); 732 if (error) 733 return error; 734 735 /* No left or right extent to merge; exit. */ 736 if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right)) 737 return 0; 738 739 cequal = (cleft.rc_startblock == cright.rc_startblock) && 740 (cleft.rc_blockcount == cright.rc_blockcount); 741 742 /* Try to merge left, cleft, and right. cleft must == cright. */ 743 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + 744 right.rc_blockcount; 745 if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && 746 xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && 747 left.rc_refcount == cleft.rc_refcount + adjust && 748 right.rc_refcount == cleft.rc_refcount + adjust && 749 ulen < MAXREFCEXTLEN) { 750 *shape_changed = true; 751 return xfs_refcount_merge_center_extents(cur, &left, &cleft, 752 &right, ulen, agbno, aglen); 753 } 754 755 /* Try to merge left and cleft. */ 756 ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; 757 if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && 758 left.rc_refcount == cleft.rc_refcount + adjust && 759 ulen < MAXREFCEXTLEN) { 760 *shape_changed = true; 761 error = xfs_refcount_merge_left_extent(cur, &left, &cleft, 762 agbno, aglen); 763 if (error) 764 return error; 765 766 /* 767 * If we just merged left + cleft and cleft == cright, 768 * we no longer have a cright to merge with right. We're done. 769 */ 770 if (cequal) 771 return 0; 772 } 773 774 /* Try to merge cright and right. */ 775 ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; 776 if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && 777 right.rc_refcount == cright.rc_refcount + adjust && 778 ulen < MAXREFCEXTLEN) { 779 *shape_changed = true; 780 return xfs_refcount_merge_right_extent(cur, &right, &cright, 781 agbno, aglen); 782 } 783 784 return error; 785 } 786 787 /* 788 * XXX: This is a pretty hand-wavy estimate. The penalty for guessing 789 * true incorrectly is a shutdown FS; the penalty for guessing false 790 * incorrectly is more transaction rolls than might be necessary. 791 * Be conservative here. 792 */ 793 static bool 794 xfs_refcount_still_have_space( 795 struct xfs_btree_cur *cur) 796 { 797 unsigned long overhead; 798 799 overhead = cur->bc_private.a.priv.refc.shape_changes * 800 xfs_allocfree_log_count(cur->bc_mp, 1); 801 overhead *= cur->bc_mp->m_sb.sb_blocksize; 802 803 /* 804 * Only allow 2 refcount extent updates per transaction if the 805 * refcount continue update "error" has been injected. 806 */ 807 if (cur->bc_private.a.priv.refc.nr_ops > 2 && 808 XFS_TEST_ERROR(false, cur->bc_mp, 809 XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) 810 return false; 811 812 if (cur->bc_private.a.priv.refc.nr_ops == 0) 813 return true; 814 else if (overhead > cur->bc_tp->t_log_res) 815 return false; 816 return cur->bc_tp->t_log_res - overhead > 817 cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; 818 } 819 820 /* 821 * Adjust the refcounts of middle extents. At this point we should have 822 * split extents that crossed the adjustment range; merged with adjacent 823 * extents; and updated agbno/aglen to reflect the merges. Therefore, 824 * all we have to do is update the extents inside [agbno, agbno + aglen]. 825 */ 826 STATIC int 827 xfs_refcount_adjust_extents( 828 struct xfs_btree_cur *cur, 829 xfs_agblock_t *agbno, 830 xfs_extlen_t *aglen, 831 enum xfs_refc_adjust_op adj, 832 struct xfs_defer_ops *dfops, 833 struct xfs_owner_info *oinfo) 834 { 835 struct xfs_refcount_irec ext, tmp; 836 int error; 837 int found_rec, found_tmp; 838 xfs_fsblock_t fsbno; 839 840 /* Merging did all the work already. */ 841 if (*aglen == 0) 842 return 0; 843 844 error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec); 845 if (error) 846 goto out_error; 847 848 while (*aglen > 0 && xfs_refcount_still_have_space(cur)) { 849 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 850 if (error) 851 goto out_error; 852 if (!found_rec) { 853 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks; 854 ext.rc_blockcount = 0; 855 ext.rc_refcount = 0; 856 } 857 858 /* 859 * Deal with a hole in the refcount tree; if a file maps to 860 * these blocks and there's no refcountbt record, pretend that 861 * there is one with refcount == 1. 862 */ 863 if (ext.rc_startblock != *agbno) { 864 tmp.rc_startblock = *agbno; 865 tmp.rc_blockcount = min(*aglen, 866 ext.rc_startblock - *agbno); 867 tmp.rc_refcount = 1 + adj; 868 trace_xfs_refcount_modify_extent(cur->bc_mp, 869 cur->bc_private.a.agno, &tmp); 870 871 /* 872 * Either cover the hole (increment) or 873 * delete the range (decrement). 874 */ 875 if (tmp.rc_refcount) { 876 error = xfs_refcount_insert(cur, &tmp, 877 &found_tmp); 878 if (error) 879 goto out_error; 880 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 881 found_tmp == 1, out_error); 882 cur->bc_private.a.priv.refc.nr_ops++; 883 } else { 884 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 885 cur->bc_private.a.agno, 886 tmp.rc_startblock); 887 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, 888 tmp.rc_blockcount, oinfo); 889 } 890 891 (*agbno) += tmp.rc_blockcount; 892 (*aglen) -= tmp.rc_blockcount; 893 894 error = xfs_refcount_lookup_ge(cur, *agbno, 895 &found_rec); 896 if (error) 897 goto out_error; 898 } 899 900 /* Stop if there's nothing left to modify */ 901 if (*aglen == 0 || !xfs_refcount_still_have_space(cur)) 902 break; 903 904 /* 905 * Adjust the reference count and either update the tree 906 * (incr) or free the blocks (decr). 907 */ 908 if (ext.rc_refcount == MAXREFCOUNT) 909 goto skip; 910 ext.rc_refcount += adj; 911 trace_xfs_refcount_modify_extent(cur->bc_mp, 912 cur->bc_private.a.agno, &ext); 913 if (ext.rc_refcount > 1) { 914 error = xfs_refcount_update(cur, &ext); 915 if (error) 916 goto out_error; 917 cur->bc_private.a.priv.refc.nr_ops++; 918 } else if (ext.rc_refcount == 1) { 919 error = xfs_refcount_delete(cur, &found_rec); 920 if (error) 921 goto out_error; 922 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 923 found_rec == 1, out_error); 924 cur->bc_private.a.priv.refc.nr_ops++; 925 goto advloop; 926 } else { 927 fsbno = XFS_AGB_TO_FSB(cur->bc_mp, 928 cur->bc_private.a.agno, 929 ext.rc_startblock); 930 xfs_bmap_add_free(cur->bc_mp, dfops, fsbno, 931 ext.rc_blockcount, oinfo); 932 } 933 934 skip: 935 error = xfs_btree_increment(cur, 0, &found_rec); 936 if (error) 937 goto out_error; 938 939 advloop: 940 (*agbno) += ext.rc_blockcount; 941 (*aglen) -= ext.rc_blockcount; 942 } 943 944 return error; 945 out_error: 946 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 947 cur->bc_private.a.agno, error, _RET_IP_); 948 return error; 949 } 950 951 /* Adjust the reference count of a range of AG blocks. */ 952 STATIC int 953 xfs_refcount_adjust( 954 struct xfs_btree_cur *cur, 955 xfs_agblock_t agbno, 956 xfs_extlen_t aglen, 957 xfs_agblock_t *new_agbno, 958 xfs_extlen_t *new_aglen, 959 enum xfs_refc_adjust_op adj, 960 struct xfs_defer_ops *dfops, 961 struct xfs_owner_info *oinfo) 962 { 963 bool shape_changed; 964 int shape_changes = 0; 965 int error; 966 967 *new_agbno = agbno; 968 *new_aglen = aglen; 969 if (adj == XFS_REFCOUNT_ADJUST_INCREASE) 970 trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno, 971 agbno, aglen); 972 else 973 trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno, 974 agbno, aglen); 975 976 /* 977 * Ensure that no rcextents cross the boundary of the adjustment range. 978 */ 979 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 980 if (error) 981 goto out_error; 982 if (shape_changed) 983 shape_changes++; 984 985 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 986 if (error) 987 goto out_error; 988 if (shape_changed) 989 shape_changes++; 990 991 /* 992 * Try to merge with the left or right extents of the range. 993 */ 994 error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj, 995 XFS_FIND_RCEXT_SHARED, &shape_changed); 996 if (error) 997 goto out_error; 998 if (shape_changed) 999 shape_changes++; 1000 if (shape_changes) 1001 cur->bc_private.a.priv.refc.shape_changes++; 1002 1003 /* Now that we've taken care of the ends, adjust the middle extents */ 1004 error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, 1005 adj, dfops, oinfo); 1006 if (error) 1007 goto out_error; 1008 1009 return 0; 1010 1011 out_error: 1012 trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno, 1013 error, _RET_IP_); 1014 return error; 1015 } 1016 1017 /* Clean up after calling xfs_refcount_finish_one. */ 1018 void 1019 xfs_refcount_finish_one_cleanup( 1020 struct xfs_trans *tp, 1021 struct xfs_btree_cur *rcur, 1022 int error) 1023 { 1024 struct xfs_buf *agbp; 1025 1026 if (rcur == NULL) 1027 return; 1028 agbp = rcur->bc_private.a.agbp; 1029 xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 1030 if (error) 1031 xfs_trans_brelse(tp, agbp); 1032 } 1033 1034 /* 1035 * Process one of the deferred refcount operations. We pass back the 1036 * btree cursor to maintain our lock on the btree between calls. 1037 * This saves time and eliminates a buffer deadlock between the 1038 * superblock and the AGF because we'll always grab them in the same 1039 * order. 1040 */ 1041 int 1042 xfs_refcount_finish_one( 1043 struct xfs_trans *tp, 1044 struct xfs_defer_ops *dfops, 1045 enum xfs_refcount_intent_type type, 1046 xfs_fsblock_t startblock, 1047 xfs_extlen_t blockcount, 1048 xfs_fsblock_t *new_fsb, 1049 xfs_extlen_t *new_len, 1050 struct xfs_btree_cur **pcur) 1051 { 1052 struct xfs_mount *mp = tp->t_mountp; 1053 struct xfs_btree_cur *rcur; 1054 struct xfs_buf *agbp = NULL; 1055 int error = 0; 1056 xfs_agnumber_t agno; 1057 xfs_agblock_t bno; 1058 xfs_agblock_t new_agbno; 1059 unsigned long nr_ops = 0; 1060 int shape_changes = 0; 1061 1062 agno = XFS_FSB_TO_AGNO(mp, startblock); 1063 ASSERT(agno != NULLAGNUMBER); 1064 bno = XFS_FSB_TO_AGBNO(mp, startblock); 1065 1066 trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), 1067 type, XFS_FSB_TO_AGBNO(mp, startblock), 1068 blockcount); 1069 1070 if (XFS_TEST_ERROR(false, mp, 1071 XFS_ERRTAG_REFCOUNT_FINISH_ONE)) 1072 return -EIO; 1073 1074 /* 1075 * If we haven't gotten a cursor or the cursor AG doesn't match 1076 * the startblock, get one now. 1077 */ 1078 rcur = *pcur; 1079 if (rcur != NULL && rcur->bc_private.a.agno != agno) { 1080 nr_ops = rcur->bc_private.a.priv.refc.nr_ops; 1081 shape_changes = rcur->bc_private.a.priv.refc.shape_changes; 1082 xfs_refcount_finish_one_cleanup(tp, rcur, 0); 1083 rcur = NULL; 1084 *pcur = NULL; 1085 } 1086 if (rcur == NULL) { 1087 error = xfs_alloc_read_agf(tp->t_mountp, tp, agno, 1088 XFS_ALLOC_FLAG_FREEING, &agbp); 1089 if (error) 1090 return error; 1091 if (!agbp) 1092 return -EFSCORRUPTED; 1093 1094 rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops); 1095 if (!rcur) { 1096 error = -ENOMEM; 1097 goto out_cur; 1098 } 1099 rcur->bc_private.a.priv.refc.nr_ops = nr_ops; 1100 rcur->bc_private.a.priv.refc.shape_changes = shape_changes; 1101 } 1102 *pcur = rcur; 1103 1104 switch (type) { 1105 case XFS_REFCOUNT_INCREASE: 1106 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1107 new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL); 1108 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1109 break; 1110 case XFS_REFCOUNT_DECREASE: 1111 error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, 1112 new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL); 1113 *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); 1114 break; 1115 case XFS_REFCOUNT_ALLOC_COW: 1116 *new_fsb = startblock + blockcount; 1117 *new_len = 0; 1118 error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops); 1119 break; 1120 case XFS_REFCOUNT_FREE_COW: 1121 *new_fsb = startblock + blockcount; 1122 *new_len = 0; 1123 error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops); 1124 break; 1125 default: 1126 ASSERT(0); 1127 error = -EFSCORRUPTED; 1128 } 1129 if (!error && *new_len > 0) 1130 trace_xfs_refcount_finish_one_leftover(mp, agno, type, 1131 bno, blockcount, new_agbno, *new_len); 1132 return error; 1133 1134 out_cur: 1135 xfs_trans_brelse(tp, agbp); 1136 1137 return error; 1138 } 1139 1140 /* 1141 * Record a refcount intent for later processing. 1142 */ 1143 static int 1144 __xfs_refcount_add( 1145 struct xfs_mount *mp, 1146 struct xfs_defer_ops *dfops, 1147 enum xfs_refcount_intent_type type, 1148 xfs_fsblock_t startblock, 1149 xfs_extlen_t blockcount) 1150 { 1151 struct xfs_refcount_intent *ri; 1152 1153 trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock), 1154 type, XFS_FSB_TO_AGBNO(mp, startblock), 1155 blockcount); 1156 1157 ri = kmem_alloc(sizeof(struct xfs_refcount_intent), 1158 KM_SLEEP | KM_NOFS); 1159 INIT_LIST_HEAD(&ri->ri_list); 1160 ri->ri_type = type; 1161 ri->ri_startblock = startblock; 1162 ri->ri_blockcount = blockcount; 1163 1164 xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); 1165 return 0; 1166 } 1167 1168 /* 1169 * Increase the reference count of the blocks backing a file's extent. 1170 */ 1171 int 1172 xfs_refcount_increase_extent( 1173 struct xfs_mount *mp, 1174 struct xfs_defer_ops *dfops, 1175 struct xfs_bmbt_irec *PREV) 1176 { 1177 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1178 return 0; 1179 1180 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE, 1181 PREV->br_startblock, PREV->br_blockcount); 1182 } 1183 1184 /* 1185 * Decrease the reference count of the blocks backing a file's extent. 1186 */ 1187 int 1188 xfs_refcount_decrease_extent( 1189 struct xfs_mount *mp, 1190 struct xfs_defer_ops *dfops, 1191 struct xfs_bmbt_irec *PREV) 1192 { 1193 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1194 return 0; 1195 1196 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE, 1197 PREV->br_startblock, PREV->br_blockcount); 1198 } 1199 1200 /* 1201 * Given an AG extent, find the lowest-numbered run of shared blocks 1202 * within that range and return the range in fbno/flen. If 1203 * find_end_of_shared is set, return the longest contiguous extent of 1204 * shared blocks; if not, just return the first extent we find. If no 1205 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK 1206 * and 0, respectively. 1207 */ 1208 int 1209 xfs_refcount_find_shared( 1210 struct xfs_btree_cur *cur, 1211 xfs_agblock_t agbno, 1212 xfs_extlen_t aglen, 1213 xfs_agblock_t *fbno, 1214 xfs_extlen_t *flen, 1215 bool find_end_of_shared) 1216 { 1217 struct xfs_refcount_irec tmp; 1218 int i; 1219 int have; 1220 int error; 1221 1222 trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno, 1223 agbno, aglen); 1224 1225 /* By default, skip the whole range */ 1226 *fbno = NULLAGBLOCK; 1227 *flen = 0; 1228 1229 /* Try to find a refcount extent that crosses the start */ 1230 error = xfs_refcount_lookup_le(cur, agbno, &have); 1231 if (error) 1232 goto out_error; 1233 if (!have) { 1234 /* No left extent, look at the next one */ 1235 error = xfs_btree_increment(cur, 0, &have); 1236 if (error) 1237 goto out_error; 1238 if (!have) 1239 goto done; 1240 } 1241 error = xfs_refcount_get_rec(cur, &tmp, &i); 1242 if (error) 1243 goto out_error; 1244 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1245 1246 /* If the extent ends before the start, look at the next one */ 1247 if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) { 1248 error = xfs_btree_increment(cur, 0, &have); 1249 if (error) 1250 goto out_error; 1251 if (!have) 1252 goto done; 1253 error = xfs_refcount_get_rec(cur, &tmp, &i); 1254 if (error) 1255 goto out_error; 1256 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1257 } 1258 1259 /* If the extent starts after the range we want, bail out */ 1260 if (tmp.rc_startblock >= agbno + aglen) 1261 goto done; 1262 1263 /* We found the start of a shared extent! */ 1264 if (tmp.rc_startblock < agbno) { 1265 tmp.rc_blockcount -= (agbno - tmp.rc_startblock); 1266 tmp.rc_startblock = agbno; 1267 } 1268 1269 *fbno = tmp.rc_startblock; 1270 *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno); 1271 if (!find_end_of_shared) 1272 goto done; 1273 1274 /* Otherwise, find the end of this shared extent */ 1275 while (*fbno + *flen < agbno + aglen) { 1276 error = xfs_btree_increment(cur, 0, &have); 1277 if (error) 1278 goto out_error; 1279 if (!have) 1280 break; 1281 error = xfs_refcount_get_rec(cur, &tmp, &i); 1282 if (error) 1283 goto out_error; 1284 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error); 1285 if (tmp.rc_startblock >= agbno + aglen || 1286 tmp.rc_startblock != *fbno + *flen) 1287 break; 1288 *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno); 1289 } 1290 1291 done: 1292 trace_xfs_refcount_find_shared_result(cur->bc_mp, 1293 cur->bc_private.a.agno, *fbno, *flen); 1294 1295 out_error: 1296 if (error) 1297 trace_xfs_refcount_find_shared_error(cur->bc_mp, 1298 cur->bc_private.a.agno, error, _RET_IP_); 1299 return error; 1300 } 1301 1302 /* 1303 * Recovering CoW Blocks After a Crash 1304 * 1305 * Due to the way that the copy on write mechanism works, there's a window of 1306 * opportunity in which we can lose track of allocated blocks during a crash. 1307 * Because CoW uses delayed allocation in the in-core CoW fork, writeback 1308 * causes blocks to be allocated and stored in the CoW fork. The blocks are 1309 * no longer in the free space btree but are not otherwise recorded anywhere 1310 * until the write completes and the blocks are mapped into the file. A crash 1311 * in between allocation and remapping results in the replacement blocks being 1312 * lost. This situation is exacerbated by the CoW extent size hint because 1313 * allocations can hang around for long time. 1314 * 1315 * However, there is a place where we can record these allocations before they 1316 * become mappings -- the reference count btree. The btree does not record 1317 * extents with refcount == 1, so we can record allocations with a refcount of 1318 * 1. Blocks being used for CoW writeout cannot be shared, so there should be 1319 * no conflict with shared block records. These mappings should be created 1320 * when we allocate blocks to the CoW fork and deleted when they're removed 1321 * from the CoW fork. 1322 * 1323 * Minor nit: records for in-progress CoW allocations and records for shared 1324 * extents must never be merged, to preserve the property that (except for CoW 1325 * allocations) there are no refcount btree entries with refcount == 1. The 1326 * only time this could potentially happen is when unsharing a block that's 1327 * adjacent to CoW allocations, so we must be careful to avoid this. 1328 * 1329 * At mount time we recover lost CoW allocations by searching the refcount 1330 * btree for these refcount == 1 mappings. These represent CoW allocations 1331 * that were in progress at the time the filesystem went down, so we can free 1332 * them to get the space back. 1333 * 1334 * This mechanism is superior to creating EFIs for unmapped CoW extents for 1335 * several reasons -- first, EFIs pin the tail of the log and would have to be 1336 * periodically relogged to avoid filling up the log. Second, CoW completions 1337 * will have to file an EFD and create new EFIs for whatever remains in the 1338 * CoW fork; this partially takes care of (1) but extent-size reservations 1339 * will have to periodically relog even if there's no writeout in progress. 1340 * This can happen if the CoW extent size hint is set, which you really want. 1341 * Third, EFIs cannot currently be automatically relogged into newer 1342 * transactions to advance the log tail. Fourth, stuffing the log full of 1343 * EFIs places an upper bound on the number of CoW allocations that can be 1344 * held filesystem-wide at any given time. Recording them in the refcount 1345 * btree doesn't require us to maintain any state in memory and doesn't pin 1346 * the log. 1347 */ 1348 /* 1349 * Adjust the refcounts of CoW allocations. These allocations are "magic" 1350 * in that they're not referenced anywhere else in the filesystem, so we 1351 * stash them in the refcount btree with a refcount of 1 until either file 1352 * remapping (or CoW cancellation) happens. 1353 */ 1354 STATIC int 1355 xfs_refcount_adjust_cow_extents( 1356 struct xfs_btree_cur *cur, 1357 xfs_agblock_t agbno, 1358 xfs_extlen_t aglen, 1359 enum xfs_refc_adjust_op adj, 1360 struct xfs_defer_ops *dfops, 1361 struct xfs_owner_info *oinfo) 1362 { 1363 struct xfs_refcount_irec ext, tmp; 1364 int error; 1365 int found_rec, found_tmp; 1366 1367 if (aglen == 0) 1368 return 0; 1369 1370 /* Find any overlapping refcount records */ 1371 error = xfs_refcount_lookup_ge(cur, agbno, &found_rec); 1372 if (error) 1373 goto out_error; 1374 error = xfs_refcount_get_rec(cur, &ext, &found_rec); 1375 if (error) 1376 goto out_error; 1377 if (!found_rec) { 1378 ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks + 1379 XFS_REFC_COW_START; 1380 ext.rc_blockcount = 0; 1381 ext.rc_refcount = 0; 1382 } 1383 1384 switch (adj) { 1385 case XFS_REFCOUNT_ADJUST_COW_ALLOC: 1386 /* Adding a CoW reservation, there should be nothing here. */ 1387 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1388 ext.rc_startblock >= agbno + aglen, out_error); 1389 1390 tmp.rc_startblock = agbno; 1391 tmp.rc_blockcount = aglen; 1392 tmp.rc_refcount = 1; 1393 trace_xfs_refcount_modify_extent(cur->bc_mp, 1394 cur->bc_private.a.agno, &tmp); 1395 1396 error = xfs_refcount_insert(cur, &tmp, 1397 &found_tmp); 1398 if (error) 1399 goto out_error; 1400 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1401 found_tmp == 1, out_error); 1402 break; 1403 case XFS_REFCOUNT_ADJUST_COW_FREE: 1404 /* Removing a CoW reservation, there should be one extent. */ 1405 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1406 ext.rc_startblock == agbno, out_error); 1407 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1408 ext.rc_blockcount == aglen, out_error); 1409 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1410 ext.rc_refcount == 1, out_error); 1411 1412 ext.rc_refcount = 0; 1413 trace_xfs_refcount_modify_extent(cur->bc_mp, 1414 cur->bc_private.a.agno, &ext); 1415 error = xfs_refcount_delete(cur, &found_rec); 1416 if (error) 1417 goto out_error; 1418 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, 1419 found_rec == 1, out_error); 1420 break; 1421 default: 1422 ASSERT(0); 1423 } 1424 1425 return error; 1426 out_error: 1427 trace_xfs_refcount_modify_extent_error(cur->bc_mp, 1428 cur->bc_private.a.agno, error, _RET_IP_); 1429 return error; 1430 } 1431 1432 /* 1433 * Add or remove refcount btree entries for CoW reservations. 1434 */ 1435 STATIC int 1436 xfs_refcount_adjust_cow( 1437 struct xfs_btree_cur *cur, 1438 xfs_agblock_t agbno, 1439 xfs_extlen_t aglen, 1440 enum xfs_refc_adjust_op adj, 1441 struct xfs_defer_ops *dfops) 1442 { 1443 bool shape_changed; 1444 int error; 1445 1446 agbno += XFS_REFC_COW_START; 1447 1448 /* 1449 * Ensure that no rcextents cross the boundary of the adjustment range. 1450 */ 1451 error = xfs_refcount_split_extent(cur, agbno, &shape_changed); 1452 if (error) 1453 goto out_error; 1454 1455 error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed); 1456 if (error) 1457 goto out_error; 1458 1459 /* 1460 * Try to merge with the left or right extents of the range. 1461 */ 1462 error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj, 1463 XFS_FIND_RCEXT_COW, &shape_changed); 1464 if (error) 1465 goto out_error; 1466 1467 /* Now that we've taken care of the ends, adjust the middle extents */ 1468 error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj, 1469 dfops, NULL); 1470 if (error) 1471 goto out_error; 1472 1473 return 0; 1474 1475 out_error: 1476 trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno, 1477 error, _RET_IP_); 1478 return error; 1479 } 1480 1481 /* 1482 * Record a CoW allocation in the refcount btree. 1483 */ 1484 STATIC int 1485 __xfs_refcount_cow_alloc( 1486 struct xfs_btree_cur *rcur, 1487 xfs_agblock_t agbno, 1488 xfs_extlen_t aglen, 1489 struct xfs_defer_ops *dfops) 1490 { 1491 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, 1492 agbno, aglen); 1493 1494 /* Add refcount btree reservation */ 1495 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1496 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); 1497 } 1498 1499 /* 1500 * Remove a CoW allocation from the refcount btree. 1501 */ 1502 STATIC int 1503 __xfs_refcount_cow_free( 1504 struct xfs_btree_cur *rcur, 1505 xfs_agblock_t agbno, 1506 xfs_extlen_t aglen, 1507 struct xfs_defer_ops *dfops) 1508 { 1509 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, 1510 agbno, aglen); 1511 1512 /* Remove refcount btree reservation */ 1513 return xfs_refcount_adjust_cow(rcur, agbno, aglen, 1514 XFS_REFCOUNT_ADJUST_COW_FREE, dfops); 1515 } 1516 1517 /* Record a CoW staging extent in the refcount btree. */ 1518 int 1519 xfs_refcount_alloc_cow_extent( 1520 struct xfs_mount *mp, 1521 struct xfs_defer_ops *dfops, 1522 xfs_fsblock_t fsb, 1523 xfs_extlen_t len) 1524 { 1525 int error; 1526 1527 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1528 return 0; 1529 1530 error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, 1531 fsb, len); 1532 if (error) 1533 return error; 1534 1535 /* Add rmap entry */ 1536 return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), 1537 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1538 } 1539 1540 /* Forget a CoW staging event in the refcount btree. */ 1541 int 1542 xfs_refcount_free_cow_extent( 1543 struct xfs_mount *mp, 1544 struct xfs_defer_ops *dfops, 1545 xfs_fsblock_t fsb, 1546 xfs_extlen_t len) 1547 { 1548 int error; 1549 1550 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1551 return 0; 1552 1553 /* Remove rmap entry */ 1554 error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb), 1555 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW); 1556 if (error) 1557 return error; 1558 1559 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, 1560 fsb, len); 1561 } 1562 1563 struct xfs_refcount_recovery { 1564 struct list_head rr_list; 1565 struct xfs_refcount_irec rr_rrec; 1566 }; 1567 1568 /* Stuff an extent on the recovery list. */ 1569 STATIC int 1570 xfs_refcount_recover_extent( 1571 struct xfs_btree_cur *cur, 1572 union xfs_btree_rec *rec, 1573 void *priv) 1574 { 1575 struct list_head *debris = priv; 1576 struct xfs_refcount_recovery *rr; 1577 1578 if (be32_to_cpu(rec->refc.rc_refcount) != 1) 1579 return -EFSCORRUPTED; 1580 1581 rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP); 1582 xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); 1583 list_add_tail(&rr->rr_list, debris); 1584 1585 return 0; 1586 } 1587 1588 /* Find and remove leftover CoW reservations. */ 1589 int 1590 xfs_refcount_recover_cow_leftovers( 1591 struct xfs_mount *mp, 1592 xfs_agnumber_t agno) 1593 { 1594 struct xfs_trans *tp; 1595 struct xfs_btree_cur *cur; 1596 struct xfs_buf *agbp; 1597 struct xfs_refcount_recovery *rr, *n; 1598 struct list_head debris; 1599 union xfs_btree_irec low; 1600 union xfs_btree_irec high; 1601 struct xfs_defer_ops dfops; 1602 xfs_fsblock_t fsb; 1603 xfs_agblock_t agbno; 1604 int error; 1605 1606 if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START) 1607 return -EOPNOTSUPP; 1608 1609 INIT_LIST_HEAD(&debris); 1610 1611 /* 1612 * In this first part, we use an empty transaction to gather up 1613 * all the leftover CoW extents so that we can subsequently 1614 * delete them. The empty transaction is used to avoid 1615 * a buffer lock deadlock if there happens to be a loop in the 1616 * refcountbt because we're allowed to re-grab a buffer that is 1617 * already attached to our transaction. When we're done 1618 * recording the CoW debris we cancel the (empty) transaction 1619 * and everything goes away cleanly. 1620 */ 1621 error = xfs_trans_alloc_empty(mp, &tp); 1622 if (error) 1623 return error; 1624 1625 error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 1626 if (error) 1627 goto out_trans; 1628 if (!agbp) { 1629 error = -ENOMEM; 1630 goto out_trans; 1631 } 1632 cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL); 1633 1634 /* Find all the leftover CoW staging extents. */ 1635 memset(&low, 0, sizeof(low)); 1636 memset(&high, 0, sizeof(high)); 1637 low.rc.rc_startblock = XFS_REFC_COW_START; 1638 high.rc.rc_startblock = -1U; 1639 error = xfs_btree_query_range(cur, &low, &high, 1640 xfs_refcount_recover_extent, &debris); 1641 if (error) 1642 goto out_cursor; 1643 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); 1644 xfs_trans_brelse(tp, agbp); 1645 xfs_trans_cancel(tp); 1646 1647 /* Now iterate the list to free the leftovers */ 1648 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1649 /* Set up transaction. */ 1650 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp); 1651 if (error) 1652 goto out_free; 1653 1654 trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec); 1655 1656 /* Free the orphan record */ 1657 xfs_defer_init(&dfops, &fsb); 1658 agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START; 1659 fsb = XFS_AGB_TO_FSB(mp, agno, agbno); 1660 error = xfs_refcount_free_cow_extent(mp, &dfops, fsb, 1661 rr->rr_rrec.rc_blockcount); 1662 if (error) 1663 goto out_defer; 1664 1665 /* Free the block. */ 1666 xfs_bmap_add_free(mp, &dfops, fsb, 1667 rr->rr_rrec.rc_blockcount, NULL); 1668 1669 error = xfs_defer_finish(&tp, &dfops); 1670 if (error) 1671 goto out_defer; 1672 1673 error = xfs_trans_commit(tp); 1674 if (error) 1675 goto out_free; 1676 1677 list_del(&rr->rr_list); 1678 kmem_free(rr); 1679 } 1680 1681 return error; 1682 out_defer: 1683 xfs_defer_cancel(&dfops); 1684 out_trans: 1685 xfs_trans_cancel(tp); 1686 out_free: 1687 /* Free the leftover list */ 1688 list_for_each_entry_safe(rr, n, &debris, rr_list) { 1689 list_del(&rr->rr_list); 1690 kmem_free(rr); 1691 } 1692 return error; 1693 1694 out_cursor: 1695 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 1696 xfs_trans_brelse(tp, agbp); 1697 goto out_trans; 1698 } 1699 1700 /* Is there a record covering a given extent? */ 1701 int 1702 xfs_refcount_has_record( 1703 struct xfs_btree_cur *cur, 1704 xfs_agblock_t bno, 1705 xfs_extlen_t len, 1706 bool *exists) 1707 { 1708 union xfs_btree_irec low; 1709 union xfs_btree_irec high; 1710 1711 memset(&low, 0, sizeof(low)); 1712 low.rc.rc_startblock = bno; 1713 memset(&high, 0xFF, sizeof(high)); 1714 high.rc.rc_startblock = bno + len - 1; 1715 1716 return xfs_btree_has_record(cur, &low, &high, exists); 1717 } 1718