/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen,
		struct xfs_defer_ops *dfops);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen,
		struct xfs_defer_ops *dfops);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/* Convert on-disk record to in-core format. */
static inline void
xfs_refcount_btrec_to_irec(
	union xfs_btree_rec		*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Get the data from the pointed-to record.
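 * The on-disk record is converted from its big-endian layout to the
 * in-core xfs_refcount_irec format before being handed back to the
 * caller.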
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (!error && *stat == 1) {
		xfs_refcount_btrec_to_irec(rec, irec);
		trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno,
				irec);
	}
	return error;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
	rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	error = xfs_btree_insert(cur, i);
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int				found_rec;
	int				error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
	error = xfs_btree_delete(cur, i);
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
	if (error)
		goto out_error;
	error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}

/*
 * Split a refcount extent that crosses agbno.
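 *
 * For example, splitting the record [startblock 10, blockcount 10,
 * refcount 2] at agbno 13 rewrites the existing record as the right
 * piece [13, 7, 2] and inserts a new left piece [10, 3, 2]; the
 * refcount of both pieces is unchanged.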
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_private.a.agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
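 *
 * The extent at the start of the adjustment range (cleft) is absorbed
 * into the record to its left: left grows by cleft's blockcount, and
 * agbno/aglen are advanced past the merged blocks so that the
 * adjustment loop does not touch them again.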
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_private.a.agno, left, cleft);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_private.a.agno, cright, right);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
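 *
 * If no record starts exactly at agbno, cleft is synthesized with a
 * refcount of 1 to cover the gap, since the caller guarantees that
 * the range is allocated to exactly one owner.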
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, agbno, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				agbno, aglen);
	}

	return error;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	overhead = cur->bc_private.a.priv.refc.shape_changes *
			xfs_allocfree_log_count(cur->bc_mp, 1);
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_private.a.priv.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return  cur->bc_tp->t_log_res - overhead >
		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_private.a.agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
						found_tmp == 1, out_error);
				cur->bc_private.a.priv.refc.nr_ops++;
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_private.a.agno,
						tmp.rc_startblock);
				xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
						tmp.rc_blockcount, oinfo);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			error = xfs_refcount_lookup_ge(cur, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/* Stop if there's nothing left to modify */
		if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
			break;

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
			cur->bc_private.a.priv.refc.nr_ops++;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
					found_rec == 1, out_error);
			cur->bc_private.a.priv.refc.nr_ops++;
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_private.a.agno,
					ext.rc_startblock);
			xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
					ext.rc_blockcount, oinfo);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Adjust the reference count of a range of AG blocks.
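 *
 * This is done in three steps: split any refcount records that cross
 * either end of the range, try to merge the boundary records with
 * their neighbors, and then adjust whatever remains in the middle
 * with xfs_refcount_adjust_extents.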
 */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops,
	struct xfs_owner_info	*oinfo)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_private.a.priv.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
			adj, dfops, oinfo);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_private.a.agbp;
	xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (error)
		xfs_trans_brelse(tp, agbp);
}

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_ops		*dfops,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agnumber_t			agno;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	agno = XFS_FSB_TO_AGNO(mp, startblock);
	ASSERT(agno != NULLAGNUMBER);
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
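	 * If the AG changes, carry the nr_ops and shape_changes counters
	 * over to the new cursor so that xfs_refcount_still_have_space
	 * keeps an accurate picture of how much of the transaction
	 * reservation has already been consumed.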
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_private.a.agno != agno) {
		nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
		shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;
		if (!agbp)
			return -EFSCORRUPTED;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops);
		if (!rcur) {
			error = -ENOMEM;
			goto out_cur;
		}
		rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
		rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
			new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
			new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, agno, type,
				bno, blockcount, new_agbno, *new_len);
	return error;

out_cur:
	xfs_trans_brelse(tp, agbp);

	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static int
__xfs_refcount_add(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
			KM_SLEEP | KM_NOFS);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
	return 0;
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
int
xfs_refcount_increase_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE,
			PREV->br_startblock, PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
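 *
 * Like the increase case, this only queues a deferred
 * XFS_REFCOUNT_DECREASE intent; the refcount btree itself is not
 * touched until xfs_refcount_finish_one processes the intent.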
 */
int
xfs_refcount_decrease_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE,
			PREV->br_startblock, PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
int
xfs_refcount_find_shared(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*fbno,
	xfs_extlen_t		*flen,
	bool			find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, agbno, &have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_private.a.agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
				XFS_REFC_COW_START;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock >= agbno + aglen, out_error);

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_tmp == 1, out_error);
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock == agbno, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_blockcount == aglen, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_refcount == 1, out_error);

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_rec == 1, out_error);
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Add or remove refcount btree entries for CoW reservations.
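 *
 * CoW staging extents live in the same btree as shared-extent records,
 * but their start blocks are offset by XFS_REFC_COW_START, which keeps
 * them at the high end of the keyspace, away from the regular records;
 * that is also the range that mount-time recovery scans for leftover
 * reservations.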
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops)
{
	bool			shape_changed;
	int			error;

	agbno += XFS_REFC_COW_START;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj,
			dfops, NULL);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	struct xfs_defer_ops	*dfops)
{
	int			error;

	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
	if (error)
		return error;

	/* Add rmap entry */
	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
		error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
				rcur->bc_private.a.agno,
				agbno, aglen, XFS_RMAP_OWN_COW);
		if (error)
			return error;
	}

	return error;
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	struct xfs_defer_ops	*dfops)
{
	int			error;

	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
	if (error)
		return error;

	/* Remove rmap entry */
	if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
		error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
				rcur->bc_private.a.agno,
				agbno, aglen, XFS_RMAP_OWN_COW);
		if (error)
			return error;
	}

	return error;
}

/* Record a CoW staging extent in the refcount btree. */
int
xfs_refcount_alloc_cow_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
			fsb, len);
}

/* Forget a CoW staging event in the refcount btree. */
int
xfs_refcount_free_cow_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
			fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	union xfs_btree_rec		*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (be32_to_cpu(rec->refc.rc_refcount) != 1)
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	struct xfs_defer_ops		dfops;
	xfs_fsblock_t			fsb;
	xfs_agblock_t			agbno;
	int				error;

	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
	if (error)
		goto out_trans;
	if (!agbp) {
		error = -ENOMEM;
		goto out_trans;
	}
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_startblock = XFS_REFC_COW_START;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	if (error)
		goto out_cursor;
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);

		/* Free the orphan record */
		xfs_defer_init(&dfops, &fsb);
		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
		error = xfs_refcount_free_cow_extent(mp, &dfops, fsb,
				rr->rr_rrec.rc_blockcount);
		if (error)
			goto out_defer;

		/* Free the block. */
		xfs_bmap_add_free(mp, &dfops, fsb,
				rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_defer_finish(&tp, &dfops);
		if (error)
			goto out_defer;

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_defer:
	xfs_defer_cancel(&dfops);
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;

out_cursor:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	xfs_trans_brelse(tp, agbp);
	goto out_trans;
}