/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 *
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
#include "xfs_rmap.h"

/* Allowable refcount adjustment amounts. */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,
};

STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen,
		struct xfs_defer_ops *dfops);
STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
		xfs_agblock_t agbno, xfs_extlen_t aglen,
		struct xfs_defer_ops *dfops);

/*
 * Look up the first record less than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_le(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_LE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Look up the first record greater than or equal to [bno, len] in the btree
 * given by cur.
 */
int
xfs_refcount_lookup_ge(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	int			*stat)
{
	trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_private.a.agno, bno,
			XFS_LOOKUP_GE);
	cur->bc_rec.rc.rc_startblock = bno;
	cur->bc_rec.rc.rc_blockcount = 0;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/* Convert on-disk record to in-core format. */
static inline void
xfs_refcount_btrec_to_irec(
	union xfs_btree_rec		*rec,
	struct xfs_refcount_irec	*irec)
{
	irec->rc_startblock = be32_to_cpu(rec->refc.rc_startblock);
	irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
	irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
}

/*
 * Get the data from the pointed-to record.
 */
int
xfs_refcount_get_rec(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*stat)
{
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (!error && *stat == 1) {
		xfs_refcount_btrec_to_irec(rec, irec);
		trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno,
				irec);
	}
	return error;
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_update(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec)
{
	union xfs_btree_rec	rec;
	int			error;

	trace_xfs_refcount_update(cur->bc_mp, cur->bc_private.a.agno, irec);
	rec.refc.rc_startblock = cpu_to_be32(irec->rc_startblock);
	rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
	rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
	error = xfs_btree_update(cur, &rec);
	if (error)
		trace_xfs_refcount_update_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Insert the record referred to by cur to the value given
 * by [bno, len, refcount].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_insert(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*irec,
	int				*i)
{
	int				error;

	trace_xfs_refcount_insert(cur->bc_mp, cur->bc_private.a.agno, irec);
	cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
	cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
	cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
	error = xfs_btree_insert(cur, i);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
out_error:
	if (error)
		trace_xfs_refcount_insert_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Remove the record referred to by cur, then set the pointer to the spot
 * where the record could be re-inserted, in case we want to increment or
 * decrement the cursor.
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_refcount_delete(
	struct xfs_btree_cur	*cur,
	int			*i)
{
	struct xfs_refcount_irec	irec;
	int				found_rec;
	int				error;

	error = xfs_refcount_get_rec(cur, &irec, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
	error = xfs_btree_delete(cur, i);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
	error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
out_error:
	if (error)
		trace_xfs_refcount_delete_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
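
/*
 * The helpers above are thin wrappers around the generic btree code and
 * follow the usual cursor pattern: a lookup call positions the cursor,
 * xfs_refcount_get_rec() reads back the record under the cursor, and the
 * stat/found_rec out parameter (1 if a record was found, 0 if not) must be
 * checked before the returned irec is used.  A typical sequence, as used
 * throughout this file (illustrative only), looks like:
 *
 *	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
 *	if (!error && found_rec)
 *		error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
 */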

/*
 * Adjusting the Reference Count
 *
 * As stated elsewhere, the reference count btree (refcbt) stores
 * >1 reference counts for extents of physical blocks.  In this
 * operation, we're either raising or lowering the reference count of
 * some subrange stored in the tree:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+---------
 *  2  |   | 3 |  4  | |17|   55   |   10
 * ----+   +---+-----+ +--+--------+---------
 * X axis is physical blocks number;
 * reference counts are the numbers inside the rectangles
 *
 * The first thing we need to do is to ensure that there are no
 * refcount extents crossing either boundary of the range to be
 * adjusted.  For any extent that does cross a boundary, split it into
 * two extents so that we can increment the refcount of one of the
 * pieces later:
 *
 *      <------ adjustment range ------>
 * ----+   +---+-----+ +--+--------+----+----
 *  2  |   | 3 |  2  | |17|   55   | 10 | 10
 * ----+   +---+-----+ +--+--------+----+----
 *
 * For this next step, let's assume that all the physical blocks in
 * the adjustment range are mapped to a file and are therefore in use
 * at least once.  Therefore, we can infer that any gap in the
 * refcount tree within the adjustment range represents a physical
 * extent with refcount == 1:
 *
 *      <------ adjustment range ------>
 * ----+---+---+-----+-+--+--------+----+----
 *  2  |"1"| 3 |  2  |1|17|   55   | 10 | 10
 * ----+---+---+-----+-+--+--------+----+----
 *      ^
 *
 * For each extent that falls within the interval range, figure out
 * which extent is to the left or the right of that extent.  Now we
 * have a left, current, and right extent.  If the new reference count
 * of the center extent enables us to merge left, center, and right
 * into one record covering all three, do so.  If the center extent is
 * at the left end of the range, abuts the left extent, and its new
 * reference count matches the left extent's record, then merge them.
 * If the center extent is at the right end of the range, abuts the
 * right extent, and the reference counts match, merge those.  In the
 * example, we can left merge (assuming an increment operation):
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 3 |  2  |1|17|   55   | 10 | 10
 * --------+---+-----+-+--+--------+----+----
 *          ^
 *
 * For all other extents within the range, adjust the reference count
 * or delete it if the refcount falls below 2.  If we were
 * incrementing, the end result looks like this:
 *
 *      <------ adjustment range ------>
 * --------+---+-----+-+--+--------+----+----
 *    2    | 4 |  3  |2|18|   56   | 11 | 10
 * --------+---+-----+-+--+--------+----+----
 *
 * The result of a decrement operation looks as such:
 *
 *      <------ adjustment range ------>
 * ----+   +---+       +--+--------+----+----
 *  2  |   | 2 |       |16|   54   |  9 | 10
 * ----+   +---+       +--+--------+----+----
 *      DDDD    111111DD
 *
 * The blocks marked "D" are freed; the blocks marked "1" are only
 * referenced once and therefore the record is removed from the
 * refcount btree.
 */

/* Next block after this extent. */
static inline xfs_agblock_t
xfs_refc_next(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock + rc->rc_blockcount;
}
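
/*
 * The functions that follow implement the machinery described above:
 * xfs_refcount_split_extent() handles the boundary splitting step; the
 * three merge helpers (center, left, right) handle the merging step; and
 * xfs_refcount_merge_extents() decides which of those merges applies to a
 * given adjustment range.
 */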

/*
 * Split a refcount extent that crosses agbno.
 */
STATIC int
xfs_refcount_split_extent(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	bool				*shape_changed)
{
	struct xfs_refcount_irec	rcext, tmp;
	int				found_rec;
	int				error;

	*shape_changed = false;
	error = xfs_refcount_lookup_le(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
		return 0;

	*shape_changed = true;
	trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_private.a.agno,
			&rcext, agbno);

	/* Establish the right extent. */
	tmp = rcext;
	tmp.rc_startblock = agbno;
	tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
	error = xfs_refcount_update(cur, &tmp);
	if (error)
		goto out_error;

	/* Insert the left extent. */
	tmp = rcext;
	tmp.rc_blockcount = agbno - rcext.rc_startblock;
	error = xfs_refcount_insert(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
	return error;

out_error:
	trace_xfs_refcount_split_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge the left, center, and right extents.
 */
STATIC int
xfs_refcount_merge_center_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*center,
	struct xfs_refcount_irec	*right,
	unsigned long long		extlen,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_center_extents(cur->bc_mp,
			cur->bc_private.a.agno, left, center, right);

	/*
	 * Make sure the center and right extents are not in the btree.
	 * If the center extent was synthesized, the first delete call
	 * removes the right extent and we skip the second deletion.
	 * If center and right were in the btree, then the first delete
	 * call removes the center and the second one removes the right
	 * extent.
	 */
	error = xfs_refcount_lookup_ge(cur, center->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	error = xfs_refcount_delete(cur, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (center->rc_refcount > 1) {
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	left->rc_blockcount = extlen;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*aglen = 0;
	return error;

out_error:
	trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the left extent.
 */
STATIC int
xfs_refcount_merge_left_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			*agbno,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_left_extent(cur->bc_mp,
			cur->bc_private.a.agno, left, cleft);

	/* If the extent at agbno (cleft) wasn't synthesized, remove it. */
	if (cleft->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cleft->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the left extent. */
	error = xfs_refcount_lookup_le(cur, left->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	left->rc_blockcount += cleft->rc_blockcount;
	error = xfs_refcount_update(cur, left);
	if (error)
		goto out_error;

	*agbno += cleft->rc_blockcount;
	*aglen -= cleft->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Merge with the right extent.
 */
STATIC int
xfs_refcount_merge_right_extent(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_extlen_t			*aglen)
{
	int				error;
	int				found_rec;

	trace_xfs_refcount_merge_right_extent(cur->bc_mp,
			cur->bc_private.a.agno, cright, right);

	/*
	 * If the extent ending at agbno+aglen (cright) wasn't synthesized,
	 * remove it.
	 */
	if (cright->rc_refcount > 1) {
		error = xfs_refcount_lookup_le(cur, cright->rc_startblock,
				&found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);
	}

	/* Enlarge the right extent. */
	error = xfs_refcount_lookup_le(cur, right->rc_startblock,
			&found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	right->rc_startblock -= cright->rc_blockcount;
	right->rc_blockcount += cright->rc_blockcount;
	error = xfs_refcount_update(cur, right);
	if (error)
		goto out_error;

	*aglen -= cright->rc_blockcount;
	return error;

out_error:
	trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
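
/*
 * Note that the find_left/find_right helpers below only return a merge
 * candidate whose refcount is compatible with the kind of record being
 * adjusted: XFS_FIND_RCEXT_SHARED callers skip refcount == 1 records and
 * XFS_FIND_RCEXT_COW callers skip records with refcount > 1, so that
 * shared-extent records and CoW staging records are never merged with
 * each other.
 */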

#define XFS_FIND_RCEXT_SHARED	1
#define XFS_FIND_RCEXT_COW	2
/*
 * Find the left extent and the one after it (cleft).  This function assumes
 * that we've already split any extent crossing agbno.
 */
STATIC int
xfs_refcount_find_left_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*left,
	struct xfs_refcount_irec	*cleft,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_le(cur, agbno - 1, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (xfs_refc_next(&tmp) != agbno)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a left extent; retrieve (or invent) the next right one */
	*left = tmp;

	error = xfs_btree_increment(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp starts at the end of our range, just use that */
		if (tmp.rc_startblock == agbno)
			*cleft = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the start of the
			 * range we're interested in (refcount == 1) so
			 * synthesize the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cleft->rc_startblock = agbno;
			cleft->rc_blockcount = min(aglen,
					tmp.rc_startblock - agbno);
			cleft->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cleft->rc_startblock = agbno;
		cleft->rc_blockcount = aglen;
		cleft->rc_refcount = 1;
	}
	trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_private.a.agno,
			left, cleft, agbno);
	return error;

out_error:
	trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Find the right extent and the one before it (cright).  This function
 * assumes that we've already split any extents crossing agbno + aglen.
 */
STATIC int
xfs_refcount_find_right_extents(
	struct xfs_btree_cur		*cur,
	struct xfs_refcount_irec	*right,
	struct xfs_refcount_irec	*cright,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	int				flags)
{
	struct xfs_refcount_irec	tmp;
	int				error;
	int				found_rec;

	right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
	error = xfs_refcount_lookup_ge(cur, agbno + aglen, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec)
		return 0;

	error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);

	if (tmp.rc_startblock != agbno + aglen)
		return 0;
	if ((flags & XFS_FIND_RCEXT_SHARED) && tmp.rc_refcount < 2)
		return 0;
	if ((flags & XFS_FIND_RCEXT_COW) && tmp.rc_refcount > 1)
		return 0;
	/* We have a right extent; retrieve (or invent) the next left one */
	*right = tmp;

	error = xfs_btree_decrement(cur, 0, &found_rec);
	if (error)
		goto out_error;
	if (found_rec) {
		error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
				out_error);

		/* if tmp ends at the end of our range, just use that */
		if (xfs_refc_next(&tmp) == agbno + aglen)
			*cright = tmp;
		else {
			/*
			 * There's a gap in the refcntbt at the end of the
			 * range we're interested in (refcount == 1) so
			 * create the implied extent and pass it back.
			 * We assume here that the agbno/aglen range was
			 * passed in from a data fork extent mapping and
			 * therefore is allocated to exactly one owner.
			 */
			cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
			cright->rc_blockcount = right->rc_startblock -
					cright->rc_startblock;
			cright->rc_refcount = 1;
		}
	} else {
		/*
		 * No extents, so pretend that there's one covering the whole
		 * range.
		 */
		cright->rc_startblock = agbno;
		cright->rc_blockcount = aglen;
		cright->rc_refcount = 1;
	}
	trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_private.a.agno,
			cright, right, agbno + aglen);
	return error;

out_error:
	trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Is this extent valid? */
static inline bool
xfs_refc_valid(
	struct xfs_refcount_irec	*rc)
{
	return rc->rc_startblock != NULLAGBLOCK;
}
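
/*
 * The merge checks below compute the length of a would-be merged extent in
 * an unsigned long long and require it to stay below MAXREFCEXTLEN so that
 * the merged record's 32-bit block count cannot overflow.
 */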

/*
 * Try to merge with any extents on the boundaries of the adjustment range.
 */
STATIC int
xfs_refcount_merge_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op adjust,
	int			flags,
	bool			*shape_changed)
{
	struct xfs_refcount_irec	left = {0}, cleft = {0};
	struct xfs_refcount_irec	cright = {0}, right = {0};
	int				error;
	unsigned long long		ulen;
	bool				cequal;

	*shape_changed = false;
	/*
	 * Find the extent just below agbno [left], just above agbno [cleft],
	 * just below (agbno + aglen) [cright], and just above (agbno + aglen)
	 * [right].
	 */
	error = xfs_refcount_find_left_extents(cur, &left, &cleft, *agbno,
			*aglen, flags);
	if (error)
		return error;
	error = xfs_refcount_find_right_extents(cur, &right, &cright, *agbno,
			*aglen, flags);
	if (error)
		return error;

	/* No left or right extent to merge; exit. */
	if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
		return 0;

	cequal = (cleft.rc_startblock == cright.rc_startblock) &&
		 (cleft.rc_blockcount == cright.rc_blockcount);

	/* Try to merge left, cleft, and right.  cleft must == cright. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
			right.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    right.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
				&right, ulen, aglen);
	}

	/* Try to merge left and cleft. */
	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
	    left.rc_refcount == cleft.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
				agbno, aglen);
		if (error)
			return error;

		/*
		 * If we just merged left + cleft and cleft == cright,
		 * we no longer have a cright to merge with right.  We're done.
		 */
		if (cequal)
			return 0;
	}

	/* Try to merge cright and right. */
	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
	    right.rc_refcount == cright.rc_refcount + adjust &&
	    ulen < MAXREFCEXTLEN) {
		*shape_changed = true;
		return xfs_refcount_merge_right_extent(cur, &right, &cright,
				aglen);
	}

	return error;
}

/*
 * XXX: This is a pretty hand-wavy estimate.  The penalty for guessing
 * true incorrectly is a shutdown FS; the penalty for guessing false
 * incorrectly is more transaction rolls than might be necessary.
 * Be conservative here.
 */
static bool
xfs_refcount_still_have_space(
	struct xfs_btree_cur		*cur)
{
	unsigned long			overhead;

	overhead = cur->bc_private.a.priv.refc.shape_changes *
			xfs_allocfree_log_count(cur->bc_mp, 1);
	overhead *= cur->bc_mp->m_sb.sb_blocksize;

	/*
	 * Only allow 2 refcount extent updates per transaction if the
	 * refcount continue update "error" has been injected.
	 */
	if (cur->bc_private.a.priv.refc.nr_ops > 2 &&
	    XFS_TEST_ERROR(false, cur->bc_mp,
			XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
		return false;

	if (cur->bc_private.a.priv.refc.nr_ops == 0)
		return true;
	else if (overhead > cur->bc_tp->t_log_res)
		return false;
	return cur->bc_tp->t_log_res - overhead >
		cur->bc_private.a.priv.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
}

/*
 * Adjust the refcounts of middle extents.  At this point we should have
 * split extents that crossed the adjustment range; merged with adjacent
 * extents; and updated agbno/aglen to reflect the merges.  Therefore,
 * all we have to do is update the extents inside [agbno, agbno + aglen].
 */
STATIC int
xfs_refcount_adjust_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		*agbno,
	xfs_extlen_t		*aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops,
	struct xfs_owner_info	*oinfo)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;
	xfs_fsblock_t			fsbno;

	/* Merging did all the work already. */
	if (*aglen == 0)
		return 0;

	error = xfs_refcount_lookup_ge(cur, *agbno, &found_rec);
	if (error)
		goto out_error;

	while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
		error = xfs_refcount_get_rec(cur, &ext, &found_rec);
		if (error)
			goto out_error;
		if (!found_rec) {
			ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
			ext.rc_blockcount = 0;
			ext.rc_refcount = 0;
		}

		/*
		 * Deal with a hole in the refcount tree; if a file maps to
		 * these blocks and there's no refcountbt record, pretend that
		 * there is one with refcount == 1.
		 */
		if (ext.rc_startblock != *agbno) {
			tmp.rc_startblock = *agbno;
			tmp.rc_blockcount = min(*aglen,
					ext.rc_startblock - *agbno);
			tmp.rc_refcount = 1 + adj;
			trace_xfs_refcount_modify_extent(cur->bc_mp,
					cur->bc_private.a.agno, &tmp);

			/*
			 * Either cover the hole (increment) or
			 * delete the range (decrement).
			 */
			if (tmp.rc_refcount) {
				error = xfs_refcount_insert(cur, &tmp,
						&found_tmp);
				if (error)
					goto out_error;
				XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
						found_tmp == 1, out_error);
				cur->bc_private.a.priv.refc.nr_ops++;
			} else {
				fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
						cur->bc_private.a.agno,
						tmp.rc_startblock);
				xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
						tmp.rc_blockcount, oinfo);
			}

			(*agbno) += tmp.rc_blockcount;
			(*aglen) -= tmp.rc_blockcount;

			error = xfs_refcount_lookup_ge(cur, *agbno,
					&found_rec);
			if (error)
				goto out_error;
		}

		/* Stop if there's nothing left to modify */
		if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
			break;

		/*
		 * Adjust the reference count and either update the tree
		 * (incr) or free the blocks (decr).
		 */
		if (ext.rc_refcount == MAXREFCOUNT)
			goto skip;
		ext.rc_refcount += adj;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		if (ext.rc_refcount > 1) {
			error = xfs_refcount_update(cur, &ext);
			if (error)
				goto out_error;
			cur->bc_private.a.priv.refc.nr_ops++;
		} else if (ext.rc_refcount == 1) {
			error = xfs_refcount_delete(cur, &found_rec);
			if (error)
				goto out_error;
			XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
					found_rec == 1, out_error);
			cur->bc_private.a.priv.refc.nr_ops++;
			goto advloop;
		} else {
			fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
					cur->bc_private.a.agno,
					ext.rc_startblock);
			xfs_bmap_add_free(cur->bc_mp, dfops, fsbno,
					ext.rc_blockcount, oinfo);
		}

skip:
		error = xfs_btree_increment(cur, 0, &found_rec);
		if (error)
			goto out_error;

advloop:
		(*agbno) += ext.rc_blockcount;
		(*aglen) -= ext.rc_blockcount;
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/* Adjust the reference count of a range of AG blocks. */
STATIC int
xfs_refcount_adjust(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*new_agbno,
	xfs_extlen_t		*new_aglen,
	enum xfs_refc_adjust_op	adj,
	struct xfs_defer_ops	*dfops,
	struct xfs_owner_info	*oinfo)
{
	bool			shape_changed;
	int			shape_changes = 0;
	int			error;

	*new_agbno = agbno;
	*new_aglen = aglen;
	if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
		trace_xfs_refcount_increase(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);
	else
		trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_private.a.agno,
				agbno, aglen);

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, new_agbno, new_aglen, adj,
			XFS_FIND_RCEXT_SHARED, &shape_changed);
	if (error)
		goto out_error;
	if (shape_changed)
		shape_changes++;
	if (shape_changes)
		cur->bc_private.a.priv.refc.shape_changes++;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen,
			adj, dfops, oinfo);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/* Clean up after calling xfs_refcount_finish_one. */
void
xfs_refcount_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agbp;

	if (rcur == NULL)
		return;
	agbp = rcur->bc_private.a.agbp;
	xfs_btree_del_cursor(rcur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	if (error)
		xfs_trans_brelse(tp, agbp);
}
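
/*
 * Note that one call below may not process the entire range: if
 * xfs_refcount_still_have_space() decides that the transaction is running
 * low on reservation, the adjustment stops early and the unprocessed
 * remainder is handed back through *new_fsb/*new_len so that the caller
 * can continue it in a later transaction.
 */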

/*
 * Process one of the deferred refcount operations.  We pass back the
 * btree cursor to maintain our lock on the btree between calls.
 * This saves time and eliminates a buffer deadlock between the
 * superblock and the AGF because we'll always grab them in the same
 * order.
 */
int
xfs_refcount_finish_one(
	struct xfs_trans		*tp,
	struct xfs_defer_ops		*dfops,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount,
	xfs_fsblock_t			*new_fsb,
	xfs_extlen_t			*new_len,
	struct xfs_btree_cur		**pcur)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*rcur;
	struct xfs_buf			*agbp = NULL;
	int				error = 0;
	xfs_agnumber_t			agno;
	xfs_agblock_t			bno;
	xfs_agblock_t			new_agbno;
	unsigned long			nr_ops = 0;
	int				shape_changes = 0;

	agno = XFS_FSB_TO_AGNO(mp, startblock);
	ASSERT(agno != NULLAGNUMBER);
	bno = XFS_FSB_TO_AGBNO(mp, startblock);

	trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_REFCOUNT_FINISH_ONE))
		return -EIO;

	/*
	 * If we haven't gotten a cursor or the cursor AG doesn't match
	 * the startblock, get one now.
	 */
	rcur = *pcur;
	if (rcur != NULL && rcur->bc_private.a.agno != agno) {
		nr_ops = rcur->bc_private.a.priv.refc.nr_ops;
		shape_changes = rcur->bc_private.a.priv.refc.shape_changes;
		xfs_refcount_finish_one_cleanup(tp, rcur, 0);
		rcur = NULL;
		*pcur = NULL;
	}
	if (rcur == NULL) {
		error = xfs_alloc_read_agf(tp->t_mountp, tp, agno,
				XFS_ALLOC_FLAG_FREEING, &agbp);
		if (error)
			return error;
		if (!agbp)
			return -EFSCORRUPTED;

		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, dfops);
		if (!rcur) {
			error = -ENOMEM;
			goto out_cur;
		}
		rcur->bc_private.a.priv.refc.nr_ops = nr_ops;
		rcur->bc_private.a.priv.refc.shape_changes = shape_changes;
	}
	*pcur = rcur;

	switch (type) {
	case XFS_REFCOUNT_INCREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
			new_len, XFS_REFCOUNT_ADJUST_INCREASE, dfops, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_DECREASE:
		error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno,
			new_len, XFS_REFCOUNT_ADJUST_DECREASE, dfops, NULL);
		*new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno);
		break;
	case XFS_REFCOUNT_ALLOC_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_alloc(rcur, bno, blockcount, dfops);
		break;
	case XFS_REFCOUNT_FREE_COW:
		*new_fsb = startblock + blockcount;
		*new_len = 0;
		error = __xfs_refcount_cow_free(rcur, bno, blockcount, dfops);
		break;
	default:
		ASSERT(0);
		error = -EFSCORRUPTED;
	}
	if (!error && *new_len > 0)
		trace_xfs_refcount_finish_one_leftover(mp, agno, type,
				bno, blockcount, new_agbno, *new_len);
	return error;

out_cur:
	xfs_trans_brelse(tp, agbp);

	return error;
}

/*
 * Record a refcount intent for later processing.
 */
static int
__xfs_refcount_add(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	enum xfs_refcount_intent_type	type,
	xfs_fsblock_t			startblock,
	xfs_extlen_t			blockcount)
{
	struct xfs_refcount_intent	*ri;

	trace_xfs_refcount_defer(mp, XFS_FSB_TO_AGNO(mp, startblock),
			type, XFS_FSB_TO_AGBNO(mp, startblock),
			blockcount);

	ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
			KM_SLEEP | KM_NOFS);
	INIT_LIST_HEAD(&ri->ri_list);
	ri->ri_type = type;
	ri->ri_startblock = startblock;
	ri->ri_blockcount = blockcount;

	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
	return 0;
}

/*
 * Increase the reference count of the blocks backing a file's extent.
 */
int
xfs_refcount_increase_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_INCREASE,
			PREV->br_startblock, PREV->br_blockcount);
}

/*
 * Decrease the reference count of the blocks backing a file's extent.
 */
int
xfs_refcount_decrease_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	struct xfs_bmbt_irec		*PREV)
{
	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_DECREASE,
			PREV->br_startblock, PREV->br_blockcount);
}

/*
 * Given an AG extent, find the lowest-numbered run of shared blocks
 * within that range and return the range in fbno/flen.  If
 * find_end_of_shared is set, return the longest contiguous extent of
 * shared blocks; if not, just return the first extent we find.  If no
 * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
 * and 0, respectively.
 */
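/*
 * For example (illustrative numbers): if the tree holds the records
 * (10, 5, 2) and (15, 4, 3), a query for agbno 8 and aglen 20 returns
 * fbno 10; with find_end_of_shared set, flen extends across the second,
 * physically contiguous record to 9 blocks, while without it flen is just
 * the 5 blocks of the first record.
 */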
int
xfs_refcount_find_shared(
	struct xfs_btree_cur		*cur,
	xfs_agblock_t			agbno,
	xfs_extlen_t			aglen,
	xfs_agblock_t			*fbno,
	xfs_extlen_t			*flen,
	bool				find_end_of_shared)
{
	struct xfs_refcount_irec	tmp;
	int				i;
	int				have;
	int				error;

	trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_private.a.agno,
			agbno, aglen);

	/* By default, skip the whole range */
	*fbno = NULLAGBLOCK;
	*flen = 0;

	/* Try to find a refcount extent that crosses the start */
	error = xfs_refcount_lookup_le(cur, agbno, &have);
	if (error)
		goto out_error;
	if (!have) {
		/* No left extent, look at the next one */
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
	}
	error = xfs_refcount_get_rec(cur, &tmp, &i);
	if (error)
		goto out_error;
	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);

	/* If the extent ends before the start, look at the next one */
	if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			goto done;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
	}

	/* If the extent starts after the range we want, bail out */
	if (tmp.rc_startblock >= agbno + aglen)
		goto done;

	/* We found the start of a shared extent! */
	if (tmp.rc_startblock < agbno) {
		tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
		tmp.rc_startblock = agbno;
	}

	*fbno = tmp.rc_startblock;
	*flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
	if (!find_end_of_shared)
		goto done;

	/* Otherwise, find the end of this shared extent */
	while (*fbno + *flen < agbno + aglen) {
		error = xfs_btree_increment(cur, 0, &have);
		if (error)
			goto out_error;
		if (!have)
			break;
		error = xfs_refcount_get_rec(cur, &tmp, &i);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
		if (tmp.rc_startblock >= agbno + aglen ||
		    tmp.rc_startblock != *fbno + *flen)
			break;
		*flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
	}

done:
	trace_xfs_refcount_find_shared_result(cur->bc_mp,
			cur->bc_private.a.agno, *fbno, *flen);

out_error:
	if (error)
		trace_xfs_refcount_find_shared_error(cur->bc_mp,
				cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}

/*
 * Recovering CoW Blocks After a Crash
 *
 * Due to the way that the copy on write mechanism works, there's a window of
 * opportunity in which we can lose track of allocated blocks during a crash.
 * Because CoW uses delayed allocation in the in-core CoW fork, writeback
 * causes blocks to be allocated and stored in the CoW fork.  The blocks are
 * no longer in the free space btree but are not otherwise recorded anywhere
 * until the write completes and the blocks are mapped into the file.  A crash
 * in between allocation and remapping results in the replacement blocks being
 * lost.  This situation is exacerbated by the CoW extent size hint because
 * allocations can hang around for long time.
 *
 * However, there is a place where we can record these allocations before they
 * become mappings -- the reference count btree.  The btree does not record
 * extents with refcount == 1, so we can record allocations with a refcount of
 * 1.  Blocks being used for CoW writeout cannot be shared, so there should be
 * no conflict with shared block records.  These mappings should be created
 * when we allocate blocks to the CoW fork and deleted when they're removed
 * from the CoW fork.
 *
 * Minor nit: records for in-progress CoW allocations and records for shared
 * extents must never be merged, to preserve the property that (except for CoW
 * allocations) there are no refcount btree entries with refcount == 1.  The
 * only time this could potentially happen is when unsharing a block that's
 * adjacent to CoW allocations, so we must be careful to avoid this.
 *
 * At mount time we recover lost CoW allocations by searching the refcount
 * btree for these refcount == 1 mappings.  These represent CoW allocations
 * that were in progress at the time the filesystem went down, so we can free
 * them to get the space back.
 *
 * This mechanism is superior to creating EFIs for unmapped CoW extents for
 * several reasons -- first, EFIs pin the tail of the log and would have to be
 * periodically relogged to avoid filling up the log.  Second, CoW completions
 * will have to file an EFD and create new EFIs for whatever remains in the
 * CoW fork; this partially takes care of (1) but extent-size reservations
 * will have to periodically relog even if there's no writeout in progress.
 * This can happen if the CoW extent size hint is set, which you really want.
 * Third, EFIs cannot currently be automatically relogged into newer
 * transactions to advance the log tail.  Fourth, stuffing the log full of
 * EFIs places an upper bound on the number of CoW allocations that can be
 * held filesystem-wide at any given time.  Recording them in the refcount
 * btree doesn't require us to maintain any state in memory and doesn't pin
 * the log.
 */
/*
 * Adjust the refcounts of CoW allocations.  These allocations are "magic"
 * in that they're not referenced anywhere else in the filesystem, so we
 * stash them in the refcount btree with a refcount of 1 until either file
 * remapping (or CoW cancellation) happens.
 */
STATIC int
xfs_refcount_adjust_cow_extents(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	struct xfs_refcount_irec	ext, tmp;
	int				error;
	int				found_rec, found_tmp;

	if (aglen == 0)
		return 0;

	/* Find any overlapping refcount records */
	error = xfs_refcount_lookup_ge(cur, agbno, &found_rec);
	if (error)
		goto out_error;
	error = xfs_refcount_get_rec(cur, &ext, &found_rec);
	if (error)
		goto out_error;
	if (!found_rec) {
		ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks +
				XFS_REFC_COW_START;
		ext.rc_blockcount = 0;
		ext.rc_refcount = 0;
	}

	switch (adj) {
	case XFS_REFCOUNT_ADJUST_COW_ALLOC:
		/* Adding a CoW reservation, there should be nothing here. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock >= agbno + aglen, out_error);

		tmp.rc_startblock = agbno;
		tmp.rc_blockcount = aglen;
		tmp.rc_refcount = 1;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &tmp);

		error = xfs_refcount_insert(cur, &tmp,
				&found_tmp);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_tmp == 1, out_error);
		break;
	case XFS_REFCOUNT_ADJUST_COW_FREE:
		/* Removing a CoW reservation, there should be one extent. */
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_startblock == agbno, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_blockcount == aglen, out_error);
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				ext.rc_refcount == 1, out_error);

		ext.rc_refcount = 0;
		trace_xfs_refcount_modify_extent(cur->bc_mp,
				cur->bc_private.a.agno, &ext);
		error = xfs_refcount_delete(cur, &found_rec);
		if (error)
			goto out_error;
		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
				found_rec == 1, out_error);
		break;
	default:
		ASSERT(0);
	}

	return error;
out_error:
	trace_xfs_refcount_modify_extent_error(cur->bc_mp,
			cur->bc_private.a.agno, error, _RET_IP_);
	return error;
}
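
/*
 * CoW staging records are kept separate from ordinary shared-extent records
 * by offsetting their start block by XFS_REFC_COW_START, which places them
 * above any real AG block number (xfs_refcount_recover_cow_leftovers() below
 * refuses to run if sb_agblocks could collide with that offset).
 * xfs_refcount_adjust_cow() applies that offset before touching the tree.
 */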

/*
 * Add or remove refcount btree entries for CoW reservations.
 */
STATIC int
xfs_refcount_adjust_cow(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	enum xfs_refc_adjust_op	adj)
{
	bool			shape_changed;
	int			error;

	agbno += XFS_REFC_COW_START;

	/*
	 * Ensure that no rcextents cross the boundary of the adjustment range.
	 */
	error = xfs_refcount_split_extent(cur, agbno, &shape_changed);
	if (error)
		goto out_error;

	error = xfs_refcount_split_extent(cur, agbno + aglen, &shape_changed);
	if (error)
		goto out_error;

	/*
	 * Try to merge with the left or right extents of the range.
	 */
	error = xfs_refcount_merge_extents(cur, &agbno, &aglen, adj,
			XFS_FIND_RCEXT_COW, &shape_changed);
	if (error)
		goto out_error;

	/* Now that we've taken care of the ends, adjust the middle extents */
	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
	if (error)
		goto out_error;

	return 0;

out_error:
	trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_private.a.agno,
			error, _RET_IP_);
	return error;
}

/*
 * Record a CoW allocation in the refcount btree.
 */
STATIC int
__xfs_refcount_cow_alloc(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	struct xfs_defer_ops	*dfops)
{
	trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Add refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_ALLOC);
}

/*
 * Remove a CoW allocation from the refcount btree.
 */
STATIC int
__xfs_refcount_cow_free(
	struct xfs_btree_cur	*rcur,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	struct xfs_defer_ops	*dfops)
{
	trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
			agbno, aglen);

	/* Remove refcount btree reservation */
	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
			XFS_REFCOUNT_ADJUST_COW_FREE);
}

/* Record a CoW staging extent in the refcount btree. */
int
xfs_refcount_alloc_cow_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	int				error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
			fsb, len);
	if (error)
		return error;

	/* Add rmap entry */
	return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}

/* Forget a CoW staging event in the refcount btree. */
int
xfs_refcount_free_cow_extent(
	struct xfs_mount		*mp,
	struct xfs_defer_ops		*dfops,
	xfs_fsblock_t			fsb,
	xfs_extlen_t			len)
{
	int				error;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	/* Remove rmap entry */
	error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
	if (error)
		return error;

	return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
			fsb, len);
}

struct xfs_refcount_recovery {
	struct list_head		rr_list;
	struct xfs_refcount_irec	rr_rrec;
};

/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
	struct xfs_btree_cur		*cur,
	union xfs_btree_rec		*rec,
	void				*priv)
{
	struct list_head		*debris = priv;
	struct xfs_refcount_recovery	*rr;

	if (be32_to_cpu(rec->refc.rc_refcount) != 1)
		return -EFSCORRUPTED;

	rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
	xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
	list_add_tail(&rr->rr_list, debris);

	return 0;
}

/* Find and remove leftover CoW reservations. */
int
xfs_refcount_recover_cow_leftovers(
	struct xfs_mount		*mp,
	xfs_agnumber_t			agno)
{
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*cur;
	struct xfs_buf			*agbp;
	struct xfs_refcount_recovery	*rr, *n;
	struct list_head		debris;
	union xfs_btree_irec		low;
	union xfs_btree_irec		high;
	struct xfs_defer_ops		dfops;
	xfs_fsblock_t			fsb;
	xfs_agblock_t			agbno;
	int				error;

	if (mp->m_sb.sb_agblocks >= XFS_REFC_COW_START)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&debris);

	/*
	 * In this first part, we use an empty transaction to gather up
	 * all the leftover CoW extents so that we can subsequently
	 * delete them.  The empty transaction is used to avoid
	 * a buffer lock deadlock if there happens to be a loop in the
	 * refcountbt because we're allowed to re-grab a buffer that is
	 * already attached to our transaction.  When we're done
	 * recording the CoW debris we cancel the (empty) transaction
	 * and everything goes away cleanly.
	 */
	error = xfs_trans_alloc_empty(mp, &tp);
	if (error)
		return error;

	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
	if (error)
		goto out_trans;
	if (!agbp) {
		error = -ENOMEM;
		goto out_trans;
	}
	cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno, NULL);

	/* Find all the leftover CoW staging extents. */
	memset(&low, 0, sizeof(low));
	memset(&high, 0, sizeof(high));
	low.rc.rc_startblock = XFS_REFC_COW_START;
	high.rc.rc_startblock = -1U;
	error = xfs_btree_query_range(cur, &low, &high,
			xfs_refcount_recover_extent, &debris);
	if (error)
		goto out_cursor;
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_brelse(tp, agbp);
	xfs_trans_cancel(tp);

	/* Now iterate the list to free the leftovers */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		/* Set up transaction. */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
		if (error)
			goto out_free;

		trace_xfs_refcount_recover_extent(mp, agno, &rr->rr_rrec);

		/* Free the orphan record */
		xfs_defer_init(&dfops, &fsb);
		agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
		fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
		error = xfs_refcount_free_cow_extent(mp, &dfops, fsb,
				rr->rr_rrec.rc_blockcount);
		if (error)
			goto out_defer;

		/* Free the block. */
		xfs_bmap_add_free(mp, &dfops, fsb,
				rr->rr_rrec.rc_blockcount, NULL);

		error = xfs_defer_finish(&tp, &dfops);
		if (error)
			goto out_defer;

		error = xfs_trans_commit(tp);
		if (error)
			goto out_free;

		list_del(&rr->rr_list);
		kmem_free(rr);
	}

	return error;
out_defer:
	xfs_defer_cancel(&dfops);
out_trans:
	xfs_trans_cancel(tp);
out_free:
	/* Free the leftover list */
	list_for_each_entry_safe(rr, n, &debris, rr_list) {
		list_del(&rr->rr_list);
		kmem_free(rr);
	}
	return error;

out_cursor:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	xfs_trans_brelse(tp, agbp);
	goto out_trans;
}

/* Is there a record covering a given extent? */
int
xfs_refcount_has_record(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;

	memset(&low, 0, sizeof(low));
	low.rc.rc_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rc.rc_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
}