1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_bit.h" 12 #include "xfs_shared.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_trans.h" 16 #include "xfs_trans_priv.h" 17 #include "xfs_refcount_item.h" 18 #include "xfs_log.h" 19 #include "xfs_refcount.h" 20 21 22 kmem_zone_t *xfs_cui_zone; 23 kmem_zone_t *xfs_cud_zone; 24 25 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) 26 { 27 return container_of(lip, struct xfs_cui_log_item, cui_item); 28 } 29 30 void 31 xfs_cui_item_free( 32 struct xfs_cui_log_item *cuip) 33 { 34 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) 35 kmem_free(cuip); 36 else 37 kmem_zone_free(xfs_cui_zone, cuip); 38 } 39 40 /* 41 * Freeing the CUI requires that we remove it from the AIL if it has already 42 * been placed there. However, the CUI may not yet have been placed in the AIL 43 * when called by xfs_cui_release() from CUD processing due to the ordering of 44 * committed vs unpin operations in bulk insert operations. Hence the reference 45 * count to ensure only the last caller frees the CUI. 46 */ 47 void 48 xfs_cui_release( 49 struct xfs_cui_log_item *cuip) 50 { 51 ASSERT(atomic_read(&cuip->cui_refcount) > 0); 52 if (atomic_dec_and_test(&cuip->cui_refcount)) { 53 xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); 54 xfs_cui_item_free(cuip); 55 } 56 } 57 58 59 STATIC void 60 xfs_cui_item_size( 61 struct xfs_log_item *lip, 62 int *nvecs, 63 int *nbytes) 64 { 65 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 66 67 *nvecs += 1; 68 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); 69 } 70 71 /* 72 * This is called to fill in the vector of log iovecs for the 73 * given cui log item. We use only 1 iovec, and we point that 74 * at the cui_log_format structure embedded in the cui item. 75 * It is at this point that we assert that all of the extent 76 * slots in the cui item have been filled. 77 */ 78 STATIC void 79 xfs_cui_item_format( 80 struct xfs_log_item *lip, 81 struct xfs_log_vec *lv) 82 { 83 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 84 struct xfs_log_iovec *vecp = NULL; 85 86 ASSERT(atomic_read(&cuip->cui_next_extent) == 87 cuip->cui_format.cui_nextents); 88 89 cuip->cui_format.cui_type = XFS_LI_CUI; 90 cuip->cui_format.cui_size = 1; 91 92 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, 93 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); 94 } 95 96 /* 97 * The unpin operation is the last place an CUI is manipulated in the log. It is 98 * either inserted in the AIL or aborted in the event of a log I/O error. In 99 * either case, the CUI transaction has been successfully committed to make it 100 * this far. Therefore, we expect whoever committed the CUI to either construct 101 * and commit the CUD or drop the CUD's reference in the event of error. Simply 102 * drop the log's CUI reference now that the log is done with it. 103 */ 104 STATIC void 105 xfs_cui_item_unpin( 106 struct xfs_log_item *lip, 107 int remove) 108 { 109 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 110 111 xfs_cui_release(cuip); 112 } 113 114 /* 115 * The CUI has been either committed or aborted if the transaction has been 116 * cancelled. If the transaction was cancelled, an CUD isn't going to be 117 * constructed and thus we free the CUI here directly. 118 */ 119 STATIC void 120 xfs_cui_item_release( 121 struct xfs_log_item *lip) 122 { 123 xfs_cui_release(CUI_ITEM(lip)); 124 } 125 126 static const struct xfs_item_ops xfs_cui_item_ops = { 127 .iop_size = xfs_cui_item_size, 128 .iop_format = xfs_cui_item_format, 129 .iop_unpin = xfs_cui_item_unpin, 130 .iop_release = xfs_cui_item_release, 131 }; 132 133 /* 134 * Allocate and initialize an cui item with the given number of extents. 135 */ 136 struct xfs_cui_log_item * 137 xfs_cui_init( 138 struct xfs_mount *mp, 139 uint nextents) 140 141 { 142 struct xfs_cui_log_item *cuip; 143 144 ASSERT(nextents > 0); 145 if (nextents > XFS_CUI_MAX_FAST_EXTENTS) 146 cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 147 0); 148 else 149 cuip = kmem_zone_zalloc(xfs_cui_zone, 0); 150 151 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); 152 cuip->cui_format.cui_nextents = nextents; 153 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; 154 atomic_set(&cuip->cui_next_extent, 0); 155 atomic_set(&cuip->cui_refcount, 2); 156 157 return cuip; 158 } 159 160 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) 161 { 162 return container_of(lip, struct xfs_cud_log_item, cud_item); 163 } 164 165 STATIC void 166 xfs_cud_item_size( 167 struct xfs_log_item *lip, 168 int *nvecs, 169 int *nbytes) 170 { 171 *nvecs += 1; 172 *nbytes += sizeof(struct xfs_cud_log_format); 173 } 174 175 /* 176 * This is called to fill in the vector of log iovecs for the 177 * given cud log item. We use only 1 iovec, and we point that 178 * at the cud_log_format structure embedded in the cud item. 179 * It is at this point that we assert that all of the extent 180 * slots in the cud item have been filled. 181 */ 182 STATIC void 183 xfs_cud_item_format( 184 struct xfs_log_item *lip, 185 struct xfs_log_vec *lv) 186 { 187 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 188 struct xfs_log_iovec *vecp = NULL; 189 190 cudp->cud_format.cud_type = XFS_LI_CUD; 191 cudp->cud_format.cud_size = 1; 192 193 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, 194 sizeof(struct xfs_cud_log_format)); 195 } 196 197 /* 198 * The CUD is either committed or aborted if the transaction is cancelled. If 199 * the transaction is cancelled, drop our reference to the CUI and free the 200 * CUD. 201 */ 202 STATIC void 203 xfs_cud_item_release( 204 struct xfs_log_item *lip) 205 { 206 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 207 208 xfs_cui_release(cudp->cud_cuip); 209 kmem_zone_free(xfs_cud_zone, cudp); 210 } 211 212 static const struct xfs_item_ops xfs_cud_item_ops = { 213 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, 214 .iop_size = xfs_cud_item_size, 215 .iop_format = xfs_cud_item_format, 216 .iop_release = xfs_cud_item_release, 217 }; 218 219 static struct xfs_cud_log_item * 220 xfs_trans_get_cud( 221 struct xfs_trans *tp, 222 struct xfs_cui_log_item *cuip) 223 { 224 struct xfs_cud_log_item *cudp; 225 226 cudp = kmem_zone_zalloc(xfs_cud_zone, 0); 227 xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 228 &xfs_cud_item_ops); 229 cudp->cud_cuip = cuip; 230 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 231 232 xfs_trans_add_item(tp, &cudp->cud_item); 233 return cudp; 234 } 235 236 /* 237 * Finish an refcount update and log it to the CUD. Note that the 238 * transaction is marked dirty regardless of whether the refcount 239 * update succeeds or fails to support the CUI/CUD lifecycle rules. 240 */ 241 static int 242 xfs_trans_log_finish_refcount_update( 243 struct xfs_trans *tp, 244 struct xfs_cud_log_item *cudp, 245 enum xfs_refcount_intent_type type, 246 xfs_fsblock_t startblock, 247 xfs_extlen_t blockcount, 248 xfs_fsblock_t *new_fsb, 249 xfs_extlen_t *new_len, 250 struct xfs_btree_cur **pcur) 251 { 252 int error; 253 254 error = xfs_refcount_finish_one(tp, type, startblock, 255 blockcount, new_fsb, new_len, pcur); 256 257 /* 258 * Mark the transaction dirty, even on error. This ensures the 259 * transaction is aborted, which: 260 * 261 * 1.) releases the CUI and frees the CUD 262 * 2.) shuts down the filesystem 263 */ 264 tp->t_flags |= XFS_TRANS_DIRTY; 265 set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); 266 267 return error; 268 } 269 270 /* Sort refcount intents by AG. */ 271 static int 272 xfs_refcount_update_diff_items( 273 void *priv, 274 struct list_head *a, 275 struct list_head *b) 276 { 277 struct xfs_mount *mp = priv; 278 struct xfs_refcount_intent *ra; 279 struct xfs_refcount_intent *rb; 280 281 ra = container_of(a, struct xfs_refcount_intent, ri_list); 282 rb = container_of(b, struct xfs_refcount_intent, ri_list); 283 return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - 284 XFS_FSB_TO_AGNO(mp, rb->ri_startblock); 285 } 286 287 /* Get an CUI. */ 288 STATIC void * 289 xfs_refcount_update_create_intent( 290 struct xfs_trans *tp, 291 unsigned int count) 292 { 293 struct xfs_cui_log_item *cuip; 294 295 ASSERT(tp != NULL); 296 ASSERT(count > 0); 297 298 cuip = xfs_cui_init(tp->t_mountp, count); 299 ASSERT(cuip != NULL); 300 301 /* 302 * Get a log_item_desc to point at the new item. 303 */ 304 xfs_trans_add_item(tp, &cuip->cui_item); 305 return cuip; 306 } 307 308 /* Set the phys extent flags for this reverse mapping. */ 309 static void 310 xfs_trans_set_refcount_flags( 311 struct xfs_phys_extent *refc, 312 enum xfs_refcount_intent_type type) 313 { 314 refc->pe_flags = 0; 315 switch (type) { 316 case XFS_REFCOUNT_INCREASE: 317 case XFS_REFCOUNT_DECREASE: 318 case XFS_REFCOUNT_ALLOC_COW: 319 case XFS_REFCOUNT_FREE_COW: 320 refc->pe_flags |= type; 321 break; 322 default: 323 ASSERT(0); 324 } 325 } 326 327 /* Log refcount updates in the intent item. */ 328 STATIC void 329 xfs_refcount_update_log_item( 330 struct xfs_trans *tp, 331 void *intent, 332 struct list_head *item) 333 { 334 struct xfs_cui_log_item *cuip = intent; 335 struct xfs_refcount_intent *refc; 336 uint next_extent; 337 struct xfs_phys_extent *ext; 338 339 refc = container_of(item, struct xfs_refcount_intent, ri_list); 340 341 tp->t_flags |= XFS_TRANS_DIRTY; 342 set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); 343 344 /* 345 * atomic_inc_return gives us the value after the increment; 346 * we want to use it as an array index so we need to subtract 1 from 347 * it. 348 */ 349 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 350 ASSERT(next_extent < cuip->cui_format.cui_nextents); 351 ext = &cuip->cui_format.cui_extents[next_extent]; 352 ext->pe_startblock = refc->ri_startblock; 353 ext->pe_len = refc->ri_blockcount; 354 xfs_trans_set_refcount_flags(ext, refc->ri_type); 355 } 356 357 /* Get an CUD so we can process all the deferred refcount updates. */ 358 STATIC void * 359 xfs_refcount_update_create_done( 360 struct xfs_trans *tp, 361 void *intent, 362 unsigned int count) 363 { 364 return xfs_trans_get_cud(tp, intent); 365 } 366 367 /* Process a deferred refcount update. */ 368 STATIC int 369 xfs_refcount_update_finish_item( 370 struct xfs_trans *tp, 371 struct list_head *item, 372 void *done_item, 373 void **state) 374 { 375 struct xfs_refcount_intent *refc; 376 xfs_fsblock_t new_fsb; 377 xfs_extlen_t new_aglen; 378 int error; 379 380 refc = container_of(item, struct xfs_refcount_intent, ri_list); 381 error = xfs_trans_log_finish_refcount_update(tp, done_item, 382 refc->ri_type, 383 refc->ri_startblock, 384 refc->ri_blockcount, 385 &new_fsb, &new_aglen, 386 (struct xfs_btree_cur **)state); 387 /* Did we run out of reservation? Requeue what we didn't finish. */ 388 if (!error && new_aglen > 0) { 389 ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || 390 refc->ri_type == XFS_REFCOUNT_DECREASE); 391 refc->ri_startblock = new_fsb; 392 refc->ri_blockcount = new_aglen; 393 return -EAGAIN; 394 } 395 kmem_free(refc); 396 return error; 397 } 398 399 /* Clean up after processing deferred refcounts. */ 400 STATIC void 401 xfs_refcount_update_finish_cleanup( 402 struct xfs_trans *tp, 403 void *state, 404 int error) 405 { 406 struct xfs_btree_cur *rcur = state; 407 408 xfs_refcount_finish_one_cleanup(tp, rcur, error); 409 } 410 411 /* Abort all pending CUIs. */ 412 STATIC void 413 xfs_refcount_update_abort_intent( 414 void *intent) 415 { 416 xfs_cui_release(intent); 417 } 418 419 /* Cancel a deferred refcount update. */ 420 STATIC void 421 xfs_refcount_update_cancel_item( 422 struct list_head *item) 423 { 424 struct xfs_refcount_intent *refc; 425 426 refc = container_of(item, struct xfs_refcount_intent, ri_list); 427 kmem_free(refc); 428 } 429 430 const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 431 .max_items = XFS_CUI_MAX_FAST_EXTENTS, 432 .diff_items = xfs_refcount_update_diff_items, 433 .create_intent = xfs_refcount_update_create_intent, 434 .abort_intent = xfs_refcount_update_abort_intent, 435 .log_item = xfs_refcount_update_log_item, 436 .create_done = xfs_refcount_update_create_done, 437 .finish_item = xfs_refcount_update_finish_item, 438 .finish_cleanup = xfs_refcount_update_finish_cleanup, 439 .cancel_item = xfs_refcount_update_cancel_item, 440 }; 441 442 /* 443 * Process a refcount update intent item that was recovered from the log. 444 * We need to update the refcountbt. 445 */ 446 int 447 xfs_cui_recover( 448 struct xfs_trans *parent_tp, 449 struct xfs_cui_log_item *cuip) 450 { 451 int i; 452 int error = 0; 453 unsigned int refc_type; 454 struct xfs_phys_extent *refc; 455 xfs_fsblock_t startblock_fsb; 456 bool op_ok; 457 struct xfs_cud_log_item *cudp; 458 struct xfs_trans *tp; 459 struct xfs_btree_cur *rcur = NULL; 460 enum xfs_refcount_intent_type type; 461 xfs_fsblock_t new_fsb; 462 xfs_extlen_t new_len; 463 struct xfs_bmbt_irec irec; 464 bool requeue_only = false; 465 struct xfs_mount *mp = parent_tp->t_mountp; 466 467 ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); 468 469 /* 470 * First check the validity of the extents described by the 471 * CUI. If any are bad, then assume that all are bad and 472 * just toss the CUI. 473 */ 474 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 475 refc = &cuip->cui_format.cui_extents[i]; 476 startblock_fsb = XFS_BB_TO_FSB(mp, 477 XFS_FSB_TO_DADDR(mp, refc->pe_startblock)); 478 switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { 479 case XFS_REFCOUNT_INCREASE: 480 case XFS_REFCOUNT_DECREASE: 481 case XFS_REFCOUNT_ALLOC_COW: 482 case XFS_REFCOUNT_FREE_COW: 483 op_ok = true; 484 break; 485 default: 486 op_ok = false; 487 break; 488 } 489 if (!op_ok || startblock_fsb == 0 || 490 refc->pe_len == 0 || 491 startblock_fsb >= mp->m_sb.sb_dblocks || 492 refc->pe_len >= mp->m_sb.sb_agblocks || 493 (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) { 494 /* 495 * This will pull the CUI from the AIL and 496 * free the memory associated with it. 497 */ 498 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); 499 xfs_cui_release(cuip); 500 return -EIO; 501 } 502 } 503 504 /* 505 * Under normal operation, refcount updates are deferred, so we 506 * wouldn't be adding them directly to a transaction. All 507 * refcount updates manage reservation usage internally and 508 * dynamically by deferring work that won't fit in the 509 * transaction. Normally, any work that needs to be deferred 510 * gets attached to the same defer_ops that scheduled the 511 * refcount update. However, we're in log recovery here, so we 512 * we use the passed in defer_ops and to finish up any work that 513 * doesn't fit. We need to reserve enough blocks to handle a 514 * full btree split on either end of the refcount range. 515 */ 516 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 517 mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); 518 if (error) 519 return error; 520 /* 521 * Recovery stashes all deferred ops during intent processing and 522 * finishes them on completion. Transfer current dfops state to this 523 * transaction and transfer the result back before we return. 524 */ 525 xfs_defer_move(tp, parent_tp); 526 cudp = xfs_trans_get_cud(tp, cuip); 527 528 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 529 refc = &cuip->cui_format.cui_extents[i]; 530 refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; 531 switch (refc_type) { 532 case XFS_REFCOUNT_INCREASE: 533 case XFS_REFCOUNT_DECREASE: 534 case XFS_REFCOUNT_ALLOC_COW: 535 case XFS_REFCOUNT_FREE_COW: 536 type = refc_type; 537 break; 538 default: 539 error = -EFSCORRUPTED; 540 goto abort_error; 541 } 542 if (requeue_only) { 543 new_fsb = refc->pe_startblock; 544 new_len = refc->pe_len; 545 } else 546 error = xfs_trans_log_finish_refcount_update(tp, cudp, 547 type, refc->pe_startblock, refc->pe_len, 548 &new_fsb, &new_len, &rcur); 549 if (error) 550 goto abort_error; 551 552 /* Requeue what we didn't finish. */ 553 if (new_len > 0) { 554 irec.br_startblock = new_fsb; 555 irec.br_blockcount = new_len; 556 switch (type) { 557 case XFS_REFCOUNT_INCREASE: 558 xfs_refcount_increase_extent(tp, &irec); 559 break; 560 case XFS_REFCOUNT_DECREASE: 561 xfs_refcount_decrease_extent(tp, &irec); 562 break; 563 case XFS_REFCOUNT_ALLOC_COW: 564 xfs_refcount_alloc_cow_extent(tp, 565 irec.br_startblock, 566 irec.br_blockcount); 567 break; 568 case XFS_REFCOUNT_FREE_COW: 569 xfs_refcount_free_cow_extent(tp, 570 irec.br_startblock, 571 irec.br_blockcount); 572 break; 573 default: 574 ASSERT(0); 575 } 576 requeue_only = true; 577 } 578 } 579 580 xfs_refcount_finish_one_cleanup(tp, rcur, error); 581 set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); 582 xfs_defer_move(parent_tp, tp); 583 error = xfs_trans_commit(tp); 584 return error; 585 586 abort_error: 587 xfs_refcount_finish_one_cleanup(tp, rcur, error); 588 xfs_defer_move(parent_tp, tp); 589 xfs_trans_cancel(tp); 590 return error; 591 } 592