// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_rmap_item.h"
#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_error.h"

kmem_zone_t	*xfs_rui_zone;
kmem_zone_t	*xfs_rud_zone;

static inline struct xfs_rui_log_item *RUI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_rui_log_item, rui_item);
}

void
xfs_rui_item_free(
	struct xfs_rui_log_item	*ruip)
{
	if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
		kmem_free(ruip);
	else
		kmem_cache_free(xfs_rui_zone, ruip);
}

/*
 * Freeing the RUI requires that we remove it from the AIL if it has already
 * been placed there. However, the RUI may not yet have been placed in the AIL
 * when called by xfs_rui_release() from RUD processing due to the ordering of
 * committed vs unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the RUI.
 */
void
xfs_rui_release(
	struct xfs_rui_log_item	*ruip)
{
	ASSERT(atomic_read(&ruip->rui_refcount) > 0);
	if (atomic_dec_and_test(&ruip->rui_refcount)) {
		xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
		xfs_rui_item_free(ruip);
	}
}

STATIC void
xfs_rui_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_rui_log_item	*ruip = RUI_ITEM(lip);

	*nvecs += 1;
	*nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given rui log item. We use only 1 iovec, and we point that
 * at the rui_log_format structure embedded in the rui item.
 * It is at this point that we assert that all of the extent
 * slots in the rui item have been filled.
 */
STATIC void
xfs_rui_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_rui_log_item	*ruip = RUI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	ASSERT(atomic_read(&ruip->rui_next_extent) ==
			ruip->rui_format.rui_nextents);

	ruip->rui_format.rui_type = XFS_LI_RUI;
	ruip->rui_format.rui_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
			xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
}

/*
 * The unpin operation is the last place an RUI is manipulated in the log. It is
 * either inserted in the AIL or aborted in the event of a log I/O error. In
 * either case, the RUI transaction has been successfully committed to make it
 * this far. Therefore, we expect whoever committed the RUI to either construct
 * and commit the RUD or drop the RUD's reference in the event of error. Simply
 * drop the log's RUI reference now that the log is done with it.
 */
STATIC void
xfs_rui_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_rui_log_item	*ruip = RUI_ITEM(lip);

	xfs_rui_release(ruip);
}

/*
 * The RUI has been either committed or aborted if the transaction has been
 * cancelled. If the transaction was cancelled, an RUD isn't going to be
 * constructed and thus we free the RUI here directly.
 */
STATIC void
xfs_rui_item_release(
	struct xfs_log_item	*lip)
{
	xfs_rui_release(RUI_ITEM(lip));
}

static const struct xfs_item_ops xfs_rui_item_ops = {
	.iop_size	= xfs_rui_item_size,
	.iop_format	= xfs_rui_item_format,
	.iop_unpin	= xfs_rui_item_unpin,
	.iop_release	= xfs_rui_item_release,
};

/*
 * Allocate and initialize an rui item with the given number of extents.
 */
struct xfs_rui_log_item *
xfs_rui_init(
	struct xfs_mount		*mp,
	uint				nextents)

{
	struct xfs_rui_log_item		*ruip;

	ASSERT(nextents > 0);
	if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
		ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), 0);
	else
		ruip = kmem_zone_zalloc(xfs_rui_zone, 0);

	xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
	ruip->rui_format.rui_nextents = nextents;
	ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
	atomic_set(&ruip->rui_next_extent, 0);
	atomic_set(&ruip->rui_refcount, 2);

	return ruip;
}

/*
 * Copy an RUI format buffer from the given buf, and into the destination
 * RUI format structure.  The RUI/RUD items were designed not to need any
 * special alignment handling.
 */
int
xfs_rui_copy_format(
	struct xfs_log_iovec		*buf,
	struct xfs_rui_log_format	*dst_rui_fmt)
{
	struct xfs_rui_log_format	*src_rui_fmt;
	uint				len;

	src_rui_fmt = buf->i_addr;
	len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);

	if (buf->i_len != len) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
		return -EFSCORRUPTED;
	}

	memcpy(dst_rui_fmt, src_rui_fmt, len);
	return 0;
}

static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_rud_log_item, rud_item);
}

STATIC void
xfs_rud_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_rud_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the
 * given rud log item. We use only 1 iovec, and we point that
 * at the rud_log_format structure embedded in the rud item.
 * It is at this point that we assert that all of the extent
 * slots in the rud item have been filled.
 */
STATIC void
xfs_rud_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	rudp->rud_format.rud_type = XFS_LI_RUD;
	rudp->rud_format.rud_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
			sizeof(struct xfs_rud_log_format));
}

/*
 * The RUD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the RUI and free the
 * RUD.
 */
STATIC void
xfs_rud_item_release(
	struct xfs_log_item	*lip)
{
	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);

	xfs_rui_release(rudp->rud_ruip);
	kmem_cache_free(xfs_rud_zone, rudp);
}

static const struct xfs_item_ops xfs_rud_item_ops = {
	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED,
	.iop_size	= xfs_rud_item_size,
	.iop_format	= xfs_rud_item_format,
	.iop_release	= xfs_rud_item_release,
};

static struct xfs_rud_log_item *
xfs_trans_get_rud(
	struct xfs_trans		*tp,
	struct xfs_rui_log_item		*ruip)
{
	struct xfs_rud_log_item		*rudp;

	rudp = kmem_zone_zalloc(xfs_rud_zone, 0);
	xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
			  &xfs_rud_item_ops);
	rudp->rud_ruip = ruip;
	rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;

	xfs_trans_add_item(tp, &rudp->rud_item);
	return rudp;
}

/* Set the map extent flags for this reverse mapping. */
static void
xfs_trans_set_rmap_flags(
	struct xfs_map_extent		*rmap,
	enum xfs_rmap_intent_type	type,
	int				whichfork,
	xfs_exntst_t			state)
{
	rmap->me_flags = 0;
	if (state == XFS_EXT_UNWRITTEN)
		rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
	if (whichfork == XFS_ATTR_FORK)
		rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
	switch (type) {
	case XFS_RMAP_MAP:
		rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
		break;
	case XFS_RMAP_MAP_SHARED:
		rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
		break;
	case XFS_RMAP_UNMAP:
		rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
		break;
	case XFS_RMAP_UNMAP_SHARED:
		rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
		break;
	case XFS_RMAP_CONVERT:
		rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
		break;
	case XFS_RMAP_CONVERT_SHARED:
		rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
		break;
	case XFS_RMAP_ALLOC:
		rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
		break;
	case XFS_RMAP_FREE:
		rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
		break;
	default:
		ASSERT(0);
	}
}

/*
 * Finish an rmap update and log it to the RUD. Note that the transaction is
 * marked dirty regardless of whether the rmap update succeeds or fails to
 * support the RUI/RUD lifecycle rules.
 */
static int
xfs_trans_log_finish_rmap_update(
	struct xfs_trans		*tp,
	struct xfs_rud_log_item		*rudp,
	enum xfs_rmap_intent_type	type,
	uint64_t			owner,
	int				whichfork,
	xfs_fileoff_t			startoff,
	xfs_fsblock_t			startblock,
	xfs_filblks_t			blockcount,
	xfs_exntst_t			state,
	struct xfs_btree_cur		**pcur)
{
	int				error;

	error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
			startblock, blockcount, state, pcur);

	/*
	 * Mark the transaction dirty, even on error. This ensures the
	 * transaction is aborted, which:
	 *
	 * 1.) releases the RUI and frees the RUD
	 * 2.) shuts down the filesystem
	 */
	tp->t_flags |= XFS_TRANS_DIRTY;
	set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);

	return error;
}

/* Sort rmap intents by AG. */
static int
xfs_rmap_update_diff_items(
	void				*priv,
	struct list_head		*a,
	struct list_head		*b)
{
	struct xfs_mount		*mp = priv;
	struct xfs_rmap_intent		*ra;
	struct xfs_rmap_intent		*rb;

	ra = container_of(a, struct xfs_rmap_intent, ri_list);
	rb = container_of(b, struct xfs_rmap_intent, ri_list);
	return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
		XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
}

/* Get an RUI. */
STATIC void *
xfs_rmap_update_create_intent(
	struct xfs_trans		*tp,
	unsigned int			count)
{
	struct xfs_rui_log_item		*ruip;

	ASSERT(tp != NULL);
	ASSERT(count > 0);

	ruip = xfs_rui_init(tp->t_mountp, count);
	ASSERT(ruip != NULL);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	xfs_trans_add_item(tp, &ruip->rui_item);
	return ruip;
}

/* Log rmap updates in the intent item. */
STATIC void
xfs_rmap_update_log_item(
	struct xfs_trans		*tp,
	void				*intent,
	struct list_head		*item)
{
	struct xfs_rui_log_item		*ruip = intent;
	struct xfs_rmap_intent		*rmap;
	uint				next_extent;
	struct xfs_map_extent		*map;

	rmap = container_of(item, struct xfs_rmap_intent, ri_list);

	tp->t_flags |= XFS_TRANS_DIRTY;
	set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);

	/*
	 * atomic_inc_return gives us the value after the increment;
	 * we want to use it as an array index so we need to subtract 1 from
	 * it.
	 */
	next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
	ASSERT(next_extent < ruip->rui_format.rui_nextents);
	map = &ruip->rui_format.rui_extents[next_extent];
	map->me_owner = rmap->ri_owner;
	map->me_startblock = rmap->ri_bmap.br_startblock;
	map->me_startoff = rmap->ri_bmap.br_startoff;
	map->me_len = rmap->ri_bmap.br_blockcount;
	xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
			rmap->ri_bmap.br_state);
}

/* Get an RUD so we can process all the deferred rmap updates. */
STATIC void *
xfs_rmap_update_create_done(
	struct xfs_trans		*tp,
	void				*intent,
	unsigned int			count)
{
	return xfs_trans_get_rud(tp, intent);
}

/* Process a deferred rmap update. */
STATIC int
xfs_rmap_update_finish_item(
	struct xfs_trans		*tp,
	struct list_head		*item,
	void				*done_item,
	void				**state)
{
	struct xfs_rmap_intent		*rmap;
	int				error;

	rmap = container_of(item, struct xfs_rmap_intent, ri_list);
	error = xfs_trans_log_finish_rmap_update(tp, done_item,
			rmap->ri_type,
			rmap->ri_owner, rmap->ri_whichfork,
			rmap->ri_bmap.br_startoff,
			rmap->ri_bmap.br_startblock,
			rmap->ri_bmap.br_blockcount,
			rmap->ri_bmap.br_state,
			(struct xfs_btree_cur **)state);
	kmem_free(rmap);
	return error;
}

/* Clean up after processing deferred rmaps. */
STATIC void
xfs_rmap_update_finish_cleanup(
	struct xfs_trans	*tp,
	void			*state,
	int			error)
{
	struct xfs_btree_cur	*rcur = state;

	xfs_rmap_finish_one_cleanup(tp, rcur, error);
}

/* Abort all pending RUIs. */
STATIC void
xfs_rmap_update_abort_intent(
	void				*intent)
{
	xfs_rui_release(intent);
}

/* Cancel a deferred rmap update. */
STATIC void
xfs_rmap_update_cancel_item(
	struct list_head		*item)
{
	struct xfs_rmap_intent		*rmap;

	rmap = container_of(item, struct xfs_rmap_intent, ri_list);
	kmem_free(rmap);
}

const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
	.max_items	= XFS_RUI_MAX_FAST_EXTENTS,
	.diff_items	= xfs_rmap_update_diff_items,
	.create_intent	= xfs_rmap_update_create_intent,
	.abort_intent	= xfs_rmap_update_abort_intent,
	.log_item	= xfs_rmap_update_log_item,
	.create_done	= xfs_rmap_update_create_done,
	.finish_item	= xfs_rmap_update_finish_item,
	.finish_cleanup	= xfs_rmap_update_finish_cleanup,
	.cancel_item	= xfs_rmap_update_cancel_item,
};

/*
 * Process an rmap update intent item that was recovered from the log.
 * We need to update the rmapbt.
 */
int
xfs_rui_recover(
	struct xfs_mount		*mp,
	struct xfs_rui_log_item		*ruip)
{
	int				i;
	int				error = 0;
	struct xfs_map_extent		*rmap;
	xfs_fsblock_t			startblock_fsb;
	bool				op_ok;
	struct xfs_rud_log_item		*rudp;
	enum xfs_rmap_intent_type	type;
	int				whichfork;
	xfs_exntst_t			state;
	struct xfs_trans		*tp;
	struct xfs_btree_cur		*rcur = NULL;

	ASSERT(!test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags));

	/*
	 * First check the validity of the extents described by the
	 * RUI.  If any are bad, then assume that all are bad and
	 * just toss the RUI.
	 */
	for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
		rmap = &ruip->rui_format.rui_extents[i];
		startblock_fsb = XFS_BB_TO_FSB(mp,
				   XFS_FSB_TO_DADDR(mp, rmap->me_startblock));
		switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
		case XFS_RMAP_EXTENT_MAP:
		case XFS_RMAP_EXTENT_MAP_SHARED:
		case XFS_RMAP_EXTENT_UNMAP:
		case XFS_RMAP_EXTENT_UNMAP_SHARED:
		case XFS_RMAP_EXTENT_CONVERT:
		case XFS_RMAP_EXTENT_CONVERT_SHARED:
		case XFS_RMAP_EXTENT_ALLOC:
		case XFS_RMAP_EXTENT_FREE:
			op_ok = true;
			break;
		default:
			op_ok = false;
			break;
		}
		if (!op_ok || startblock_fsb == 0 ||
		    rmap->me_len == 0 ||
		    startblock_fsb >= mp->m_sb.sb_dblocks ||
		    rmap->me_len >= mp->m_sb.sb_agblocks ||
		    (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) {
			/*
			 * This will pull the RUI from the AIL and
			 * free the memory associated with it.
			 */
			set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
			xfs_rui_release(ruip);
			return -EFSCORRUPTED;
		}
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
			mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;
	rudp = xfs_trans_get_rud(tp, ruip);

	for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
		rmap = &ruip->rui_format.rui_extents[i];
		state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
				XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
		whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ?
				XFS_ATTR_FORK : XFS_DATA_FORK;
		switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
		case XFS_RMAP_EXTENT_MAP:
			type = XFS_RMAP_MAP;
			break;
		case XFS_RMAP_EXTENT_MAP_SHARED:
			type = XFS_RMAP_MAP_SHARED;
			break;
		case XFS_RMAP_EXTENT_UNMAP:
			type = XFS_RMAP_UNMAP;
			break;
		case XFS_RMAP_EXTENT_UNMAP_SHARED:
			type = XFS_RMAP_UNMAP_SHARED;
			break;
		case XFS_RMAP_EXTENT_CONVERT:
			type = XFS_RMAP_CONVERT;
			break;
		case XFS_RMAP_EXTENT_CONVERT_SHARED:
			type = XFS_RMAP_CONVERT_SHARED;
			break;
		case XFS_RMAP_EXTENT_ALLOC:
			type = XFS_RMAP_ALLOC;
			break;
		case XFS_RMAP_EXTENT_FREE:
			type = XFS_RMAP_FREE;
			break;
		default:
			XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
			error = -EFSCORRUPTED;
			goto abort_error;
		}
		error = xfs_trans_log_finish_rmap_update(tp, rudp, type,
				rmap->me_owner, whichfork,
				rmap->me_startoff, rmap->me_startblock,
				rmap->me_len, state, &rcur);
		if (error)
			goto abort_error;

	}

	xfs_rmap_finish_one_cleanup(tp, rcur, error);
	set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
	error = xfs_trans_commit(tp);
	return error;

abort_error:
	xfs_rmap_finish_one_cleanup(tp, rcur, error);
	xfs_trans_cancel(tp);
	return error;
}