// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
 * Copyright (C) 2010 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_log_priv.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_extent_busy.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_defer.h"

kmem_zone_t	*xfs_trans_zone;

#if defined(CONFIG_TRACEPOINTS)
static void
xfs_trans_trace_reservations(
	struct xfs_mount	*mp)
{
	struct xfs_trans_res	resv;
	struct xfs_trans_res	*res;
	struct xfs_trans_res	*end_res;
	int			i;

	res = (struct xfs_trans_res *)M_RES(mp);
	end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
	for (i = 0; res < end_res; i++, res++)
		trace_xfs_trans_resv_calc(mp, i, res);
	xfs_log_get_max_trans_res(mp, &resv);
	trace_xfs_trans_resv_calc(mp, -1, &resv);
}
#else
# define xfs_trans_trace_reservations(mp)
#endif

/*
 * Initialize the precomputed transaction reservation values
 * in the mount structure.
 */
void
xfs_trans_init(
	struct xfs_mount	*mp)
{
	xfs_trans_resv_calc(mp, M_RES(mp));
	xfs_trans_trace_reservations(mp);
}

/*
 * Free the transaction structure.  If there is more clean up
 * to do when the structure is freed, add it here.
 */
STATIC void
xfs_trans_free(
	struct xfs_trans	*tp)
{
	xfs_extent_busy_sort(&tp->t_busy);
	xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);

	trace_xfs_trans_free(tp, _RET_IP_);
	if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
		sb_end_intwrite(tp->t_mountp->m_super);
	xfs_trans_free_dqinfo(tp);
	kmem_cache_free(xfs_trans_zone, tp);
}

/*
 * This is called to create a new transaction which will share the
 * permanent log reservation of the given transaction.  The remaining
 * unused block and rt extent reservations are also inherited.  This
 * implies that the original transaction is no longer allowed to allocate
 * blocks.  Locks and log items, however, are not inherited.  They must
 * be added to the new transaction explicitly.
 */
STATIC struct xfs_trans *
xfs_trans_dup(
	struct xfs_trans	*tp)
{
	struct xfs_trans	*ntp;

	trace_xfs_trans_dup(tp, _RET_IP_);

	ntp = kmem_zone_zalloc(xfs_trans_zone, 0);

	/*
	 * Initialize the new transaction structure.
	 */
	ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
	ntp->t_mountp = tp->t_mountp;
	INIT_LIST_HEAD(&ntp->t_items);
	INIT_LIST_HEAD(&ntp->t_busy);
	INIT_LIST_HEAD(&ntp->t_dfops);
	ntp->t_firstblock = NULLFSBLOCK;

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(tp->t_ticket != NULL);

	ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
		       (tp->t_flags & XFS_TRANS_RESERVE) |
		       (tp->t_flags & XFS_TRANS_NO_WRITECOUNT);
	/* We gave our writer reference to the new transaction */
	tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
	ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);

	ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
	ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
	tp->t_blk_res = tp->t_blk_res_used;

	ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
	tp->t_rtx_res = tp->t_rtx_res_used;
	ntp->t_pflags = tp->t_pflags;

	/* move deferred ops over to the new tp */
	xfs_defer_move(ntp, tp);

	xfs_trans_dup_dqinfo(tp, ntp);
	return ntp;
}

/*
 * This is called to reserve free disk blocks and log space for the
 * given transaction.  This must be done before allocating any resources
 * within the transaction.
 *
 * This will return ENOSPC if there are not enough blocks available.
 * It will sleep waiting for available log space.
 * A permanent log reservation (XFS_TRANS_PERM_LOG_RES set in
 * resp->tr_logflags) is used by long running transactions.  If any one of
 * the reservations fails then they will all be backed out.
 *
 * This does not do quota reservations. That typically is done by the
 * caller afterwards.
 */
static int
xfs_trans_reserve(
	struct xfs_trans	*tp,
	struct xfs_trans_res	*resp,
	uint			blocks,
	uint			rtextents)
{
	struct xfs_mount	*mp = tp->t_mountp;
	int			error = 0;
	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;

	/* Mark this thread as being in a transaction */
	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);

	/*
	 * Attempt to reserve the needed disk blocks by decrementing
	 * the number needed from the number available.  This will
	 * fail if the count would go below zero.
	 */
	if (blocks > 0) {
		error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
		if (error != 0) {
			current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
			return -ENOSPC;
		}
		tp->t_blk_res += blocks;
	}

	/*
	 * Reserve the log space needed for this transaction.
	 */
	if (resp->tr_logres > 0) {
		bool	permanent = false;

		ASSERT(tp->t_log_res == 0 ||
		       tp->t_log_res == resp->tr_logres);
		ASSERT(tp->t_log_count == 0 ||
		       tp->t_log_count == resp->tr_logcount);

		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
			permanent = true;
		} else {
			ASSERT(tp->t_ticket == NULL);
			ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
		}

		if (tp->t_ticket != NULL) {
			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
			error = xfs_log_regrant(mp, tp->t_ticket);
		} else {
			error = xfs_log_reserve(mp,
						resp->tr_logres,
						resp->tr_logcount,
						&tp->t_ticket, XFS_TRANSACTION,
						permanent);
		}

		if (error)
			goto undo_blocks;

		tp->t_log_res = resp->tr_logres;
		tp->t_log_count = resp->tr_logcount;
	}

	/*
	 * Attempt to reserve the needed realtime extents by decrementing
	 * the number needed from the number available.
	 * This will fail if the count would go below zero.
	 */
	if (rtextents > 0) {
		error = xfs_mod_frextents(mp, -((int64_t)rtextents));
		if (error) {
			error = -ENOSPC;
			goto undo_log;
		}
		tp->t_rtx_res += rtextents;
	}

	return 0;

	/*
	 * Error cases jump to one of these labels to undo any
	 * reservations which have already been performed.
	 */
undo_log:
	if (resp->tr_logres > 0) {
		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
		tp->t_ticket = NULL;
		tp->t_log_res = 0;
		tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
	}

undo_blocks:
	if (blocks > 0) {
		xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
		tp->t_blk_res = 0;
	}

	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);

	return error;
}

int
xfs_trans_alloc(
	struct xfs_mount	*mp,
	struct xfs_trans_res	*resp,
	uint			blocks,
	uint			rtextents,
	uint			flags,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	int			error;

	/*
	 * Allocate the handle before we do our freeze accounting and setting up
	 * GFP_NOFS allocation context so that we avoid lockdep false positives
	 * by doing GFP_KERNEL allocations inside sb_start_intwrite().
	 */
	tp = kmem_zone_zalloc(xfs_trans_zone, 0);
	if (!(flags & XFS_TRANS_NO_WRITECOUNT))
		sb_start_intwrite(mp->m_super);

	/*
	 * Zero-reservation ("empty") transactions can't modify anything, so
	 * they're allowed to run while we're frozen.
	 */
	WARN_ON(resp->tr_logres > 0 &&
		mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);

	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
	tp->t_flags = flags;
	tp->t_mountp = mp;
	INIT_LIST_HEAD(&tp->t_items);
	INIT_LIST_HEAD(&tp->t_busy);
	INIT_LIST_HEAD(&tp->t_dfops);
	tp->t_firstblock = NULLFSBLOCK;

	error = xfs_trans_reserve(tp, resp, blocks, rtextents);
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	trace_xfs_trans_alloc(tp, _RET_IP_);

	*tpp = tp;
	return 0;
}

/*
 * Create an empty transaction with no reservation.  This is a defensive
 * mechanism for routines that query metadata without actually modifying
 * them -- if the metadata being queried is somehow cross-linked (think a
 * btree block pointer that points higher in the tree), we risk deadlock.
 * However, blocks grabbed as part of a transaction can be re-grabbed.
 * The verifiers will notice the corrupt block and the operation will fail
 * back to userspace without deadlocking.
 *
 * Note the zero-length reservation; this transaction MUST be cancelled
 * without any dirty data.
 *
 * Callers should obtain freeze protection to avoid a conflict with fs
 * freezing where we can be grabbing buffers at the same time that freeze
 * is trying to drain the buffer LRU list.
 */
int
xfs_trans_alloc_empty(
	struct xfs_mount	*mp,
	struct xfs_trans	**tpp)
{
	struct xfs_trans_res	resv = {0};

	return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
}
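
/*
 * Illustrative sketch (editor's addition, not part of this file): the
 * typical life cycle a caller drives through the interfaces above.  The
 * reservation name (tr_write) and the block count below are placeholder
 * choices, not values mandated here; real callers pick the precomputed
 * reservation that matches the operation they are about to perform.
 *
 *	struct xfs_trans	*tp;
 *	int			error;
 *
 *	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
 *	if (error)
 *		return error;
 *
 *	// ... join locked objects to tp and log the modifications ...
 *
 *	error = xfs_trans_commit(tp);	// on failure paths: xfs_trans_cancel(tp)
 *	return error;
 *
 * Query-only paths use xfs_trans_alloc_empty() instead and must end with
 * xfs_trans_cancel(), since an empty transaction can never be dirtied.
 */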

/*
 * Record the indicated change to the given field for application
 * to the file system's superblock when the transaction commits.
 * For now, just store the change in the transaction structure.
 *
 * Mark the transaction structure to indicate that the superblock
 * needs to be updated before committing.
 *
 * Because we may not be keeping track of allocated/free inodes and
 * used filesystem blocks in the superblock, we do not mark the
 * superblock dirty in this transaction if we modify these fields.
 * We still need to update the transaction deltas so that they get
 * applied to the incore superblock, but we don't want them to
 * cause the superblock to get locked and logged if these are the
 * only fields in the superblock that the transaction modifies.
 */
void
xfs_trans_mod_sb(
	xfs_trans_t	*tp,
	uint		field,
	int64_t		delta)
{
	uint32_t	flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
	xfs_mount_t	*mp = tp->t_mountp;

	switch (field) {
	case XFS_TRANS_SB_ICOUNT:
		tp->t_icount_delta += delta;
		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_IFREE:
		tp->t_ifree_delta += delta;
		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_FDBLOCKS:
		/*
		 * Track the number of blocks allocated in the transaction.
		 * Make sure it does not exceed the number reserved. If so,
		 * shutdown as this can lead to accounting inconsistency.
		 */
		if (delta < 0) {
			tp->t_blk_res_used += (uint)-delta;
			if (tp->t_blk_res_used > tp->t_blk_res)
				xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		}
		tp->t_fdblocks_delta += delta;
		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_RES_FDBLOCKS:
		/*
		 * The allocation has already been applied to the
		 * in-core superblock's counter.  This should only
		 * be applied to the on-disk superblock.
		 */
		tp->t_res_fdblocks_delta += delta;
		if (xfs_sb_version_haslazysbcount(&mp->m_sb))
			flags &= ~XFS_TRANS_SB_DIRTY;
		break;
	case XFS_TRANS_SB_FREXTENTS:
		/*
		 * Track the number of blocks allocated in the
		 * transaction.  Make sure it does not exceed the
		 * number reserved.
		 */
		if (delta < 0) {
			tp->t_rtx_res_used += (uint)-delta;
			ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
		}
		tp->t_frextents_delta += delta;
		break;
	case XFS_TRANS_SB_RES_FREXTENTS:
		/*
		 * The allocation has already been applied to the
		 * in-core superblock's counter.  This should only
		 * be applied to the on-disk superblock.
		 */
		ASSERT(delta < 0);
		tp->t_res_frextents_delta += delta;
		break;
	case XFS_TRANS_SB_DBLOCKS:
		ASSERT(delta > 0);
		tp->t_dblocks_delta += delta;
		break;
	case XFS_TRANS_SB_AGCOUNT:
		ASSERT(delta > 0);
		tp->t_agcount_delta += delta;
		break;
	case XFS_TRANS_SB_IMAXPCT:
		tp->t_imaxpct_delta += delta;
		break;
	case XFS_TRANS_SB_REXTSIZE:
		tp->t_rextsize_delta += delta;
		break;
	case XFS_TRANS_SB_RBMBLOCKS:
		tp->t_rbmblocks_delta += delta;
		break;
	case XFS_TRANS_SB_RBLOCKS:
		tp->t_rblocks_delta += delta;
		break;
	case XFS_TRANS_SB_REXTENTS:
		tp->t_rextents_delta += delta;
		break;
	case XFS_TRANS_SB_REXTSLOG:
		tp->t_rextslog_delta += delta;
		break;
	default:
		ASSERT(0);
		return;
	}

	tp->t_flags |= flags;
}
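
/*
 * Illustrative sketch (editor's addition): how a caller that has just
 * allocated "len" filesystem blocks out of its reservation might record
 * the delta.  The variable names are hypothetical; the field selector and
 * sign convention follow the switch statement above.
 *
 *	// consume "len" blocks from the free-space counter
 *	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, -(int64_t)len);
 *
 * A negative FDBLOCKS delta is charged against tp->t_blk_res; exceeding
 * the reservation shuts the filesystem down, as handled above.
 */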

/*
 * xfs_trans_apply_sb_deltas() is called from the commit code
 * to bring the superblock buffer into the current transaction
 * and modify it as requested by earlier calls to xfs_trans_mod_sb().
 *
 * For now we just look at each field allowed to change and change
 * it if necessary.
 */
STATIC void
xfs_trans_apply_sb_deltas(
	xfs_trans_t	*tp)
{
	xfs_dsb_t	*sbp;
	xfs_buf_t	*bp;
	int		whole = 0;

	bp = xfs_trans_getsb(tp, tp->t_mountp);
	sbp = bp->b_addr;

	/*
	 * Check that superblock mods match the mods made to AGF counters.
	 */
	ASSERT((tp->t_fdblocks_delta + tp->t_res_fdblocks_delta) ==
	       (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
		tp->t_ag_btree_delta));

	/*
	 * Only update the superblock counters if we are logging them
	 */
	if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) {
		if (tp->t_icount_delta)
			be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
		if (tp->t_ifree_delta)
			be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta);
		if (tp->t_fdblocks_delta)
			be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
		if (tp->t_res_fdblocks_delta)
			be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
	}

	if (tp->t_frextents_delta)
		be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta);
	if (tp->t_res_frextents_delta)
		be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta);

	if (tp->t_dblocks_delta) {
		be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
		whole = 1;
	}
	if (tp->t_agcount_delta) {
		be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta);
		whole = 1;
	}
	if (tp->t_imaxpct_delta) {
		sbp->sb_imax_pct += tp->t_imaxpct_delta;
		whole = 1;
	}
	if (tp->t_rextsize_delta) {
		be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
		whole = 1;
	}
	if (tp->t_rbmblocks_delta) {
		be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
		whole = 1;
	}
	if (tp->t_rblocks_delta) {
		be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
		whole = 1;
	}
	if (tp->t_rextents_delta) {
		be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta);
		whole = 1;
	}
	if (tp->t_rextslog_delta) {
		sbp->sb_rextslog += tp->t_rextslog_delta;
		whole = 1;
	}

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
	if (whole)
		/*
		 * Log the whole thing, the fields are noncontiguous.
		 */
		xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_dsb_t) - 1);
	else
		/*
		 * Since all the modifiable fields are contiguous, we
		 * can get away with this.
		 */
		xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount),
				  offsetof(xfs_dsb_t, sb_frextents) +
				  sizeof(sbp->sb_frextents) - 1);
}

/*
 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
 * and apply superblock counter changes to the in-core superblock.  The
 * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
 * applied to the in-core superblock.  The idea is that that has already been
 * done.
 *
 * If we are not logging superblock counters, then the inode allocated/free and
 * used block counts are not updated in the on disk superblock. In this case,
 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
 * still need to update the incore superblock with the changes.
 *
 * Deltas for the inode count are +/-64, hence we use a large batch size of 128
 * so we don't need to take the counter lock on every update.
 */
#define XFS_ICOUNT_BATCH	128

void
xfs_trans_unreserve_and_mod_sb(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
	int64_t			blkdelta = 0;
	int64_t			rtxdelta = 0;
	int64_t			idelta = 0;
	int64_t			ifreedelta = 0;
	int			error;

	/* calculate deltas */
	if (tp->t_blk_res > 0)
		blkdelta = tp->t_blk_res;
	if ((tp->t_fdblocks_delta != 0) &&
	    (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
	     (tp->t_flags & XFS_TRANS_SB_DIRTY)))
		blkdelta += tp->t_fdblocks_delta;

	if (tp->t_rtx_res > 0)
		rtxdelta = tp->t_rtx_res;
	if ((tp->t_frextents_delta != 0) &&
	    (tp->t_flags & XFS_TRANS_SB_DIRTY))
		rtxdelta += tp->t_frextents_delta;

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
	    (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
		idelta = tp->t_icount_delta;
		ifreedelta = tp->t_ifree_delta;
	}

	/* apply the per-cpu counters */
	if (blkdelta) {
		error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
		ASSERT(!error);
	}

	if (idelta) {
		percpu_counter_add_batch(&mp->m_icount, idelta,
					 XFS_ICOUNT_BATCH);
		if (idelta < 0)
			ASSERT(__percpu_counter_compare(&mp->m_icount, 0,
							XFS_ICOUNT_BATCH) >= 0);
	}

	if (ifreedelta) {
		percpu_counter_add(&mp->m_ifree, ifreedelta);
		if (ifreedelta < 0)
			ASSERT(percpu_counter_compare(&mp->m_ifree, 0) >= 0);
	}

	if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
		return;

	/* apply remaining deltas */
	spin_lock(&mp->m_sb_lock);
	mp->m_sb.sb_frextents += rtxdelta;
	mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
	mp->m_sb.sb_agcount += tp->t_agcount_delta;
	mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
	mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
	mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
	mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
	mp->m_sb.sb_rextents += tp->t_rextents_delta;
	mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
	spin_unlock(&mp->m_sb_lock);

	/*
	 * Debug checks outside of the spinlock so they don't lock up the
	 * machine if they fail.
	 */
	ASSERT(mp->m_sb.sb_imax_pct >= 0);
	ASSERT(mp->m_sb.sb_rextslog >= 0);
	return;
}

/* Add the given log item to the transaction's list of log items. */
void
xfs_trans_add_item(
	struct xfs_trans	*tp,
	struct xfs_log_item	*lip)
{
	ASSERT(lip->li_mountp == tp->t_mountp);
	ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
	ASSERT(list_empty(&lip->li_trans));
	ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));

	list_add_tail(&lip->li_trans, &tp->t_items);
	trace_xfs_trans_add_item(tp, _RET_IP_);
}
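
/*
 * Illustrative sketch (editor's addition): item-type specific "join"
 * helpers are the usual callers of xfs_trans_add_item().  A hypothetical
 * helper for some object "foo" would simply attach the object's log item
 * to the transaction:
 *
 *	void
 *	xfs_trans_join_foo(
 *		struct xfs_trans	*tp,
 *		struct xfs_foo		*foo)
 *	{
 *		xfs_trans_add_item(tp, &foo->foo_item);
 *	}
 *
 * The item typically becomes dirty only when the caller later logs a
 * change, setting XFS_LI_DIRTY on the item and XFS_TRANS_DIRTY on the
 * transaction.
 */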

/*
 * Unlink the log item from the transaction.  The log item is no longer
 * considered dirty in this transaction, as the linked transaction has
 * finished, either by abort or commit completion.
 */
void
xfs_trans_del_item(
	struct xfs_log_item	*lip)
{
	clear_bit(XFS_LI_DIRTY, &lip->li_flags);
	list_del_init(&lip->li_trans);
}

/* Detach and unlock all of the items in a transaction */
static void
xfs_trans_free_items(
	struct xfs_trans	*tp,
	bool			abort)
{
	struct xfs_log_item	*lip, *next;

	trace_xfs_trans_free_items(tp, _RET_IP_);

	list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
		xfs_trans_del_item(lip);
		if (abort)
			set_bit(XFS_LI_ABORTED, &lip->li_flags);
		if (lip->li_ops->iop_release)
			lip->li_ops->iop_release(lip);
	}
}

static inline void
xfs_log_item_batch_insert(
	struct xfs_ail		*ailp,
	struct xfs_ail_cursor	*cur,
	struct xfs_log_item	**log_items,
	int			nr_items,
	xfs_lsn_t		commit_lsn)
{
	int	i;

	spin_lock(&ailp->ail_lock);
	/* xfs_trans_ail_update_bulk drops ailp->ail_lock */
	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);

	for (i = 0; i < nr_items; i++) {
		struct xfs_log_item *lip = log_items[i];

		if (lip->li_ops->iop_unpin)
			lip->li_ops->iop_unpin(lip, 0);
	}
}

/*
 * Bulk operation version of xfs_trans_committed that takes a log vector of
 * items to insert into the AIL. This uses bulk AIL insertion techniques to
 * minimise lock traffic.
 *
 * If we are called with the aborted flag set, it is because a log write during
 * a CIL checkpoint commit has failed. In this case, all the items in the
 * checkpoint have already gone through iop_committed and iop_committing, which
 * means that checkpoint commit abort handling is treated exactly the same
 * as an iclog write error even though we haven't started any IO yet. Hence in
 * this case all we need to do is iop_committed processing, followed by an
 * iop_unpin(aborted) call.
 *
 * The AIL cursor is used to optimise the insert process. If commit_lsn is not
 * at the end of the AIL, the insert cursor avoids the need to walk
 * the AIL to find the insertion point on every xfs_log_item_batch_insert()
 * call. This saves a lot of needless list walking and is a net win, even
 * though it slightly increases the amount of AIL lock traffic to set it up
 * and tear it down.
 */
void
xfs_trans_committed_bulk(
	struct xfs_ail		*ailp,
	struct xfs_log_vec	*log_vector,
	xfs_lsn_t		commit_lsn,
	bool			aborted)
{
#define LOG_ITEM_BATCH_SIZE	32
	struct xfs_log_item	*log_items[LOG_ITEM_BATCH_SIZE];
	struct xfs_log_vec	*lv;
	struct xfs_ail_cursor	cur;
	int			i = 0;

	spin_lock(&ailp->ail_lock);
	xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
	spin_unlock(&ailp->ail_lock);

	/* unpin all the log items */
	for (lv = log_vector; lv; lv = lv->lv_next) {
		struct xfs_log_item	*lip = lv->lv_item;
		xfs_lsn_t		item_lsn;

		if (aborted)
			set_bit(XFS_LI_ABORTED, &lip->li_flags);

		if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
			lip->li_ops->iop_release(lip);
			continue;
		}

		if (lip->li_ops->iop_committed)
			item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
		else
			item_lsn = commit_lsn;

		/* item_lsn of -1 means the item needs no further processing */
		if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
			continue;

		/*
		 * if we are aborting the operation, no point in inserting the
		 * object into the AIL as we are in a shutdown situation.
		 */
		if (aborted) {
			ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
			if (lip->li_ops->iop_unpin)
				lip->li_ops->iop_unpin(lip, 1);
			continue;
		}

		if (item_lsn != commit_lsn) {

			/*
			 * Not a bulk update option due to unusual item_lsn.
			 * Push into AIL immediately, rechecking the lsn once
			 * we have the ail lock. Then unpin the item. This does
			 * not affect the AIL cursor the bulk insert path is
			 * using.
			 */
			spin_lock(&ailp->ail_lock);
			if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
				xfs_trans_ail_update(ailp, lip, item_lsn);
			else
				spin_unlock(&ailp->ail_lock);
			if (lip->li_ops->iop_unpin)
				lip->li_ops->iop_unpin(lip, 0);
			continue;
		}

		/* Item is a candidate for bulk AIL insert. */
		log_items[i++] = lv->lv_item;
		if (i >= LOG_ITEM_BATCH_SIZE) {
			xfs_log_item_batch_insert(ailp, &cur, log_items,
					LOG_ITEM_BATCH_SIZE, commit_lsn);
			i = 0;
		}
	}

	/* make sure we insert the remainder! */
	if (i)
		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);

	spin_lock(&ailp->ail_lock);
	xfs_trans_ail_cursor_done(&cur);
	spin_unlock(&ailp->ail_lock);
}

/*
 * Commit the given transaction to the log.
 *
 * The XFS disk error handling mechanism is not based on a typical
 * transaction abort mechanism.  Logically after the filesystem
 * gets marked 'SHUTDOWN', we can't let any new transactions
 * be durable - ie. committed to disk - because some metadata might
 * be inconsistent.  In such cases, this returns an error, and the
 * caller may assume that all locked objects joined to the transaction
 * have already been unlocked as if the commit had succeeded.
 * Do not reference the transaction structure after this call.
 */
static int
__xfs_trans_commit(
	struct xfs_trans	*tp,
	bool			regrant)
{
	struct xfs_mount	*mp = tp->t_mountp;
	xfs_lsn_t		commit_lsn = -1;
	int			error = 0;
	int			sync = tp->t_flags & XFS_TRANS_SYNC;

	trace_xfs_trans_commit(tp, _RET_IP_);

	/*
	 * Finish deferred items on final commit. Only permanent transactions
	 * should ever have deferred ops.
	 */
	WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
		error = xfs_defer_finish_noroll(&tp);
		if (error)
			goto out_unreserve;
	}

	/*
	 * If there is nothing to be logged by the transaction,
	 * then unlock all of the items associated with the
	 * transaction and free the transaction structure.
	 * Also make sure to return any reserved blocks to
	 * the free pool.
	 */
	if (!(tp->t_flags & XFS_TRANS_DIRTY))
		goto out_unreserve;

	if (XFS_FORCED_SHUTDOWN(mp)) {
		error = -EIO;
		goto out_unreserve;
	}

	ASSERT(tp->t_ticket != NULL);

	/*
	 * If we need to update the superblock, then do it now.
	 */
	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
		xfs_trans_apply_sb_deltas(tp);
	xfs_trans_apply_dquot_deltas(tp);

	xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);

	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	xfs_trans_free(tp);

	/*
	 * If the transaction needs to be synchronous, then force the
	 * log out now and wait for it.
	 */
	if (sync) {
		error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
		XFS_STATS_INC(mp, xs_trans_sync);
	} else {
		XFS_STATS_INC(mp, xs_trans_async);
	}

	return error;

out_unreserve:
	xfs_trans_unreserve_and_mod_sb(tp);

	/*
	 * It is indeed possible for the transaction to be not dirty but
	 * the dqinfo portion to be.  All that means is that we have some
	 * (non-persistent) quota reservations that need to be unreserved.
	 */
	xfs_trans_unreserve_and_mod_dquots(tp);
	if (tp->t_ticket) {
		if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log))
			xfs_log_ticket_regrant(mp->m_log, tp->t_ticket);
		else
			xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
		tp->t_ticket = NULL;
	}
	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	xfs_trans_free_items(tp, !!error);
	xfs_trans_free(tp);

	XFS_STATS_INC(mp, xs_trans_empty);
	return error;
}

int
xfs_trans_commit(
	struct xfs_trans	*tp)
{
	return __xfs_trans_commit(tp, false);
}

/*
 * Unlock all of the transaction's items and free the transaction.
 * The transaction must not have modified any of its items, because
 * there is no way to restore them to their previous state.
 *
 * If the transaction has made a log reservation, make sure to release
 * it as well.
 */
void
xfs_trans_cancel(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	bool			dirty = (tp->t_flags & XFS_TRANS_DIRTY);

	trace_xfs_trans_cancel(tp, _RET_IP_);

	if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
		xfs_defer_cancel(tp);

	/*
	 * See if the caller is relying on us to shut down the
	 * filesystem.  This happens in paths where we detect
	 * corruption and decide to give up.
	 */
	if (dirty && !XFS_FORCED_SHUTDOWN(mp)) {
		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	}
#ifdef DEBUG
	if (!dirty && !XFS_FORCED_SHUTDOWN(mp)) {
		struct xfs_log_item *lip;

		list_for_each_entry(lip, &tp->t_items, li_trans)
			ASSERT(!(lip->li_type == XFS_LI_EFD));
	}
#endif
	xfs_trans_unreserve_and_mod_sb(tp);
	xfs_trans_unreserve_and_mod_dquots(tp);

	if (tp->t_ticket) {
		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
		tp->t_ticket = NULL;
	}

	/* mark this thread as no longer being in a transaction */
	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);

	xfs_trans_free_items(tp, dirty);
	xfs_trans_free(tp);
}

/*
 * Roll from one trans in the sequence of PERMANENT transactions to
 * the next: permanent transactions are only flushed out when
 * committed with xfs_trans_commit(), but we still want to let chunks
 * of it go to the log as soon as possible.  So we commit the chunk
 * we've been working on and get a new transaction to continue.
 */
int
xfs_trans_roll(
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*trans = *tpp;
	struct xfs_trans_res	tres;
	int			error;

	trace_xfs_trans_roll(trans, _RET_IP_);

	/*
	 * Copy the critical parameters from one trans to the next.
	 */
	tres.tr_logres = trans->t_log_res;
	tres.tr_logcount = trans->t_log_count;

	*tpp = xfs_trans_dup(trans);

	/*
	 * Commit the current transaction.
	 * If this commit failed, then it'd just unlock those items that
	 * are not marked ihold.
	 * That also means that a filesystem shutdown
	 * is in progress.  The caller takes the responsibility to cancel
	 * the duplicate transaction that gets returned.
	 */
	error = __xfs_trans_commit(trans, true);
	if (error)
		return error;

	/*
	 * Reserve space in the log for the next transaction.
	 * This also pushes items in the "AIL", the list of logged items,
	 * out to disk if they are taking up space at the tail of the log
	 * that we want to use.  This requires that either nothing be locked
	 * across this call, or that anything that is locked be logged in
	 * the prior and the next transactions.
	 */
	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
	return xfs_trans_reserve(*tpp, &tres, 0, 0);
}
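
/*
 * Illustrative sketch (editor's addition): the rolling-transaction pattern
 * served by xfs_trans_roll().  The loop structure and the "more work"
 * predicate are hypothetical; the important point, per the reservation
 * comment above, is that any object held locked across the roll must be
 * re-joined to (and, if modified again, re-logged in) the new transaction.
 *
 *	while (more_work_to_do) {
 *		// ... log one chunk of modifications against tp ...
 *
 *		error = xfs_trans_roll(&tp);
 *		if (error)
 *			break;		// caller must still cancel tp
 *
 *		// re-join still-locked objects to the new tp here
 *	}
 */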