/*
 * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_error.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_discard.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"

/*
 * Allocate a new ticket. Failing to get a new ticket makes it really hard to
 * recover, so we don't allow failure here. Also, we allocate in a context that
 * we don't want to be issuing transactions from, so we need to tell the
 * allocation code this as well.
 *
 * We don't reserve any space for the ticket - we are going to steal whatever
 * space we require from transactions as they commit. To ensure we reserve all
 * the space required, we need to set the current reservation of the ticket to
 * zero so that we know to steal the initial transaction overhead from the
 * first transaction commit.
 */
static struct xlog_ticket *
xlog_cil_ticket_alloc(
	struct xlog	*log)
{
	struct xlog_ticket *tic;

	tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
				KM_SLEEP|KM_NOFS);
	tic->t_trans_type = XFS_TRANS_CHECKPOINT;

	/*
	 * set the current reservation to zero so we know to steal the basic
	 * transaction overhead reservation from the first transaction commit.
	 */
	tic->t_curr_res = 0;
	return tic;
}
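/*
 * For example (sketching the steal mechanism described above): the first
 * transaction to commit into a new context finds t_curr_res == 0 in
 * xlog_cil_insert_items() below and donates its own unit reservation to
 * the checkpoint ticket:
 *
 *	if (ctx->ticket->t_curr_res == 0) {
 *		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
 *		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
 *	}
 *
 * Later commits then only pay for the bytes they add, so the fixed
 * checkpoint overhead is charged exactly once per context.
 */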
/*
 * After the first stage of log recovery is done, we know where the head and
 * tail of the log are. We need this log initialisation done before we can
 * initialise the first CIL checkpoint context.
 *
 * Here we allocate a log ticket to track space usage during a CIL push. This
 * ticket is passed to xlog_write() directly so that we don't slowly leak log
 * space by failing to account for space used by log headers and additional
 * region headers for split regions.
 */
void
xlog_cil_init_post_recovery(
	struct xlog	*log)
{
	log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
	log->l_cilp->xc_ctx->sequence = 1;
	log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
							  log->l_curr_block);
}

STATIC int
xlog_cil_lv_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	int	index;
	char	*ptr;

	/* format new vectors into array */
	lip->li_ops->iop_format(lip, lv->lv_iovecp);

	/* copy data into existing array */
	ptr = lv->lv_buf;
	for (index = 0; index < lv->lv_niovecs; index++) {
		struct xfs_log_iovec *vec = &lv->lv_iovecp[index];

		memcpy(ptr, vec->i_addr, vec->i_len);
		vec->i_addr = ptr;
		ptr += vec->i_len;
	}

	/*
	 * some size calculations for log vectors over-estimate, so the caller
	 * doesn't know the amount of space actually used by the item. Return
	 * the byte count to the caller so they can check and store it
	 * appropriately.
	 */
	return ptr - lv->lv_buf;
}

/*
 * Prepare the log item for insertion into the CIL. Calculate the difference in
 * log space and vectors it will consume, and if it is a new item pin it as
 * well.
 */
STATIC void
xfs_cil_prepare_item(
	struct xlog		*log,
	struct xfs_log_vec	*lv,
	struct xfs_log_vec	*old_lv,
	int			*diff_len,
	int			*diff_iovecs)
{
	/* Account for the new LV being passed in */
	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
		*diff_len += lv->lv_buf_len;
		*diff_iovecs += lv->lv_niovecs;
	}

	/*
	 * If there is no old LV, this is the first time we've seen the item in
	 * this CIL context and so we need to pin it. If we are replacing the
	 * old_lv, then remove the space it accounts for and free it.
	 */
	if (!old_lv)
		lv->lv_item->li_ops->iop_pin(lv->lv_item);
	else if (old_lv != lv) {
		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);

		*diff_len -= old_lv->lv_buf_len;
		*diff_iovecs -= old_lv->lv_niovecs;
		kmem_free(old_lv);
	}

	/* attach new log vector to log item */
	lv->lv_item->li_lv = lv;

	/*
	 * If this is the first time the item is being committed to the
	 * CIL, store the sequence number on the log item so we can
	 * tell in future commits whether this is the first checkpoint
	 * the item is being committed into.
	 */
	if (!lv->lv_item->li_seq)
		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
}
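/*
 * A worked example of the accounting above, with illustrative sizes: if an
 * item is relogged into a newly allocated lv of 3 iovecs and 200 bytes,
 * replacing an old_lv of 2 iovecs and 128 bytes, then:
 *
 *	*diff_len += 200;	*diff_iovecs += 3;	(new LV accounted)
 *	*diff_len -= 128;	*diff_iovecs -= 2;	(old LV freed)
 *
 * i.e. a net growth of 72 bytes and one iovec is charged to the checkpoint
 * ticket by xlog_cil_insert_items().
 */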
/*
 * Format log items into flat buffers.
 *
 * For delayed logging, we need to hold a formatted buffer containing all the
 * changes on the log item. This enables us to relog the item in memory and
 * write it out asynchronously without needing to relock the object that was
 * modified at the time it gets written into the iclog.
 *
 * This function builds a vector for the changes in each log item in the
 * transaction. It then works out the length of the buffer needed for each log
 * item, allocates them and formats the vector for the item into the buffer.
 * The buffer is then attached to the log item, and the log item is then
 * inserted into the Committed Item List for tracking until the next
 * checkpoint is written out.
 *
 * We don't set up region headers during this process; we simply copy the
 * regions into the flat buffer. We can do this because we still have to do a
 * formatting step to write the regions into the iclog buffer. Writing the
 * ophdrs during the iclog write means that we can support splitting large
 * regions across iclog boundaries without needing a change in the format of
 * the item/region encapsulation.
 *
 * Hence what we need to do now is rewrite the vector array to point to the
 * copied region inside the buffer we just allocated. This allows us to format
 * the regions into the iclog as though they are being formatted directly out
 * of the objects themselves.
 */
static void
xlog_cil_insert_format_items(
	struct xlog		*log,
	struct xfs_trans	*tp,
	int			*diff_len,
	int			*diff_iovecs)
{
	struct xfs_log_item_desc *lidp;

	/* Bail out if we didn't find a log item.  */
	if (list_empty(&tp->t_items)) {
		ASSERT(0);
		return;
	}

	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
		struct xfs_log_item *lip = lidp->lid_item;
		struct xfs_log_vec *lv;
		struct xfs_log_vec *old_lv;
		int	niovecs = 0;
		int	nbytes = 0;
		int	buf_size;
		bool	ordered = false;

		/* Skip items which aren't dirty in this transaction. */
		if (!(lidp->lid_flags & XFS_LID_DIRTY))
			continue;

		/* get number of vecs and size of data to be stored */
		lip->li_ops->iop_size(lip, &niovecs, &nbytes);

		/* Skip items that do not have any vectors for writing */
		if (!niovecs)
			continue;

		/*
		 * Ordered items need to be tracked but we do not wish to write
		 * them. We need a logvec to track the object, but we do not
		 * need an iovec or buffer to be allocated for copying data.
		 */
		if (niovecs == XFS_LOG_VEC_ORDERED) {
			ordered = true;
			niovecs = 0;
			nbytes = 0;
		}

		/* grab the old item if it exists for reservation accounting */
		old_lv = lip->li_lv;

		/* calc buffer size */
		buf_size = sizeof(struct xfs_log_vec) + nbytes +
				niovecs * sizeof(struct xfs_log_iovec);

		/* compare to existing item size */
		if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
			/* same or smaller, optimise common overwrite case */
			lv = lip->li_lv;
			lv->lv_next = NULL;

			if (ordered)
				goto insert;

			/*
			 * set the item up as though it is a new insertion so
			 * that the space reservation accounting is correct.
			 */
			*diff_iovecs -= lv->lv_niovecs;
			*diff_len -= lv->lv_buf_len;

			/* Ensure the lv is set up according to ->iop_size */
			lv->lv_niovecs = niovecs;
			lv->lv_buf = (char *)lv + buf_size - nbytes;

			lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
			goto insert;
		}

		/* allocate new data chunk */
		lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
		lv->lv_item = lip;
		lv->lv_size = buf_size;
		lv->lv_niovecs = niovecs;
		if (ordered) {
			/* track as an ordered logvec */
			ASSERT(lip->li_lv == NULL);
			lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
			goto insert;
		}

		/* The allocated iovec region lies beyond the log vector. */
		lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];

		/* The allocated data region lies beyond the iovec region */
		lv->lv_buf = (char *)lv + buf_size - nbytes;

		lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
insert:
		ASSERT(lv->lv_buf_len <= nbytes);
		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
	}
}
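/*
 * For reference, the single allocation made above is laid out with the
 * iovec array and data buffer trailing the log vector structure:
 *
 *	+---------------------+--------------------------+----------------+
 *	| struct xfs_log_vec  | niovecs * xfs_log_iovec  | nbytes of data |
 *	+---------------------+--------------------------+----------------+
 *	^ lv                  ^ lv->lv_iovecp             ^ lv->lv_buf
 *
 * which is why lv_buf is computed as (char *)lv + buf_size - nbytes above.
 */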
/*
 * Insert the log items into the CIL and calculate the difference in space
 * consumed by the item. Add the space to the checkpoint ticket and calculate
 * if the change requires additional log metadata. If it does, take that space
 * as well. Remove the amount of space we added to the checkpoint ticket from
 * the current transaction ticket so that the accounting works out correctly.
 */
static void
xlog_cil_insert_items(
	struct xlog		*log,
	struct xfs_trans	*tp)
{
	struct xfs_cil		*cil = log->l_cilp;
	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
	struct xfs_log_item_desc *lidp;
	int			len = 0;
	int			diff_iovecs = 0;
	int			iclog_space;

	ASSERT(tp);

	/*
	 * We can do this safely because the context can't checkpoint until we
	 * are done so it doesn't matter exactly how we update the CIL.
	 */
	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);

	/*
	 * Now (re-)position everything modified at the tail of the CIL.
	 * We do this here so we only need to take the CIL lock once during
	 * the transaction commit.
	 */
	spin_lock(&cil->xc_cil_lock);
	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
		struct xfs_log_item *lip = lidp->lid_item;

		/* Skip items which aren't dirty in this transaction. */
		if (!(lidp->lid_flags & XFS_LID_DIRTY))
			continue;

		list_move_tail(&lip->li_cil, &cil->xc_cil);
	}

	/* account for space used by new iovec headers */
	len += diff_iovecs * sizeof(xlog_op_header_t);
	ctx->nvecs += diff_iovecs;

	/* attach the transaction to the CIL if it has any busy extents */
	if (!list_empty(&tp->t_busy))
		list_splice_init(&tp->t_busy, &ctx->busy_extents);

	/*
	 * Now transfer enough transaction reservation to the context ticket
	 * for the checkpoint. The context ticket is special - the unit
	 * reservation has to grow as well as the current reservation as we
	 * steal from tickets so we can correctly determine the space used
	 * during the transaction commit.
	 */
	if (ctx->ticket->t_curr_res == 0) {
		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
	}

	/* do we need space for more log record headers? */
	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
	if (len > 0 && (ctx->space_used / iclog_space !=
				(ctx->space_used + len) / iclog_space)) {
		int hdrs;

		hdrs = (len + iclog_space - 1) / iclog_space;
		/* need to take into account split region headers, too */
		hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
		ctx->ticket->t_unit_res += hdrs;
		ctx->ticket->t_curr_res += hdrs;
		tp->t_ticket->t_curr_res -= hdrs;
		ASSERT(tp->t_ticket->t_curr_res >= len);
	}
	tp->t_ticket->t_curr_res -= len;
	ctx->space_used += len;

	spin_unlock(&cil->xc_cil_lock);
}

static void
xlog_cil_free_logvec(
	struct xfs_log_vec	*log_vector)
{
	struct xfs_log_vec	*lv;

	for (lv = log_vector; lv; ) {
		struct xfs_log_vec *next = lv->lv_next;
		kmem_free(lv);
		lv = next;
	}
}
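/*
 * A worked example of the record header accounting in
 * xlog_cil_insert_items() above, using illustrative values of a 32k iclog
 * with a 512 byte record header (so iclog_space = 32256): if
 * ctx->space_used is 30000 and a commit adds len = 5000, the checkpoint
 * now spills into a second iclog, so:
 *
 *	hdrs = (5000 + 32255) / 32256;			(= 1)
 *	hdrs *= 512 + sizeof(struct xlog_op_header);	(= 524, assuming a
 *							 12 byte ophdr)
 *
 * and that header space is moved from the transaction ticket to the
 * checkpoint ticket along with len itself.
 */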
/*
 * Mark all items committed and clear busy extents. We free the log vector
 * chains in a separate pass so that we unpin the log items as quickly as
 * possible.
 */
static void
xlog_cil_committed(
	void	*args,
	int	abort)
{
	struct xfs_cil_ctx	*ctx = args;
	struct xfs_mount	*mp = ctx->cil->xc_log->l_mp;

	xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
					ctx->start_lsn, abort);

	xfs_extent_busy_sort(&ctx->busy_extents);
	xfs_extent_busy_clear(mp, &ctx->busy_extents,
			     (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);

	spin_lock(&ctx->cil->xc_push_lock);
	list_del(&ctx->committing);
	spin_unlock(&ctx->cil->xc_push_lock);

	xlog_cil_free_logvec(ctx->lv_chain);

	if (!list_empty(&ctx->busy_extents)) {
		ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);

		xfs_discard_extents(mp, &ctx->busy_extents);
		xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
	}

	kmem_free(ctx);
}
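/*
 * Note that xlog_cil_committed() normally runs from iclog I/O completion
 * via the callback attached in xlog_cil_push() below. On a forced shutdown
 * it is instead called directly with abort == XFS_LI_ABORTED (see the
 * out_abort path), in which case the busy extents are cleared without
 * being discarded regardless of the XFS_MOUNT_DISCARD setting.
 */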
/*
 * Push the Committed Item List to the log. If @push_seq is zero, then it is a
 * background flush and so we can choose to ignore it. Otherwise, if the
 * current sequence is the same as @push_seq we need to do a flush. If
 * @push_seq is less than the current sequence, then it has already been
 * flushed and we don't need to do anything - the caller will wait for it to
 * complete if necessary.
 *
 * @push_seq is a value rather than a flag because that allows us to do an
 * unlocked check of the sequence number for a match. Hence we can allow log
 * forces to run racily and not issue pushes for the same sequence twice. If we
 * get a race between multiple pushes for the same sequence they will block on
 * the first one and then abort, hence avoiding needless pushes.
 */
STATIC int
xlog_cil_push(
	struct xlog	*log)
{
	struct xfs_cil		*cil = log->l_cilp;
	struct xfs_log_vec	*lv;
	struct xfs_cil_ctx	*ctx;
	struct xfs_cil_ctx	*new_ctx;
	struct xlog_in_core	*commit_iclog;
	struct xlog_ticket	*tic;
	int			num_iovecs;
	int			error = 0;
	struct xfs_trans_header thdr;
	struct xfs_log_iovec	lhdr;
	struct xfs_log_vec	lvhdr = { NULL };
	xfs_lsn_t		commit_lsn;
	xfs_lsn_t		push_seq;

	if (!cil)
		return 0;

	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
	new_ctx->ticket = xlog_cil_ticket_alloc(log);

	down_write(&cil->xc_ctx_lock);
	ctx = cil->xc_ctx;

	spin_lock(&cil->xc_push_lock);
	push_seq = cil->xc_push_seq;
	ASSERT(push_seq <= ctx->sequence);

	/*
	 * Check if we've anything to push. If there is nothing, then we don't
	 * move on to a new sequence number and so we have to be able to push
	 * this sequence again later.
	 */
	if (list_empty(&cil->xc_cil)) {
		cil->xc_push_seq = 0;
		spin_unlock(&cil->xc_push_lock);
		goto out_skip;
	}
	spin_unlock(&cil->xc_push_lock);

	/* check for a previously pushed sequence */
	if (push_seq < cil->xc_ctx->sequence)
		goto out_skip;

	/*
	 * pull all the log vectors off the items in the CIL, and
	 * remove the items from the CIL. We don't need the CIL lock
	 * here because it's only needed on the transaction commit
	 * side which is currently locked out by the flush lock.
	 */
	lv = NULL;
	num_iovecs = 0;
	while (!list_empty(&cil->xc_cil)) {
		struct xfs_log_item	*item;

		item = list_first_entry(&cil->xc_cil,
					struct xfs_log_item, li_cil);
		list_del_init(&item->li_cil);
		if (!ctx->lv_chain)
			ctx->lv_chain = item->li_lv;
		else
			lv->lv_next = item->li_lv;
		lv = item->li_lv;
		item->li_lv = NULL;
		num_iovecs += lv->lv_niovecs;
	}

	/*
	 * initialise the new context and attach it to the CIL. Then attach
	 * the current context to the CIL committing list so it can be found
	 * during log forces to extract the commit lsn of the sequence that
	 * needs to be forced.
	 */
	INIT_LIST_HEAD(&new_ctx->committing);
	INIT_LIST_HEAD(&new_ctx->busy_extents);
	new_ctx->sequence = ctx->sequence + 1;
	new_ctx->cil = cil;
	cil->xc_ctx = new_ctx;

	/*
	 * mirror the new sequence into the cil structure so that we can do
	 * unlocked checks against the current sequence in log forces without
	 * risking dereferencing a freed context pointer.
	 */
	cil->xc_current_sequence = new_ctx->sequence;

	/*
	 * The switch is now done, so we can drop the context lock and move out
	 * of a shared context. We can't just go straight to the commit record,
	 * though - we need to synchronise with previous and future commits so
	 * that the commit records are correctly ordered in the log to ensure
	 * that we process items during log IO completion in the correct order.
	 *
	 * For example, if we get an EFI in one checkpoint and the EFD in the
	 * next (e.g. due to log forces), we do not want the checkpoint with
	 * the EFD to be committed before the checkpoint with the EFI. Hence
	 * we must strictly order the commit records of the checkpoints so
	 * that: a) the checkpoint callbacks are attached to the iclogs in the
	 * correct order; and b) the checkpoints are replayed in correct order
	 * in log recovery.
	 *
	 * Hence we need to add this context to the committing context list so
	 * that higher sequences will wait for us to write out a commit record
	 * before they do.
	 */
	spin_lock(&cil->xc_push_lock);
	list_add(&ctx->committing, &cil->xc_committing);
	spin_unlock(&cil->xc_push_lock);
	up_write(&cil->xc_ctx_lock);

	/*
	 * Build a checkpoint transaction header and write it to the log to
	 * begin the transaction. We need to account for the space used by the
	 * transaction header here as it is not accounted for in xlog_write().
	 *
	 * The LSN we need to pass to the log items on transaction commit is
	 * the LSN reported by the first log vector write. If we use the commit
	 * record lsn then we can move the tail beyond the grant write head.
	 */
	tic = ctx->ticket;
	thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
	thdr.th_type = XFS_TRANS_CHECKPOINT;
	thdr.th_tid = tic->t_tid;
	thdr.th_num_items = num_iovecs;
	lhdr.i_addr = &thdr;
	lhdr.i_len = sizeof(xfs_trans_header_t);
	lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
	tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);

	lvhdr.lv_niovecs = 1;
	lvhdr.lv_iovecp = &lhdr;
	lvhdr.lv_next = ctx->lv_chain;

	error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
	if (error)
		goto out_abort_free_ticket;
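	/*
	 * To illustrate what was just written: lvhdr is a stack-local log
	 * vector whose single iovec carries the transaction header, chained
	 * in front of the checkpoint contents:
	 *
	 *	lvhdr -> ctx->lv_chain -> ... -> NULL
	 *
	 * so xlog_write() emits the XFS_TRANS_CHECKPOINT header first and
	 * reports the LSN of that first write back in ctx->start_lsn.
	 */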
	/*
	 * now that we've written the checkpoint into the log, strictly
	 * order the commit records so replay will get them in the right order.
	 */
restart:
	spin_lock(&cil->xc_push_lock);
	list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
		/*
		 * Higher sequences will wait for this one so skip them.
		 * Don't wait for our own sequence, either.
		 */
		if (new_ctx->sequence >= ctx->sequence)
			continue;
		if (!new_ctx->commit_lsn) {
			/*
			 * It is still being pushed! Wait for the push to
			 * complete, then start again from the beginning.
			 */
			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
			goto restart;
		}
	}
	spin_unlock(&cil->xc_push_lock);

	/* xfs_log_done always frees the ticket on error. */
	commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
	if (commit_lsn == -1)
		goto out_abort;

	/* attach all the transactions w/ busy extents to iclog */
	ctx->log_cb.cb_func = xlog_cil_committed;
	ctx->log_cb.cb_arg = ctx;
	error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb);
	if (error)
		goto out_abort;

	/*
	 * now the checkpoint commit is complete and we've attached the
	 * callbacks to the iclog we can assign the commit LSN to the context
	 * and wake up anyone who is waiting for the commit to complete.
	 */
	spin_lock(&cil->xc_push_lock);
	ctx->commit_lsn = commit_lsn;
	wake_up_all(&cil->xc_commit_wait);
	spin_unlock(&cil->xc_push_lock);

	/* release the hounds! */
	return xfs_log_release_iclog(log->l_mp, commit_iclog);

out_skip:
	up_write(&cil->xc_ctx_lock);
	xfs_log_ticket_put(new_ctx->ticket);
	kmem_free(new_ctx);
	return 0;

out_abort_free_ticket:
	xfs_log_ticket_put(tic);
out_abort:
	xlog_cil_committed(ctx, XFS_LI_ABORTED);
	return XFS_ERROR(EIO);
}

static void
xlog_cil_push_work(
	struct work_struct	*work)
{
	struct xfs_cil *cil = container_of(work, struct xfs_cil,
							xc_push_work);
	xlog_cil_push(cil->xc_log);
}

/*
 * We need to push CIL every so often so we don't cache more than we can fit in
 * the log. The limit really is that a checkpoint can't be more than half the
 * log (the current checkpoint is not allowed to overwrite the previous
 * checkpoint), but commit latency and memory usage limit this to a smaller
 * size.
 */
static void
xlog_cil_push_background(
	struct xlog	*log)
{
	struct xfs_cil	*cil = log->l_cilp;

	/*
	 * The cil won't be empty because we are called while holding the
	 * context lock so whatever we added to the CIL will still be there.
	 */
	ASSERT(!list_empty(&cil->xc_cil));

	/*
	 * don't do a background push if we haven't used up all the
	 * space available yet.
	 */
	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
		return;

	spin_lock(&cil->xc_push_lock);
	if (cil->xc_push_seq < cil->xc_current_sequence) {
		cil->xc_push_seq = cil->xc_current_sequence;
		queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
	}
	spin_unlock(&cil->xc_push_lock);
}
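/*
 * For a sense of scale: XLOG_CIL_SPACE_LIMIT() is a fixed fraction of the
 * log size (an eighth at the time of writing), so with e.g. a 128MB log a
 * background push is queued once roughly 16MB of changes have accumulated
 * in the current checkpoint context.
 */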
static void
xlog_cil_push_foreground(
	struct xlog	*log,
	xfs_lsn_t	push_seq)
{
	struct xfs_cil	*cil = log->l_cilp;

	if (!cil)
		return;

	ASSERT(push_seq && push_seq <= cil->xc_current_sequence);

	/* start on any pending background push to minimise wait time on it */
	flush_work(&cil->xc_push_work);

	/*
	 * If the CIL is empty or we've already pushed the sequence then
	 * there's no work we need to do.
	 */
	spin_lock(&cil->xc_push_lock);
	if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
		spin_unlock(&cil->xc_push_lock);
		return;
	}

	cil->xc_push_seq = push_seq;
	spin_unlock(&cil->xc_push_lock);

	/* do the push now */
	xlog_cil_push(log);
}

bool
xlog_cil_empty(
	struct xlog	*log)
{
	struct xfs_cil	*cil = log->l_cilp;
	bool		empty = false;

	spin_lock(&cil->xc_push_lock);
	if (list_empty(&cil->xc_cil))
		empty = true;
	spin_unlock(&cil->xc_push_lock);
	return empty;
}

/*
 * Commit a transaction to the Committed Item List.
 *
 * To do this, we need to format the items, pin them in memory if required and
 * account for the space used by the transaction. Once we have done that we
 * need to release the unused reservation for the transaction, attach the
 * transaction to the checkpoint context so we carry the busy extents through
 * to checkpoint completion, and then unlock all the items in the transaction.
 *
 * We take the context lock in read mode to lock out a background CIL push
 * while we do this, and drop it again before returning once background
 * commits are allowed again.
 */
int
xfs_log_commit_cil(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_lsn_t		*commit_lsn,
	int			flags)
{
	struct xlog		*log = mp->m_log;
	struct xfs_cil		*cil = log->l_cilp;
	int			log_flags = 0;

	if (flags & XFS_TRANS_RELEASE_LOG_RES)
		log_flags = XFS_LOG_REL_PERM_RESERV;

	/* lock out background commit */
	down_read(&cil->xc_ctx_lock);

	xlog_cil_insert_items(log, tp);

	/* check we didn't blow the reservation */
	if (tp->t_ticket->t_curr_res < 0)
		xlog_print_tic_res(mp, tp->t_ticket);

	tp->t_commit_lsn = cil->xc_ctx->sequence;
	if (commit_lsn)
		*commit_lsn = tp->t_commit_lsn;

	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
	xfs_trans_unreserve_and_mod_sb(tp);

	/*
	 * Once all the items of the transaction have been copied to the CIL,
	 * the items can be unlocked and freed.
	 *
	 * This needs to be done before we drop the CIL context lock because we
	 * have to update state in the log items and unlock them before they go
	 * to disk. If we don't, then the CIL checkpoint can race with us and
	 * we can run checkpoint completion before we've updated and unlocked
	 * the log items. This affects (at least) processing of stale buffers,
	 * inodes and EFIs.
	 */
	xfs_trans_free_items(tp, tp->t_commit_lsn, 0);

	xlog_cil_push_background(log);

	up_read(&cil->xc_ctx_lock);
	return 0;
}
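/*
 * Note that the "commit LSN" assigned above is really the CIL sequence
 * number of the checkpoint context the transaction was committed into.
 * A caller that needs the transaction on stable storage is expected to
 * pass it back into xlog_cil_force_lsn() below (e.g. via
 * xfs_log_force_lsn()) to push and wait on that sequence.
 */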
/*
 * Conditionally push the CIL based on the sequence passed in.
 *
 * We only need to push if we haven't already pushed the sequence
 * number given. Hence the only time we will trigger a push here is
 * if the push sequence is the same as the current context.
 *
 * We return the current commit lsn to allow the callers to determine if an
 * iclog flush is necessary following this call.
 */
xfs_lsn_t
xlog_cil_force_lsn(
	struct xlog	*log,
	xfs_lsn_t	sequence)
{
	struct xfs_cil		*cil = log->l_cilp;
	struct xfs_cil_ctx	*ctx;
	xfs_lsn_t		commit_lsn = NULLCOMMITLSN;

	ASSERT(sequence <= cil->xc_current_sequence);

	/*
	 * check to see if we need to force out the current context.
	 * xlog_cil_push() handles racing pushes for the same sequence,
	 * so no need to deal with it here.
	 */
	xlog_cil_push_foreground(log, sequence);

	/*
	 * See if we can find a previous sequence still committing.
	 * We need to wait for all previous sequence commits to complete
	 * before allowing the force of push_seq to go ahead. Hence block
	 * on commits for those as well.
	 */
restart:
	spin_lock(&cil->xc_push_lock);
	list_for_each_entry(ctx, &cil->xc_committing, committing) {
		if (ctx->sequence > sequence)
			continue;
		if (!ctx->commit_lsn) {
			/*
			 * It is still being pushed! Wait for the push to
			 * complete, then start again from the beginning.
			 */
			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
			goto restart;
		}
		if (ctx->sequence != sequence)
			continue;
		/* found it! */
		commit_lsn = ctx->commit_lsn;
	}
	spin_unlock(&cil->xc_push_lock);
	return commit_lsn;
}

/*
 * Check if the current log item was first committed in this sequence.
 * We can't rely on just the log item being in the CIL, we have to check
 * the recorded commit sequence number.
 *
 * Note: for this to be used in a non-racy manner, it has to be called with
 * CIL flushing locked out. As a result, it should only be used during the
 * transaction commit process when deciding what to format into the item.
 */
bool
xfs_log_item_in_current_chkpt(
	struct xfs_log_item	*lip)
{
	struct xfs_cil_ctx	*ctx;

	if (list_empty(&lip->li_cil))
		return false;

	ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;

	/*
	 * li_seq is written on the first commit of a log item to record the
	 * first checkpoint it is written to. Hence if it is different to the
	 * current sequence, we're in a new checkpoint.
	 */
	if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
		return false;
	return true;
}

/*
 * Perform initial CIL structure initialisation.
 */
int
xlog_cil_init(
	struct xlog	*log)
{
	struct xfs_cil	*cil;
	struct xfs_cil_ctx *ctx;

	cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
	if (!cil)
		return ENOMEM;

	ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
	if (!ctx) {
		kmem_free(cil);
		return ENOMEM;
	}

	INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
	INIT_LIST_HEAD(&cil->xc_cil);
	INIT_LIST_HEAD(&cil->xc_committing);
	spin_lock_init(&cil->xc_cil_lock);
	spin_lock_init(&cil->xc_push_lock);
	init_rwsem(&cil->xc_ctx_lock);
	init_waitqueue_head(&cil->xc_commit_wait);

	INIT_LIST_HEAD(&ctx->committing);
	INIT_LIST_HEAD(&ctx->busy_extents);
	ctx->sequence = 1;
	ctx->cil = cil;
	cil->xc_ctx = ctx;
	cil->xc_current_sequence = ctx->sequence;

	cil->xc_log = log;
	log->l_cilp = cil;
	return 0;
}

void
xlog_cil_destroy(
	struct xlog	*log)
{
	if (log->l_cilp->xc_ctx) {
		if (log->l_cilp->xc_ctx->ticket)
			xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
		kmem_free(log->l_cilp->xc_ctx);
	}

	ASSERT(list_empty(&log->l_cilp->xc_cil));
	kmem_free(log->l_cilp);
}