xref: /openbmc/linux/fs/xfs/xfs_log_cil.c (revision 5d4a2e29)
1 /*
2  * Copyright (c) 2010 Red Hat, Inc. All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write the Free Software Foundation,
15  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16  */
17 
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_trans_priv.h"
26 #include "xfs_log_priv.h"
27 #include "xfs_sb.h"
28 #include "xfs_ag.h"
29 #include "xfs_dir2.h"
30 #include "xfs_dmapi.h"
31 #include "xfs_mount.h"
32 #include "xfs_error.h"
33 #include "xfs_alloc.h"
34 
35 /*
36  * Perform initial CIL structure initialisation. If the CIL is not
37  * enabled in this filesystem, ensure the log->l_cilp is null so
38  * we can check this conditional to determine if we are doing delayed
39  * logging or not.
40  */
41 int
42 xlog_cil_init(
43 	struct log	*log)
44 {
45 	struct xfs_cil	*cil;
46 	struct xfs_cil_ctx *ctx;
47 
48 	log->l_cilp = NULL;
49 	if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
50 		return 0;
51 
52 	cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
53 	if (!cil)
54 		return ENOMEM;
55 
56 	ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
57 	if (!ctx) {
58 		kmem_free(cil);
59 		return ENOMEM;
60 	}
61 
62 	INIT_LIST_HEAD(&cil->xc_cil);
63 	INIT_LIST_HEAD(&cil->xc_committing);
64 	spin_lock_init(&cil->xc_cil_lock);
65 	init_rwsem(&cil->xc_ctx_lock);
66 	sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
67 
68 	INIT_LIST_HEAD(&ctx->committing);
69 	INIT_LIST_HEAD(&ctx->busy_extents);
70 	ctx->sequence = 1;
71 	ctx->cil = cil;
72 	cil->xc_ctx = ctx;
73 
74 	cil->xc_log = log;
75 	log->l_cilp = cil;
76 	return 0;
77 }
78 
79 void
80 xlog_cil_destroy(
81 	struct log	*log)
82 {
83 	if (!log->l_cilp)
84 		return;
85 
86 	if (log->l_cilp->xc_ctx) {
87 		if (log->l_cilp->xc_ctx->ticket)
88 			xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
89 		kmem_free(log->l_cilp->xc_ctx);
90 	}
91 
92 	ASSERT(list_empty(&log->l_cilp->xc_cil));
93 	kmem_free(log->l_cilp);
94 }
95 
96 /*
97  * Allocate a new ticket. Failing to get a new ticket makes it really hard to
98  * recover, so we don't allow failure here. Also, we allocate in a context that
99  * we don't want to be issuing transactions from, so we need to tell the
100  * allocation code this as well.
101  *
102  * We don't reserve any space for the ticket - we are going to steal whatever
103  * space we require from transactions as they commit. To ensure we reserve all
104  * the space required, we need to set the current reservation of the ticket to
105  * zero so that we know to steal the initial transaction overhead from the
106  * first transaction commit.
107  */
108 static struct xlog_ticket *
109 xlog_cil_ticket_alloc(
110 	struct log	*log)
111 {
112 	struct xlog_ticket *tic;
113 
114 	tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
115 				KM_SLEEP|KM_NOFS);
116 	tic->t_trans_type = XFS_TRANS_CHECKPOINT;
117 
118 	/*
119 	 * set the current reservation to zero so we know to steal the basic
120 	 * transaction overhead reservation from the first transaction commit.
121 	 */
122 	tic->t_curr_res = 0;
123 	return tic;
124 }
125 
126 /*
127  * After the first stage of log recovery is done, we know where the head and
128  * tail of the log are. We need this log initialisation done before we can
129  * initialise the first CIL checkpoint context.
130  *
131  * Here we allocate a log ticket to track space usage during a CIL push.  This
132  * ticket is passed to xlog_write() directly so that we don't slowly leak log
133  * space by failing to account for space used by log headers and additional
134  * region headers for split regions.
135  */
136 void
137 xlog_cil_init_post_recovery(
138 	struct log	*log)
139 {
140 	if (!log->l_cilp)
141 		return;
142 
143 	log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
144 	log->l_cilp->xc_ctx->sequence = 1;
145 	log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
146 								log->l_curr_block);
147 }
148 
149 /*
150  * Insert the log item into the CIL and calculate the difference in space
151  * consumed by the item. Add the space to the checkpoint ticket and calculate
152  * if the change requires additional log metadata. If it does, take that space
153  * as well. Remove the amount of space we addded to the checkpoint ticket from
154  * the current transaction ticket so that the accounting works out correctly.
155  *
156  * If this is the first time the item is being placed into the CIL in this
157  * context, pin it so it can't be written to disk until the CIL is flushed to
158  * the iclog and the iclog written to disk.
159  */
160 static void
161 xlog_cil_insert(
162 	struct log		*log,
163 	struct xlog_ticket	*ticket,
164 	struct xfs_log_item	*item,
165 	struct xfs_log_vec	*lv)
166 {
167 	struct xfs_cil		*cil = log->l_cilp;
168 	struct xfs_log_vec	*old = lv->lv_item->li_lv;
169 	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
170 	int			len;
171 	int			diff_iovecs;
172 	int			iclog_space;
173 
174 	if (old) {
175 		/* existing lv on log item, space used is a delta */
176 		ASSERT(!list_empty(&item->li_cil));
177 		ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
178 
179 		len = lv->lv_buf_len - old->lv_buf_len;
180 		diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
181 		kmem_free(old->lv_buf);
182 		kmem_free(old);
183 	} else {
184 		/* new lv, must pin the log item */
185 		ASSERT(!lv->lv_item->li_lv);
186 		ASSERT(list_empty(&item->li_cil));
187 
188 		len = lv->lv_buf_len;
189 		diff_iovecs = lv->lv_niovecs;
190 		IOP_PIN(lv->lv_item);
191 
192 	}
193 	len += diff_iovecs * sizeof(xlog_op_header_t);
194 
195 	/* attach new log vector to log item */
196 	lv->lv_item->li_lv = lv;
197 
198 	spin_lock(&cil->xc_cil_lock);
199 	list_move_tail(&item->li_cil, &cil->xc_cil);
200 	ctx->nvecs += diff_iovecs;
201 
202 	/*
203 	 * If this is the first time the item is being committed to the CIL,
204 	 * store the sequence number on the log item so we can tell
205 	 * in future commits whether this is the first checkpoint the item is
206 	 * being committed into.
207 	 */
208 	if (!item->li_seq)
209 		item->li_seq = ctx->sequence;
210 
211 	/*
212 	 * Now transfer enough transaction reservation to the context ticket
213 	 * for the checkpoint. The context ticket is special - the unit
214 	 * reservation has to grow as well as the current reservation as we
215 	 * steal from tickets so we can correctly determine the space used
216 	 * during the transaction commit.
217 	 */
218 	if (ctx->ticket->t_curr_res == 0) {
219 		/* first commit in checkpoint, steal the header reservation */
220 		ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
221 		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
222 		ticket->t_curr_res -= ctx->ticket->t_unit_res;
223 	}
224 
225 	/* do we need space for more log record headers? */
226 	iclog_space = log->l_iclog_size - log->l_iclog_hsize;
227 	if (len > 0 && (ctx->space_used / iclog_space !=
228 				(ctx->space_used + len) / iclog_space)) {
229 		int hdrs;
230 
231 		hdrs = (len + iclog_space - 1) / iclog_space;
232 		/* need to take into account split region headers, too */
233 		hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
234 		ctx->ticket->t_unit_res += hdrs;
235 		ctx->ticket->t_curr_res += hdrs;
236 		ticket->t_curr_res -= hdrs;
237 		ASSERT(ticket->t_curr_res >= len);
238 	}
239 	ticket->t_curr_res -= len;
240 	ctx->space_used += len;
241 
242 	spin_unlock(&cil->xc_cil_lock);
243 }
244 
245 /*
246  * Format log item into a flat buffers
247  *
248  * For delayed logging, we need to hold a formatted buffer containing all the
249  * changes on the log item. This enables us to relog the item in memory and
250  * write it out asynchronously without needing to relock the object that was
251  * modified at the time it gets written into the iclog.
252  *
253  * This function builds a vector for the changes in each log item in the
254  * transaction. It then works out the length of the buffer needed for each log
255  * item, allocates them and formats the vector for the item into the buffer.
256  * The buffer is then attached to the log item are then inserted into the
257  * Committed Item List for tracking until the next checkpoint is written out.
258  *
259  * We don't set up region headers during this process; we simply copy the
260  * regions into the flat buffer. We can do this because we still have to do a
261  * formatting step to write the regions into the iclog buffer.  Writing the
262  * ophdrs during the iclog write means that we can support splitting large
263  * regions across iclog boundares without needing a change in the format of the
264  * item/region encapsulation.
265  *
266  * Hence what we need to do now is change the rewrite the vector array to point
267  * to the copied region inside the buffer we just allocated. This allows us to
268  * format the regions into the iclog as though they are being formatted
269  * directly out of the objects themselves.
270  */
271 static void
272 xlog_cil_format_items(
273 	struct log		*log,
274 	struct xfs_log_vec	*log_vector,
275 	struct xlog_ticket	*ticket,
276 	xfs_lsn_t		*start_lsn)
277 {
278 	struct xfs_log_vec *lv;
279 
280 	if (start_lsn)
281 		*start_lsn = log->l_cilp->xc_ctx->sequence;
282 
283 	ASSERT(log_vector);
284 	for (lv = log_vector; lv; lv = lv->lv_next) {
285 		void	*ptr;
286 		int	index;
287 		int	len = 0;
288 
289 		/* build the vector array and calculate it's length */
290 		IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
291 		for (index = 0; index < lv->lv_niovecs; index++)
292 			len += lv->lv_iovecp[index].i_len;
293 
294 		lv->lv_buf_len = len;
295 		lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
296 		ptr = lv->lv_buf;
297 
298 		for (index = 0; index < lv->lv_niovecs; index++) {
299 			struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
300 
301 			memcpy(ptr, vec->i_addr, vec->i_len);
302 			vec->i_addr = ptr;
303 			ptr += vec->i_len;
304 		}
305 		ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
306 
307 		xlog_cil_insert(log, ticket, lv->lv_item, lv);
308 	}
309 }
310 
311 static void
312 xlog_cil_free_logvec(
313 	struct xfs_log_vec	*log_vector)
314 {
315 	struct xfs_log_vec	*lv;
316 
317 	for (lv = log_vector; lv; ) {
318 		struct xfs_log_vec *next = lv->lv_next;
319 		kmem_free(lv->lv_buf);
320 		kmem_free(lv);
321 		lv = next;
322 	}
323 }
324 
325 /*
326  * Commit a transaction with the given vector to the Committed Item List.
327  *
328  * To do this, we need to format the item, pin it in memory if required and
329  * account for the space used by the transaction. Once we have done that we
330  * need to release the unused reservation for the transaction, attach the
331  * transaction to the checkpoint context so we carry the busy extents through
332  * to checkpoint completion, and then unlock all the items in the transaction.
333  *
334  * For more specific information about the order of operations in
335  * xfs_log_commit_cil() please refer to the comments in
336  * xfs_trans_commit_iclog().
337  *
338  * Called with the context lock already held in read mode to lock out
339  * background commit, returns without it held once background commits are
340  * allowed again.
341  */
342 int
343 xfs_log_commit_cil(
344 	struct xfs_mount	*mp,
345 	struct xfs_trans	*tp,
346 	struct xfs_log_vec	*log_vector,
347 	xfs_lsn_t		*commit_lsn,
348 	int			flags)
349 {
350 	struct log		*log = mp->m_log;
351 	int			log_flags = 0;
352 	int			push = 0;
353 
354 	if (flags & XFS_TRANS_RELEASE_LOG_RES)
355 		log_flags = XFS_LOG_REL_PERM_RESERV;
356 
357 	if (XLOG_FORCED_SHUTDOWN(log)) {
358 		xlog_cil_free_logvec(log_vector);
359 		return XFS_ERROR(EIO);
360 	}
361 
362 	/* lock out background commit */
363 	down_read(&log->l_cilp->xc_ctx_lock);
364 	xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
365 
366 	/* check we didn't blow the reservation */
367 	if (tp->t_ticket->t_curr_res < 0)
368 		xlog_print_tic_res(log->l_mp, tp->t_ticket);
369 
370 	/* attach the transaction to the CIL if it has any busy extents */
371 	if (!list_empty(&tp->t_busy)) {
372 		spin_lock(&log->l_cilp->xc_cil_lock);
373 		list_splice_init(&tp->t_busy,
374 					&log->l_cilp->xc_ctx->busy_extents);
375 		spin_unlock(&log->l_cilp->xc_cil_lock);
376 	}
377 
378 	tp->t_commit_lsn = *commit_lsn;
379 	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
380 	xfs_trans_unreserve_and_mod_sb(tp);
381 
382 	/* check for background commit before unlock */
383 	if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
384 		push = 1;
385 	up_read(&log->l_cilp->xc_ctx_lock);
386 
387 	/*
388 	 * We need to push CIL every so often so we don't cache more than we
389 	 * can fit in the log. The limit really is that a checkpoint can't be
390 	 * more than half the log (the current checkpoint is not allowed to
391 	 * overwrite the previous checkpoint), but commit latency and memory
392 	 * usage limit this to a smaller size in most cases.
393 	 */
394 	if (push)
395 		xlog_cil_push(log, 0);
396 	return 0;
397 }
398 
399 /*
400  * Mark all items committed and clear busy extents. We free the log vector
401  * chains in a separate pass so that we unpin the log items as quickly as
402  * possible.
403  */
404 static void
405 xlog_cil_committed(
406 	void	*args,
407 	int	abort)
408 {
409 	struct xfs_cil_ctx	*ctx = args;
410 	struct xfs_log_vec	*lv;
411 	int			abortflag = abort ? XFS_LI_ABORTED : 0;
412 	struct xfs_busy_extent	*busyp, *n;
413 
414 	/* unpin all the log items */
415 	for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
416 		xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
417 							abortflag);
418 	}
419 
420 	list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
421 		xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
422 
423 	spin_lock(&ctx->cil->xc_cil_lock);
424 	list_del(&ctx->committing);
425 	spin_unlock(&ctx->cil->xc_cil_lock);
426 
427 	xlog_cil_free_logvec(ctx->lv_chain);
428 	kmem_free(ctx);
429 }
430 
431 /*
432  * Push the Committed Item List to the log. If the push_now flag is not set,
433  * then it is a background flush and so we can chose to ignore it.
434  */
435 int
436 xlog_cil_push(
437 	struct log		*log,
438 	int			push_now)
439 {
440 	struct xfs_cil		*cil = log->l_cilp;
441 	struct xfs_log_vec	*lv;
442 	struct xfs_cil_ctx	*ctx;
443 	struct xfs_cil_ctx	*new_ctx;
444 	struct xlog_in_core	*commit_iclog;
445 	struct xlog_ticket	*tic;
446 	int			num_lv;
447 	int			num_iovecs;
448 	int			len;
449 	int			error = 0;
450 	struct xfs_trans_header thdr;
451 	struct xfs_log_iovec	lhdr;
452 	struct xfs_log_vec	lvhdr = { NULL };
453 	xfs_lsn_t		commit_lsn;
454 
455 	if (!cil)
456 		return 0;
457 
458 	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
459 	new_ctx->ticket = xlog_cil_ticket_alloc(log);
460 
461 	/* lock out transaction commit, but don't block on background push */
462 	if (!down_write_trylock(&cil->xc_ctx_lock)) {
463 		if (!push_now)
464 			goto out_free_ticket;
465 		down_write(&cil->xc_ctx_lock);
466 	}
467 	ctx = cil->xc_ctx;
468 
469 	/* check if we've anything to push */
470 	if (list_empty(&cil->xc_cil))
471 		goto out_skip;
472 
473 	/* check for spurious background flush */
474 	if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
475 		goto out_skip;
476 
477 	/*
478 	 * pull all the log vectors off the items in the CIL, and
479 	 * remove the items from the CIL. We don't need the CIL lock
480 	 * here because it's only needed on the transaction commit
481 	 * side which is currently locked out by the flush lock.
482 	 */
483 	lv = NULL;
484 	num_lv = 0;
485 	num_iovecs = 0;
486 	len = 0;
487 	while (!list_empty(&cil->xc_cil)) {
488 		struct xfs_log_item	*item;
489 		int			i;
490 
491 		item = list_first_entry(&cil->xc_cil,
492 					struct xfs_log_item, li_cil);
493 		list_del_init(&item->li_cil);
494 		if (!ctx->lv_chain)
495 			ctx->lv_chain = item->li_lv;
496 		else
497 			lv->lv_next = item->li_lv;
498 		lv = item->li_lv;
499 		item->li_lv = NULL;
500 
501 		num_lv++;
502 		num_iovecs += lv->lv_niovecs;
503 		for (i = 0; i < lv->lv_niovecs; i++)
504 			len += lv->lv_iovecp[i].i_len;
505 	}
506 
507 	/*
508 	 * initialise the new context and attach it to the CIL. Then attach
509 	 * the current context to the CIL committing lsit so it can be found
510 	 * during log forces to extract the commit lsn of the sequence that
511 	 * needs to be forced.
512 	 */
513 	INIT_LIST_HEAD(&new_ctx->committing);
514 	INIT_LIST_HEAD(&new_ctx->busy_extents);
515 	new_ctx->sequence = ctx->sequence + 1;
516 	new_ctx->cil = cil;
517 	cil->xc_ctx = new_ctx;
518 
519 	/*
520 	 * The switch is now done, so we can drop the context lock and move out
521 	 * of a shared context. We can't just go straight to the commit record,
522 	 * though - we need to synchronise with previous and future commits so
523 	 * that the commit records are correctly ordered in the log to ensure
524 	 * that we process items during log IO completion in the correct order.
525 	 *
526 	 * For example, if we get an EFI in one checkpoint and the EFD in the
527 	 * next (e.g. due to log forces), we do not want the checkpoint with
528 	 * the EFD to be committed before the checkpoint with the EFI.  Hence
529 	 * we must strictly order the commit records of the checkpoints so
530 	 * that: a) the checkpoint callbacks are attached to the iclogs in the
531 	 * correct order; and b) the checkpoints are replayed in correct order
532 	 * in log recovery.
533 	 *
534 	 * Hence we need to add this context to the committing context list so
535 	 * that higher sequences will wait for us to write out a commit record
536 	 * before they do.
537 	 */
538 	spin_lock(&cil->xc_cil_lock);
539 	list_add(&ctx->committing, &cil->xc_committing);
540 	spin_unlock(&cil->xc_cil_lock);
541 	up_write(&cil->xc_ctx_lock);
542 
543 	/*
544 	 * Build a checkpoint transaction header and write it to the log to
545 	 * begin the transaction. We need to account for the space used by the
546 	 * transaction header here as it is not accounted for in xlog_write().
547 	 *
548 	 * The LSN we need to pass to the log items on transaction commit is
549 	 * the LSN reported by the first log vector write. If we use the commit
550 	 * record lsn then we can move the tail beyond the grant write head.
551 	 */
552 	tic = ctx->ticket;
553 	thdr.th_magic = XFS_TRANS_HEADER_MAGIC;
554 	thdr.th_type = XFS_TRANS_CHECKPOINT;
555 	thdr.th_tid = tic->t_tid;
556 	thdr.th_num_items = num_iovecs;
557 	lhdr.i_addr = (xfs_caddr_t)&thdr;
558 	lhdr.i_len = sizeof(xfs_trans_header_t);
559 	lhdr.i_type = XLOG_REG_TYPE_TRANSHDR;
560 	tic->t_curr_res -= lhdr.i_len + sizeof(xlog_op_header_t);
561 
562 	lvhdr.lv_niovecs = 1;
563 	lvhdr.lv_iovecp = &lhdr;
564 	lvhdr.lv_next = ctx->lv_chain;
565 
566 	error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
567 	if (error)
568 		goto out_abort;
569 
570 	/*
571 	 * now that we've written the checkpoint into the log, strictly
572 	 * order the commit records so replay will get them in the right order.
573 	 */
574 restart:
575 	spin_lock(&cil->xc_cil_lock);
576 	list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
577 		/*
578 		 * Higher sequences will wait for this one so skip them.
579 		 * Don't wait for own own sequence, either.
580 		 */
581 		if (new_ctx->sequence >= ctx->sequence)
582 			continue;
583 		if (!new_ctx->commit_lsn) {
584 			/*
585 			 * It is still being pushed! Wait for the push to
586 			 * complete, then start again from the beginning.
587 			 */
588 			sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
589 			goto restart;
590 		}
591 	}
592 	spin_unlock(&cil->xc_cil_lock);
593 
594 	commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
595 	if (error || commit_lsn == -1)
596 		goto out_abort;
597 
598 	/* attach all the transactions w/ busy extents to iclog */
599 	ctx->log_cb.cb_func = xlog_cil_committed;
600 	ctx->log_cb.cb_arg = ctx;
601 	error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb);
602 	if (error)
603 		goto out_abort;
604 
605 	/*
606 	 * now the checkpoint commit is complete and we've attached the
607 	 * callbacks to the iclog we can assign the commit LSN to the context
608 	 * and wake up anyone who is waiting for the commit to complete.
609 	 */
610 	spin_lock(&cil->xc_cil_lock);
611 	ctx->commit_lsn = commit_lsn;
612 	sv_broadcast(&cil->xc_commit_wait);
613 	spin_unlock(&cil->xc_cil_lock);
614 
615 	/* release the hounds! */
616 	return xfs_log_release_iclog(log->l_mp, commit_iclog);
617 
618 out_skip:
619 	up_write(&cil->xc_ctx_lock);
620 out_free_ticket:
621 	xfs_log_ticket_put(new_ctx->ticket);
622 	kmem_free(new_ctx);
623 	return 0;
624 
625 out_abort:
626 	xlog_cil_committed(ctx, XFS_LI_ABORTED);
627 	return XFS_ERROR(EIO);
628 }
629 
630 /*
631  * Conditionally push the CIL based on the sequence passed in.
632  *
633  * We only need to push if we haven't already pushed the sequence
634  * number given. Hence the only time we will trigger a push here is
635  * if the push sequence is the same as the current context.
636  *
637  * We return the current commit lsn to allow the callers to determine if a
638  * iclog flush is necessary following this call.
639  *
640  * XXX: Initially, just push the CIL unconditionally and return whatever
641  * commit lsn is there. It'll be empty, so this is broken for now.
642  */
643 xfs_lsn_t
644 xlog_cil_push_lsn(
645 	struct log	*log,
646 	xfs_lsn_t	push_seq)
647 {
648 	struct xfs_cil		*cil = log->l_cilp;
649 	struct xfs_cil_ctx	*ctx;
650 	xfs_lsn_t		commit_lsn = NULLCOMMITLSN;
651 
652 restart:
653 	down_write(&cil->xc_ctx_lock);
654 	ASSERT(push_seq <= cil->xc_ctx->sequence);
655 
656 	/* check to see if we need to force out the current context */
657 	if (push_seq == cil->xc_ctx->sequence) {
658 		up_write(&cil->xc_ctx_lock);
659 		xlog_cil_push(log, 1);
660 		goto restart;
661 	}
662 
663 	/*
664 	 * See if we can find a previous sequence still committing.
665 	 * We can drop the flush lock as soon as we have the cil lock
666 	 * because we are now only comparing contexts protected by
667 	 * the cil lock.
668 	 *
669 	 * We need to wait for all previous sequence commits to complete
670 	 * before allowing the force of push_seq to go ahead. Hence block
671 	 * on commits for those as well.
672 	 */
673 	spin_lock(&cil->xc_cil_lock);
674 	up_write(&cil->xc_ctx_lock);
675 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
676 		if (ctx->sequence > push_seq)
677 			continue;
678 		if (!ctx->commit_lsn) {
679 			/*
680 			 * It is still being pushed! Wait for the push to
681 			 * complete, then start again from the beginning.
682 			 */
683 			sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
684 			goto restart;
685 		}
686 		if (ctx->sequence != push_seq)
687 			continue;
688 		/* found it! */
689 		commit_lsn = ctx->commit_lsn;
690 	}
691 	spin_unlock(&cil->xc_cil_lock);
692 	return commit_lsn;
693 }
694 
695 /*
696  * Check if the current log item was first committed in this sequence.
697  * We can't rely on just the log item being in the CIL, we have to check
698  * the recorded commit sequence number.
699  *
700  * Note: for this to be used in a non-racy manner, it has to be called with
701  * CIL flushing locked out. As a result, it should only be used during the
702  * transaction commit process when deciding what to format into the item.
703  */
704 bool
705 xfs_log_item_in_current_chkpt(
706 	struct xfs_log_item *lip)
707 {
708 	struct xfs_cil_ctx *ctx;
709 
710 	if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
711 		return false;
712 	if (list_empty(&lip->li_cil))
713 		return false;
714 
715 	ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
716 
717 	/*
718 	 * li_seq is written on the first commit of a log item to record the
719 	 * first checkpoint it is written to. Hence if it is different to the
720 	 * current sequence, we're in a new checkpoint.
721 	 */
722 	if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
723 		return false;
724 	return true;
725 }
726