xref: /openbmc/linux/fs/xfs/xfs_trans_ail.c (revision 545e4006)
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dmapi.h"
27 #include "xfs_mount.h"
28 #include "xfs_trans_priv.h"
29 #include "xfs_error.h"
30 
31 STATIC void xfs_ail_insert(xfs_ail_t *, xfs_log_item_t *);
32 STATIC xfs_log_item_t * xfs_ail_delete(xfs_ail_t *, xfs_log_item_t *);
33 STATIC xfs_log_item_t * xfs_ail_min(xfs_ail_t *);
34 STATIC xfs_log_item_t * xfs_ail_next(xfs_ail_t *, xfs_log_item_t *);
35 
36 #ifdef DEBUG
37 STATIC void xfs_ail_check(xfs_ail_t *, xfs_log_item_t *);
38 #else
39 #define	xfs_ail_check(a,l)
40 #endif /* DEBUG */
41 
42 
43 /*
44  * This is called by the log manager code to determine the LSN
45  * of the tail of the log.  This is exactly the LSN of the first
46  * item in the AIL.  If the AIL is empty, then this function
47  * returns 0.
48  *
49  * We need the AIL lock in order to get a coherent read of the
50  * lsn of the last item in the AIL.
51  */
52 xfs_lsn_t
53 xfs_trans_tail_ail(
54 	xfs_mount_t	*mp)
55 {
56 	xfs_lsn_t	lsn;
57 	xfs_log_item_t	*lip;
58 
59 	spin_lock(&mp->m_ail_lock);
60 	lip = xfs_ail_min(&mp->m_ail);
61 	if (lip == NULL) {
62 		lsn = (xfs_lsn_t)0;
63 	} else {
64 		lsn = lip->li_lsn;
65 	}
66 	spin_unlock(&mp->m_ail_lock);
67 
68 	return lsn;
69 }
70 
71 /*
72  * xfs_trans_push_ail
73  *
74  * This routine is called to move the tail of the AIL forward.  It does this by
75  * trying to flush items in the AIL whose lsns are below the given
76  * threshold_lsn.
77  *
78  * the push is run asynchronously in a separate thread, so we return the tail
79  * of the log right now instead of the tail after the push. This means we will
80  * either continue right away, or we will sleep waiting on the async thread to
81  * do it's work.
82  *
83  * We do this unlocked - we only need to know whether there is anything in the
84  * AIL at the time we are called. We don't need to access the contents of
85  * any of the objects, so the lock is not needed.
86  */
87 void
88 xfs_trans_push_ail(
89 	xfs_mount_t		*mp,
90 	xfs_lsn_t		threshold_lsn)
91 {
92 	xfs_log_item_t		*lip;
93 
94 	lip = xfs_ail_min(&mp->m_ail);
95 	if (lip && !XFS_FORCED_SHUTDOWN(mp)) {
96 		if (XFS_LSN_CMP(threshold_lsn, mp->m_ail.xa_target) > 0)
97 			xfsaild_wakeup(mp, threshold_lsn);
98 	}
99 }
100 
101 /*
102  * Return the item in the AIL with the current lsn.
103  * Return the current tree generation number for use
104  * in calls to xfs_trans_next_ail().
105  */
106 STATIC xfs_log_item_t *
107 xfs_trans_first_push_ail(
108 	xfs_mount_t	*mp,
109 	int		*gen,
110 	xfs_lsn_t	lsn)
111 {
112 	xfs_log_item_t	*lip;
113 
114 	lip = xfs_ail_min(&mp->m_ail);
115 	*gen = (int)mp->m_ail.xa_gen;
116 	if (lsn == 0)
117 		return lip;
118 
119 	list_for_each_entry(lip, &mp->m_ail.xa_ail, li_ail) {
120 		if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
121 			return lip;
122 	}
123 
124 	return NULL;
125 }
126 
127 /*
128  * Function that does the work of pushing on the AIL
129  */
130 long
131 xfsaild_push(
132 	xfs_mount_t	*mp,
133 	xfs_lsn_t	*last_lsn)
134 {
135 	long		tout = 1000; /* milliseconds */
136 	xfs_lsn_t	last_pushed_lsn = *last_lsn;
137 	xfs_lsn_t	target =  mp->m_ail.xa_target;
138 	xfs_lsn_t	lsn;
139 	xfs_log_item_t	*lip;
140 	int		gen;
141 	int		restarts;
142 	int		flush_log, count, stuck;
143 
144 #define	XFS_TRANS_PUSH_AIL_RESTARTS	10
145 
146 	spin_lock(&mp->m_ail_lock);
147 	lip = xfs_trans_first_push_ail(mp, &gen, *last_lsn);
148 	if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
149 		/*
150 		 * AIL is empty or our push has reached the end.
151 		 */
152 		spin_unlock(&mp->m_ail_lock);
153 		last_pushed_lsn = 0;
154 		goto out;
155 	}
156 
157 	XFS_STATS_INC(xs_push_ail);
158 
159 	/*
160 	 * While the item we are looking at is below the given threshold
161 	 * try to flush it out. We'd like not to stop until we've at least
162 	 * tried to push on everything in the AIL with an LSN less than
163 	 * the given threshold.
164 	 *
165 	 * However, we will stop after a certain number of pushes and wait
166 	 * for a reduced timeout to fire before pushing further. This
167 	 * prevents use from spinning when we can't do anything or there is
168 	 * lots of contention on the AIL lists.
169 	 */
170 	tout = 10;
171 	lsn = lip->li_lsn;
172 	flush_log = stuck = count = restarts = 0;
173 	while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
174 		int	lock_result;
175 		/*
176 		 * If we can lock the item without sleeping, unlock the AIL
177 		 * lock and flush the item.  Then re-grab the AIL lock so we
178 		 * can look for the next item on the AIL. List changes are
179 		 * handled by the AIL lookup functions internally
180 		 *
181 		 * If we can't lock the item, either its holder will flush it
182 		 * or it is already being flushed or it is being relogged.  In
183 		 * any of these case it is being taken care of and we can just
184 		 * skip to the next item in the list.
185 		 */
186 		lock_result = IOP_TRYLOCK(lip);
187 		spin_unlock(&mp->m_ail_lock);
188 		switch (lock_result) {
189 		case XFS_ITEM_SUCCESS:
190 			XFS_STATS_INC(xs_push_ail_success);
191 			IOP_PUSH(lip);
192 			last_pushed_lsn = lsn;
193 			break;
194 
195 		case XFS_ITEM_PUSHBUF:
196 			XFS_STATS_INC(xs_push_ail_pushbuf);
197 			IOP_PUSHBUF(lip);
198 			last_pushed_lsn = lsn;
199 			break;
200 
201 		case XFS_ITEM_PINNED:
202 			XFS_STATS_INC(xs_push_ail_pinned);
203 			stuck++;
204 			flush_log = 1;
205 			break;
206 
207 		case XFS_ITEM_LOCKED:
208 			XFS_STATS_INC(xs_push_ail_locked);
209 			last_pushed_lsn = lsn;
210 			stuck++;
211 			break;
212 
213 		case XFS_ITEM_FLUSHING:
214 			XFS_STATS_INC(xs_push_ail_flushing);
215 			last_pushed_lsn = lsn;
216 			stuck++;
217 			break;
218 
219 		default:
220 			ASSERT(0);
221 			break;
222 		}
223 
224 		spin_lock(&mp->m_ail_lock);
225 		/* should we bother continuing? */
226 		if (XFS_FORCED_SHUTDOWN(mp))
227 			break;
228 		ASSERT(mp->m_log);
229 
230 		count++;
231 
232 		/*
233 		 * Are there too many items we can't do anything with?
234 		 * If we we are skipping too many items because we can't flush
235 		 * them or they are already being flushed, we back off and
236 		 * given them time to complete whatever operation is being
237 		 * done. i.e. remove pressure from the AIL while we can't make
238 		 * progress so traversals don't slow down further inserts and
239 		 * removals to/from the AIL.
240 		 *
241 		 * The value of 100 is an arbitrary magic number based on
242 		 * observation.
243 		 */
244 		if (stuck > 100)
245 			break;
246 
247 		lip = xfs_trans_next_ail(mp, lip, &gen, &restarts);
248 		if (lip == NULL)
249 			break;
250 		if (restarts > XFS_TRANS_PUSH_AIL_RESTARTS)
251 			break;
252 		lsn = lip->li_lsn;
253 	}
254 	spin_unlock(&mp->m_ail_lock);
255 
256 	if (flush_log) {
257 		/*
258 		 * If something we need to push out was pinned, then
259 		 * push out the log so it will become unpinned and
260 		 * move forward in the AIL.
261 		 */
262 		XFS_STATS_INC(xs_push_ail_flush);
263 		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
264 	}
265 
266 	if (!count) {
267 		/* We're past our target or empty, so idle */
268 		tout = 1000;
269 	} else if (XFS_LSN_CMP(lsn, target) >= 0) {
270 		/*
271 		 * We reached the target so wait a bit longer for I/O to
272 		 * complete and remove pushed items from the AIL before we
273 		 * start the next scan from the start of the AIL.
274 		 */
275 		tout += 20;
276 		last_pushed_lsn = 0;
277 	} else if ((restarts > XFS_TRANS_PUSH_AIL_RESTARTS) ||
278 		   ((stuck * 100) / count > 90)) {
279 		/*
280 		 * Either there is a lot of contention on the AIL or we
281 		 * are stuck due to operations in progress. "Stuck" in this
282 		 * case is defined as >90% of the items we tried to push
283 		 * were stuck.
284 		 *
285 		 * Backoff a bit more to allow some I/O to complete before
286 		 * continuing from where we were.
287 		 */
288 		tout += 10;
289 	}
290 out:
291 	*last_lsn = last_pushed_lsn;
292 	return tout;
293 }	/* xfsaild_push */
294 
295 
296 /*
297  * This is to be called when an item is unlocked that may have
298  * been in the AIL.  It will wake up the first member of the AIL
299  * wait list if this item's unlocking might allow it to progress.
300  * If the item is in the AIL, then we need to get the AIL lock
301  * while doing our checking so we don't race with someone going
302  * to sleep waiting for this event in xfs_trans_push_ail().
303  */
304 void
305 xfs_trans_unlocked_item(
306 	xfs_mount_t	*mp,
307 	xfs_log_item_t	*lip)
308 {
309 	xfs_log_item_t	*min_lip;
310 
311 	/*
312 	 * If we're forcibly shutting down, we may have
313 	 * unlocked log items arbitrarily. The last thing
314 	 * we want to do is to move the tail of the log
315 	 * over some potentially valid data.
316 	 */
317 	if (!(lip->li_flags & XFS_LI_IN_AIL) ||
318 	    XFS_FORCED_SHUTDOWN(mp)) {
319 		return;
320 	}
321 
322 	/*
323 	 * This is the one case where we can call into xfs_ail_min()
324 	 * without holding the AIL lock because we only care about the
325 	 * case where we are at the tail of the AIL.  If the object isn't
326 	 * at the tail, it doesn't matter what result we get back.  This
327 	 * is slightly racy because since we were just unlocked, we could
328 	 * go to sleep between the call to xfs_ail_min and the call to
329 	 * xfs_log_move_tail, have someone else lock us, commit to us disk,
330 	 * move us out of the tail of the AIL, and then we wake up.  However,
331 	 * the call to xfs_log_move_tail() doesn't do anything if there's
332 	 * not enough free space to wake people up so we're safe calling it.
333 	 */
334 	min_lip = xfs_ail_min(&mp->m_ail);
335 
336 	if (min_lip == lip)
337 		xfs_log_move_tail(mp, 1);
338 }	/* xfs_trans_unlocked_item */
339 
340 
341 /*
342  * Update the position of the item in the AIL with the new
343  * lsn.  If it is not yet in the AIL, add it.  Otherwise, move
344  * it to its new position by removing it and re-adding it.
345  *
346  * Wakeup anyone with an lsn less than the item's lsn.  If the item
347  * we move in the AIL is the minimum one, update the tail lsn in the
348  * log manager.
349  *
350  * Increment the AIL's generation count to indicate that the tree
351  * has changed.
352  *
353  * This function must be called with the AIL lock held.  The lock
354  * is dropped before returning.
355  */
356 void
357 xfs_trans_update_ail(
358 	xfs_mount_t	*mp,
359 	xfs_log_item_t	*lip,
360 	xfs_lsn_t	lsn) __releases(mp->m_ail_lock)
361 {
362 	xfs_log_item_t		*dlip=NULL;
363 	xfs_log_item_t		*mlip;	/* ptr to minimum lip */
364 
365 	mlip = xfs_ail_min(&mp->m_ail);
366 
367 	if (lip->li_flags & XFS_LI_IN_AIL) {
368 		dlip = xfs_ail_delete(&mp->m_ail, lip);
369 		ASSERT(dlip == lip);
370 	} else {
371 		lip->li_flags |= XFS_LI_IN_AIL;
372 	}
373 
374 	lip->li_lsn = lsn;
375 
376 	xfs_ail_insert(&mp->m_ail, lip);
377 	mp->m_ail.xa_gen++;
378 
379 	if (mlip == dlip) {
380 		mlip = xfs_ail_min(&mp->m_ail);
381 		spin_unlock(&mp->m_ail_lock);
382 		xfs_log_move_tail(mp, mlip->li_lsn);
383 	} else {
384 		spin_unlock(&mp->m_ail_lock);
385 	}
386 
387 
388 }	/* xfs_trans_update_ail */
389 
390 /*
391  * Delete the given item from the AIL.  It must already be in
392  * the AIL.
393  *
394  * Wakeup anyone with an lsn less than item's lsn.    If the item
395  * we delete in the AIL is the minimum one, update the tail lsn in the
396  * log manager.
397  *
398  * Clear the IN_AIL flag from the item, reset its lsn to 0, and
399  * bump the AIL's generation count to indicate that the tree
400  * has changed.
401  *
402  * This function must be called with the AIL lock held.  The lock
403  * is dropped before returning.
404  */
405 void
406 xfs_trans_delete_ail(
407 	xfs_mount_t	*mp,
408 	xfs_log_item_t	*lip) __releases(mp->m_ail_lock)
409 {
410 	xfs_log_item_t		*dlip;
411 	xfs_log_item_t		*mlip;
412 
413 	if (lip->li_flags & XFS_LI_IN_AIL) {
414 		mlip = xfs_ail_min(&mp->m_ail);
415 		dlip = xfs_ail_delete(&mp->m_ail, lip);
416 		ASSERT(dlip == lip);
417 
418 
419 		lip->li_flags &= ~XFS_LI_IN_AIL;
420 		lip->li_lsn = 0;
421 		mp->m_ail.xa_gen++;
422 
423 		if (mlip == dlip) {
424 			mlip = xfs_ail_min(&mp->m_ail);
425 			spin_unlock(&mp->m_ail_lock);
426 			xfs_log_move_tail(mp, (mlip ? mlip->li_lsn : 0));
427 		} else {
428 			spin_unlock(&mp->m_ail_lock);
429 		}
430 	}
431 	else {
432 		/*
433 		 * If the file system is not being shutdown, we are in
434 		 * serious trouble if we get to this stage.
435 		 */
436 		if (XFS_FORCED_SHUTDOWN(mp))
437 			spin_unlock(&mp->m_ail_lock);
438 		else {
439 			xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
440 		"%s: attempting to delete a log item that is not in the AIL",
441 					__func__);
442 			spin_unlock(&mp->m_ail_lock);
443 			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
444 		}
445 	}
446 }
447 
448 
449 
450 /*
451  * Return the item in the AIL with the smallest lsn.
452  * Return the current tree generation number for use
453  * in calls to xfs_trans_next_ail().
454  */
455 xfs_log_item_t *
456 xfs_trans_first_ail(
457 	xfs_mount_t	*mp,
458 	int		*gen)
459 {
460 	xfs_log_item_t	*lip;
461 
462 	lip = xfs_ail_min(&mp->m_ail);
463 	*gen = (int)mp->m_ail.xa_gen;
464 
465 	return lip;
466 }
467 
468 /*
469  * If the generation count of the tree has not changed since the
470  * caller last took something from the AIL, then return the elmt
471  * in the tree which follows the one given.  If the count has changed,
472  * then return the minimum elmt of the AIL and bump the restarts counter
473  * if one is given.
474  */
475 xfs_log_item_t *
476 xfs_trans_next_ail(
477 	xfs_mount_t	*mp,
478 	xfs_log_item_t	*lip,
479 	int		*gen,
480 	int		*restarts)
481 {
482 	xfs_log_item_t	*nlip;
483 
484 	ASSERT(mp && lip && gen);
485 	if (mp->m_ail.xa_gen == *gen) {
486 		nlip = xfs_ail_next(&mp->m_ail, lip);
487 	} else {
488 		nlip = xfs_ail_min(&mp->m_ail);
489 		*gen = (int)mp->m_ail.xa_gen;
490 		if (restarts != NULL) {
491 			XFS_STATS_INC(xs_push_ail_restarts);
492 			(*restarts)++;
493 		}
494 	}
495 
496 	return (nlip);
497 }
498 
499 
500 /*
501  * The active item list (AIL) is a doubly linked list of log
502  * items sorted by ascending lsn.  The base of the list is
503  * a forw/back pointer pair embedded in the xfs mount structure.
504  * The base is initialized with both pointers pointing to the
505  * base.  This case always needs to be distinguished, because
506  * the base has no lsn to look at.  We almost always insert
507  * at the end of the list, so on inserts we search from the
508  * end of the list to find where the new item belongs.
509  */
510 
511 /*
512  * Initialize the doubly linked list to point only to itself.
513  */
514 int
515 xfs_trans_ail_init(
516 	xfs_mount_t	*mp)
517 {
518 	INIT_LIST_HEAD(&mp->m_ail.xa_ail);
519 	return xfsaild_start(mp);
520 }
521 
522 void
523 xfs_trans_ail_destroy(
524 	xfs_mount_t	*mp)
525 {
526 	xfsaild_stop(mp);
527 }
528 
529 /*
530  * Insert the given log item into the AIL.
531  * We almost always insert at the end of the list, so on inserts
532  * we search from the end of the list to find where the
533  * new item belongs.
534  */
535 STATIC void
536 xfs_ail_insert(
537 	xfs_ail_t	*ailp,
538 	xfs_log_item_t	*lip)
539 /* ARGSUSED */
540 {
541 	xfs_log_item_t	*next_lip;
542 
543 	/*
544 	 * If the list is empty, just insert the item.
545 	 */
546 	if (list_empty(&ailp->xa_ail)) {
547 		list_add(&lip->li_ail, &ailp->xa_ail);
548 		return;
549 	}
550 
551 	list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) {
552 		if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0)
553 			break;
554 	}
555 
556 	ASSERT((&next_lip->li_ail == &ailp->xa_ail) ||
557 	       (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) <= 0));
558 
559 	list_add(&lip->li_ail, &next_lip->li_ail);
560 
561 	xfs_ail_check(ailp, lip);
562 	return;
563 }
564 
565 /*
566  * Delete the given item from the AIL.  Return a pointer to the item.
567  */
568 /*ARGSUSED*/
569 STATIC xfs_log_item_t *
570 xfs_ail_delete(
571 	xfs_ail_t	*ailp,
572 	xfs_log_item_t	*lip)
573 /* ARGSUSED */
574 {
575 	xfs_ail_check(ailp, lip);
576 
577 	list_del(&lip->li_ail);
578 
579 	return lip;
580 }
581 
582 /*
583  * Return a pointer to the first item in the AIL.
584  * If the AIL is empty, then return NULL.
585  */
586 STATIC xfs_log_item_t *
587 xfs_ail_min(
588 	xfs_ail_t	*ailp)
589 /* ARGSUSED */
590 {
591 	if (list_empty(&ailp->xa_ail))
592 		return NULL;
593 
594 	return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
595 }
596 
597 /*
598  * Return a pointer to the item which follows
599  * the given item in the AIL.  If the given item
600  * is the last item in the list, then return NULL.
601  */
602 STATIC xfs_log_item_t *
603 xfs_ail_next(
604 	xfs_ail_t	*ailp,
605 	xfs_log_item_t	*lip)
606 /* ARGSUSED */
607 {
608 	if (lip->li_ail.next == &ailp->xa_ail)
609 		return NULL;
610 
611 	return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
612 }
613 
614 #ifdef DEBUG
615 /*
616  * Check that the list is sorted as it should be.
617  */
618 STATIC void
619 xfs_ail_check(
620 	xfs_ail_t 	*ailp,
621 	xfs_log_item_t	*lip)
622 {
623 	xfs_log_item_t	*prev_lip;
624 
625 	if (list_empty(&ailp->xa_ail))
626 		return;
627 
628 	/*
629 	 * Check the next and previous entries are valid.
630 	 */
631 	ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
632 	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
633 	if (&prev_lip->li_ail != &ailp->xa_ail)
634 		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
635 
636 	prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
637 	if (&prev_lip->li_ail != &ailp->xa_ail)
638 		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
639 
640 
641 #ifdef XFS_TRANS_DEBUG
642 	/*
643 	 * Walk the list checking lsn ordering, and that every entry has the
644 	 * XFS_LI_IN_AIL flag set. This is really expensive, so only do it
645 	 * when specifically debugging the transaction subsystem.
646 	 */
647 	prev_lip = list_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
648 	list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
649 		if (&prev_lip->li_ail != &ailp->xa_ail)
650 			ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
651 		ASSERT((lip->li_flags & XFS_LI_IN_AIL) != 0);
652 		prev_lip = lip;
653 	}
654 #endif /* XFS_TRANS_DEBUG */
655 }
656 #endif /* DEBUG */
657