xref: /openbmc/linux/fs/xfs/xfs_trans_buf.c (revision 87c2ce3b)
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_rw.h"


STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
		xfs_daddr_t, int);
STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
		xfs_daddr_t, int);


/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it is already locked
 * within the transaction, just increment its lock recursion count
 * and return a pointer to it.
 *
 * Use the fast path function xfs_trans_buf_item_match() or the buffer
 * cache routine incore_match() to find the buffer
 * if it is already owned by this transaction.
 *
 * If we don't already own the buffer, use get_buf() to get it.
 * If it doesn't yet have an associated xfs_buf_log_item structure,
 * then allocate one and add the item to this transaction.
 *
 * If the transaction pointer is NULL, make this just a normal
 * get_buf() call.
 */
xfs_buf_t *
xfs_trans_get_buf(xfs_trans_t	*tp,
		  xfs_buftarg_t	*target_dev,
		  xfs_daddr_t	blkno,
		  int		len,
		  uint		flags)
{
	xfs_buf_t		*bp;
	xfs_buf_log_item_t	*bip;

	if (flags == 0)
		flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;

	/*
	 * Default to a normal get_buf() call if the tp is NULL.
	 */
	if (tp == NULL) {
		bp = xfs_buf_get_flags(target_dev, blkno, len,
							flags | BUF_BUSY);
		return(bp);
	}

	/*
	 * If we find the buffer in the cache with this transaction
	 * pointer in its b_fsprivate2 field, then we know we already
	 * have it locked.  In this case we just increment the lock
	 * recursion count and return the buffer to the caller.
	 */
	if (tp->t_items.lic_next == NULL) {
		bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
	} else {
		bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len);
	}
	if (bp != NULL) {
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
		if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
			xfs_buftrace("TRANS GET RECUR SHUT", bp);
			XFS_BUF_SUPER_STALE(bp);
		}
		/*
		 * If the buffer is stale then it was binval'ed
		 * since last read.  This doesn't matter since the
		 * caller isn't allowed to use the data anyway.
		 */
		else if (XFS_BUF_ISSTALE(bp)) {
			xfs_buftrace("TRANS GET RECUR STALE", bp);
			ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
		}
		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
		ASSERT(bip != NULL);
		ASSERT(atomic_read(&bip->bli_refcount) > 0);
		bip->bli_recur++;
		xfs_buftrace("TRANS GET RECUR", bp);
		xfs_buf_item_trace("GET RECUR", bip);
		return (bp);
	}

	/*
	 * We always specify the BUF_BUSY flag within a transaction so
	 * that get_buf does not try to push out a delayed write buffer
	 * which might cause another transaction to take place (if the
	 * buffer was delayed alloc).  Such recursive transactions can
	 * easily deadlock with our current transaction as well as cause
	 * us to run out of stack space.
	 */
	bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY);
	if (bp == NULL) {
		return NULL;
	}

	ASSERT(!XFS_BUF_GETERROR(bp));

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, tp->t_mountp);

	/*
	 * Set the recursion count for the buffer within this transaction
	 * to 0.
	 */
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	bip->bli_recur = 0;

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buftrace("TRANS GET", bp);
	xfs_buf_item_trace("GET", bip);
	return (bp);
}
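
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * caller that already holds a reserved transaction typically grabs,
 * initializes and logs a metadata block like this.  The names "tp",
 * "mp" and "blkno" stand in for the caller's own state:
 *
 *	xfs_buf_t	*bp;
 *
 *	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 1, 0);
 *	if (bp == NULL)
 *		return XFS_ERROR(ENOMEM);
 *	... initialize the new block's contents ...
 *	xfs_trans_log_buf(tp, bp, 0, XFS_BUF_COUNT(bp) - 1);
 */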

/*
 * Get and lock the superblock buffer of this file system for the
 * given transaction.
 *
 * We don't need to use incore_match() here, because the superblock
 * buffer is a private buffer which we keep a pointer to in the
 * mount structure.
 */
xfs_buf_t *
xfs_trans_getsb(xfs_trans_t	*tp,
		struct xfs_mount *mp,
		int		flags)
{
	xfs_buf_t		*bp;
	xfs_buf_log_item_t	*bip;

	/*
	 * Default to just trying to lock the superblock buffer
	 * if tp is NULL.
	 */
	if (tp == NULL) {
		return (xfs_getsb(mp, flags));
	}

	/*
	 * If the superblock buffer already has this transaction
	 * pointer in its b_fsprivate2 field, then we know we already
	 * have it locked.  In this case we just increment the lock
	 * recursion count and return the buffer to the caller.
	 */
	bp = mp->m_sb_bp;
	if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
		ASSERT(bip != NULL);
		ASSERT(atomic_read(&bip->bli_refcount) > 0);
		bip->bli_recur++;
		xfs_buf_item_trace("GETSB RECUR", bip);
		return (bp);
	}

	bp = xfs_getsb(mp, flags);
	if (bp == NULL) {
		return NULL;
	}

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, mp);

	/*
	 * Set the recursion count for the buffer within this transaction
	 * to 0.
	 */
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	bip->bli_recur = 0;

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buf_item_trace("GETSB", bip);
	return (bp);
}
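
/*
 * Usage sketch (illustrative only, not part of the original file,
 * assuming the caller owns a reserved transaction "tp" on mount "mp").
 * Superblock fields are normally changed through helpers such as
 * xfs_mod_sb(), which internally follow this pattern:
 *
 *	xfs_buf_t	*bp;
 *
 *	bp = xfs_trans_getsb(tp, mp, 0);
 *	... update the wanted fields in the on-disk superblock image ...
 *	xfs_trans_log_buf(tp, bp, first_byte_offset, last_byte_offset);
 */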

#ifdef DEBUG
xfs_buftarg_t *xfs_error_target;
int	xfs_do_error;
int	xfs_req_num;
int	xfs_error_mod = 33;
#endif

/*
 * Get and lock the buffer for the caller if it is not already
 * locked within the given transaction.  If it has not yet been
 * read in, read it from disk. If it is already locked
 * within the transaction and already read in, just increment its
 * lock recursion count and return a pointer to it.
 *
 * Use the fast path function xfs_trans_buf_item_match() or the buffer
 * cache routine incore_match() to find the buffer
 * if it is already owned by this transaction.
 *
 * If we don't already own the buffer, use read_buf() to get it.
 * If it doesn't yet have an associated xfs_buf_log_item structure,
 * then allocate one and add the item to this transaction.
 *
 * If the transaction pointer is NULL, make this just a normal
 * read_buf() call.
 */
int
xfs_trans_read_buf(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_buftarg_t	*target,
	xfs_daddr_t	blkno,
	int		len,
	uint		flags,
	xfs_buf_t	**bpp)
{
	xfs_buf_t		*bp;
	xfs_buf_log_item_t	*bip;
	int			error;

	if (flags == 0)
		flags = XFS_BUF_LOCK | XFS_BUF_MAPPED;

	/*
	 * Default to a normal read_buf() call if the tp is NULL.
	 */
	if (tp == NULL) {
		bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
		if (!bp)
			return XFS_ERROR(ENOMEM);

		if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
			xfs_ioerror_alert("xfs_trans_read_buf", mp,
					  bp, blkno);
			error = XFS_BUF_GETERROR(bp);
			xfs_buf_relse(bp);
			return error;
		}
#ifdef DEBUG
		if (xfs_do_error && (bp != NULL)) {
			if (xfs_error_target == target) {
				if (((xfs_req_num++) % xfs_error_mod) == 0) {
					xfs_buf_relse(bp);
					printk("Returning error!\n");
					return XFS_ERROR(EIO);
				}
			}
		}
#endif
		if (XFS_FORCED_SHUTDOWN(mp))
			goto shutdown_abort;
		*bpp = bp;
		return 0;
	}

	/*
	 * If we find the buffer in the cache with this transaction
	 * pointer in its b_fsprivate2 field, then we know we already
	 * have it locked.  If it is already read in we just increment
	 * the lock recursion count and return the buffer to the caller.
	 * If the buffer is not yet read in, then we read it in, increment
	 * the lock recursion count, and return it to the caller.
	 */
	if (tp->t_items.lic_next == NULL) {
		bp = xfs_trans_buf_item_match(tp, target, blkno, len);
	} else {
		bp = xfs_trans_buf_item_match_all(tp, target, blkno, len);
	}
	if (bp != NULL) {
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
		ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
		ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
		ASSERT((XFS_BUF_ISERROR(bp)) == 0);
		if (!(XFS_BUF_ISDONE(bp))) {
			xfs_buftrace("READ_BUF_INCORE !DONE", bp);
			ASSERT(!XFS_BUF_ISASYNC(bp));
			XFS_BUF_READ(bp);
			xfsbdstrat(tp->t_mountp, bp);
			xfs_iowait(bp);
			if (XFS_BUF_GETERROR(bp) != 0) {
				xfs_ioerror_alert("xfs_trans_read_buf", mp,
						  bp, blkno);
				error = XFS_BUF_GETERROR(bp);
				xfs_buf_relse(bp);
				/*
				 * We can gracefully recover from most
				 * read errors. Ones we can't are those
				 * that happen after the transaction's
				 * already dirty.
				 */
				if (tp->t_flags & XFS_TRANS_DIRTY)
					xfs_force_shutdown(tp->t_mountp,
							   XFS_METADATA_IO_ERROR);
				return error;
			}
		}
		/*
		 * We never locked this buf ourselves, so we shouldn't
		 * brelse it either. Just get out.
		 */
		if (XFS_FORCED_SHUTDOWN(mp)) {
			xfs_buftrace("READ_BUF_INCORE XFSSHUTDN", bp);
			*bpp = NULL;
			return XFS_ERROR(EIO);
		}

		bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
		bip->bli_recur++;

		ASSERT(atomic_read(&bip->bli_refcount) > 0);
		xfs_buf_item_trace("READ RECUR", bip);
		*bpp = bp;
		return 0;
	}

	/*
	 * We always specify the BUF_BUSY flag within a transaction so
	 * that get_buf does not try to push out a delayed write buffer
	 * which might cause another transaction to take place (if the
	 * buffer was delayed alloc).  Such recursive transactions can
	 * easily deadlock with our current transaction as well as cause
	 * us to run out of stack space.
	 */
	bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
	if (bp == NULL) {
		*bpp = NULL;
		return 0;
	}
	if (XFS_BUF_GETERROR(bp) != 0) {
		XFS_BUF_SUPER_STALE(bp);
		xfs_buftrace("READ ERROR", bp);
		error = XFS_BUF_GETERROR(bp);

		xfs_ioerror_alert("xfs_trans_read_buf", mp,
				  bp, blkno);
		if (tp->t_flags & XFS_TRANS_DIRTY)
			xfs_force_shutdown(tp->t_mountp, XFS_METADATA_IO_ERROR);
		xfs_buf_relse(bp);
		return error;
	}
#ifdef DEBUG
	if (xfs_do_error && !(tp->t_flags & XFS_TRANS_DIRTY)) {
		if (xfs_error_target == target) {
			if (((xfs_req_num++) % xfs_error_mod) == 0) {
				xfs_force_shutdown(tp->t_mountp,
						   XFS_METADATA_IO_ERROR);
				xfs_buf_relse(bp);
				printk("Returning error in trans!\n");
				return XFS_ERROR(EIO);
			}
		}
	}
#endif
	if (XFS_FORCED_SHUTDOWN(mp))
		goto shutdown_abort;

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, tp->t_mountp);

	/*
	 * Set the recursion count for the buffer within this transaction
	 * to 0.
	 */
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	bip->bli_recur = 0;

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buftrace("TRANS READ", bp);
	xfs_buf_item_trace("READ", bip);
	*bpp = bp;
	return 0;

shutdown_abort:
	/*
	 * The theory here is that the buffer is good but we're
	 * bailing out because the filesystem is being forcibly
	 * shut down.  So we should leave the b_flags alone since
	 * the buffer's not staled and just get out.
	 */
#if defined(DEBUG)
	if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
		cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
#endif
	ASSERT((XFS_BUF_BFLAGS(bp) & (XFS_B_STALE|XFS_B_DELWRI)) !=
						(XFS_B_STALE|XFS_B_DELWRI));

	xfs_buftrace("READ_BUF XFSSHUTDN", bp);
	xfs_buf_relse(bp);
	*bpp = NULL;
	return XFS_ERROR(EIO);
}
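
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * typical transactional read of a single basic block, where "tp", "mp"
 * and "blkno" come from the caller:
 *
 *	xfs_buf_t	*bp;
 *	int		error;
 *
 *	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno,
 *				   1, 0, &bp);
 *	if (error)
 *		return error;
 *	... note that bp == NULL with error == 0 is also possible and
 *	... means the buffer could not be obtained; callers check both.
 */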

/*
 * Release the buffer bp which was previously acquired with one of the
 * xfs_trans_... buffer allocation routines if the buffer has not
 * been modified within this transaction.  If the buffer is modified
 * within this transaction, do decrement the recursion count but do
 * not release the buffer even if the count goes to 0.  If the buffer is not
 * modified within the transaction, decrement the recursion count and
 * release the buffer if the recursion count goes to 0.
 *
 * If the buffer is to be released and it was not modified before
 * this transaction began, then free the buf_log_item associated with it.
 *
 * If the transaction pointer is NULL, make this just a normal
 * brelse() call.
 */
void
xfs_trans_brelse(xfs_trans_t	*tp,
		 xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;
	xfs_log_item_t		*lip;
	xfs_log_item_desc_t	*lidp;

	/*
	 * Default to a normal brelse() call if the tp is NULL.
	 */
	if (tp == NULL) {
		ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
		/*
		 * If there's a buf log item attached to the buffer,
		 * then let the AIL know that the buffer is being
		 * unlocked.
		 */
		if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
			lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
			if (lip->li_type == XFS_LI_BUF) {
				bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
				xfs_trans_unlocked_item(
						bip->bli_item.li_mountp,
						lip);
			}
		}
		xfs_buf_relse(bp);
		return;
	}

	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	/*
	 * Find the item descriptor pointing to this buffer's
	 * log item.  It must be there.
	 */
	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
	ASSERT(lidp != NULL);

	/*
	 * If the release is just for a recursive lock,
	 * then decrement the count and return.
	 */
	if (bip->bli_recur > 0) {
		bip->bli_recur--;
		xfs_buf_item_trace("RELSE RECUR", bip);
		return;
	}

	/*
	 * If the buffer is dirty within this transaction, we can't
	 * release it until we commit.
	 */
	if (lidp->lid_flags & XFS_LID_DIRTY) {
		xfs_buf_item_trace("RELSE DIRTY", bip);
		return;
	}

	/*
	 * If the buffer has been invalidated, then we can't release
	 * it until the transaction commits to disk unless it is re-dirtied
	 * as part of this transaction.  This prevents us from pulling
	 * the item from the AIL before we should.
	 */
	if (bip->bli_flags & XFS_BLI_STALE) {
		xfs_buf_item_trace("RELSE STALE", bip);
		return;
	}

	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
	xfs_buf_item_trace("RELSE", bip);

	/*
	 * Free up the log item descriptor tracking the released item.
	 */
	xfs_trans_free_item(tp, lidp);

	/*
	 * Clear the hold flag in the buf log item if it is set.
	 * We wouldn't want the next user of the buffer to
	 * get confused.
	 */
	if (bip->bli_flags & XFS_BLI_HOLD) {
		bip->bli_flags &= ~XFS_BLI_HOLD;
	}

	/*
	 * Drop our reference to the buf log item.
	 */
	atomic_dec(&bip->bli_refcount);

	/*
	 * If the buf item is not tracking data in the log, then
	 * we must free it before releasing the buffer back to the
	 * free pool.  Before releasing the buffer to the free pool,
	 * clear the transaction pointer in b_fsprivate2 to dissolve
	 * its relation to this transaction.
	 */
	if (!xfs_buf_item_dirty(bip)) {
/***
		ASSERT(bp->b_pincount == 0);
***/
		ASSERT(atomic_read(&bip->bli_refcount) == 0);
		ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
		ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
		xfs_buf_item_relse(bp);
		bip = NULL;
	}
	XFS_BUF_SET_FSPRIVATE2(bp, NULL);

	/*
	 * If we've still got a buf log item on the buffer, then
	 * tell the AIL that the buffer is being unlocked.
	 */
	if (bip != NULL) {
		xfs_trans_unlocked_item(bip->bli_item.li_mountp,
					(xfs_log_item_t*)bip);
	}

	xfs_buf_relse(bp);
	return;
}
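
/*
 * Usage sketch (illustrative only, not part of the original file):
 * read-mostly callers pair xfs_trans_read_buf() with xfs_trans_brelse()
 * once they know the buffer will not be dirtied in this transaction:
 *
 *	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, blkno,
 *				   1, 0, &bp);
 *	if (error)
 *		return error;
 *	... examine the buffer contents ...
 *	xfs_trans_brelse(tp, bp);
 */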

/*
 * Add the locked buffer to the transaction.
 * The buffer must be locked, and it cannot be associated with any
 * transaction.
 *
 * If the buffer does not yet have a buf log item associated with it,
 * then allocate one for it.  Then add the buf item to the transaction.
 */
void
xfs_trans_bjoin(xfs_trans_t	*tp,
		xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);

	/*
	 * The xfs_buf_log_item pointer is stored in b_fsprivate.  If
	 * it doesn't have one yet, then allocate one and initialize it.
	 * The checks to see if one is there are in xfs_buf_item_init().
	 */
	xfs_buf_item_init(bp, tp->t_mountp);
	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));

	/*
	 * Take a reference for this transaction on the buf item.
	 */
	atomic_inc(&bip->bli_refcount);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);

	/*
	 * Initialize b_fsprivate2 so we can find it with incore_match()
	 * in xfs_trans_get_buf() and friends above.
	 */
	XFS_BUF_SET_FSPRIVATE2(bp, tp);

	xfs_buf_item_trace("BJOIN", bip);
}

/*
 * Mark the buffer as not needing to be unlocked when the buf item's
 * IOP_UNLOCK() routine is called.  The buffer must already be locked
 * and associated with the given transaction.
 */
/* ARGSUSED */
void
xfs_trans_bhold(xfs_trans_t	*tp,
		xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	bip->bli_flags |= XFS_BLI_HOLD;
	xfs_buf_item_trace("BHOLD", bip);
}

/*
 * Cancel the previous buffer hold request made on this buffer
 * for this transaction.
 */
void
xfs_trans_bhold_release(xfs_trans_t	*tp,
			xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
	ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	ASSERT(bip->bli_flags & XFS_BLI_HOLD);
	bip->bli_flags &= ~XFS_BLI_HOLD;
	xfs_buf_item_trace("BHOLD RELEASE", bip);
}
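
/*
 * Usage sketch (illustrative only, not part of the original file):
 * holding a buffer lets a caller keep it locked across a commit and
 * join it to a follow-up transaction.  The commit/alloc/reserve calls
 * here are assumed surrounding machinery, sketched from memory:
 *
 *	xfs_trans_bhold(tp, bp);
 *	error = xfs_trans_commit(tp, 0);
 *	... bp is still locked here even though tp is gone ...
 *	tp = xfs_trans_alloc(mp, tres_type);
 *	... reserve log space for the new transaction ...
 *	xfs_trans_bjoin(tp, bp);
 */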

/*
 * This is called to mark bytes first through last inclusive of the given
 * buffer as needing to be logged when the transaction is committed.
 * The buffer must already be associated with the given transaction.
 *
 * First and last are numbers relative to the beginning of this buffer,
 * so the first byte in the buffer is numbered 0 regardless of the
 * value of b_blkno.
 */
void
xfs_trans_log_buf(xfs_trans_t	*tp,
		  xfs_buf_t	*bp,
		  uint		first,
		  uint		last)
{
	xfs_buf_log_item_t	*bip;
	xfs_log_item_desc_t	*lidp;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
	ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
	ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
	       (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));

	/*
	 * Mark the buffer as needing to be written out eventually,
	 * and set its iodone function to remove the buffer's buf log
	 * item from the AIL and free it when the buffer is flushed
	 * to disk.  See xfs_buf_attach_iodone() for more details
	 * on li_cb and xfs_buf_iodone_callbacks().
	 * If we end up aborting this transaction, we trap this buffer
	 * inside the b_bdstrat callback so that this won't get written to
	 * disk.
	 */
	XFS_BUF_DELAYWRITE(bp);
	XFS_BUF_DONE(bp);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
	bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))xfs_buf_iodone;

	/*
	 * If we invalidated the buffer within this transaction, then
	 * cancel the invalidation now that we're dirtying the buffer
	 * again.  There are no races with the code in xfs_buf_item_unpin(),
	 * because we have a reference to the buffer this entire time.
	 */
	if (bip->bli_flags & XFS_BLI_STALE) {
		xfs_buf_item_trace("BLOG UNSTALE", bip);
		bip->bli_flags &= ~XFS_BLI_STALE;
		ASSERT(XFS_BUF_ISSTALE(bp));
		XFS_BUF_UNSTALE(bp);
		bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL;
	}

	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
	ASSERT(lidp != NULL);

	tp->t_flags |= XFS_TRANS_DIRTY;
	lidp->lid_flags |= XFS_LID_DIRTY;
	lidp->lid_flags &= ~XFS_LID_BUF_STALE;
	bip->bli_flags |= XFS_BLI_LOGGED;
	xfs_buf_item_log(bip, first, last);
	xfs_buf_item_trace("BLOG", bip);
}
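
/*
 * Usage sketch (illustrative only, not part of the original file):
 * callers log only the byte range they actually dirtied, which keeps
 * the logged region small.  Updating one 8-byte field at byte offset
 * "off" would look roughly like:
 *
 *	*(__uint64_t *)(XFS_BUF_PTR(bp) + off) = newval;
 *	xfs_trans_log_buf(tp, bp, off, off + sizeof(__uint64_t) - 1);
 */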

/*
 * This is called to invalidate a buffer that is being used within
 * a transaction.  Typically this is because the blocks in the
 * buffer are being freed, so we need to prevent it from being
 * written out when we're done.  Allowing it to be written again
 * might overwrite data in the free blocks if they are reallocated
 * to a file.
 *
 * We prevent the buffer from being written out by clearing the
 * B_DELWRI flag.  We can't always get rid of the buf log item at
 * this point, though, because the buffer may still be pinned by
 * another transaction.  If that is the case, then we'll wait until
 * the buffer is committed to disk for the last time (we can tell
 * by the ref count) and free it in xfs_buf_item_unpin().  Until it
 * is cleaned up we will keep the buffer locked so that the buffer
 * and buf log item are not reused.
 */
void
xfs_trans_binval(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_log_item_desc_t	*lidp;
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
	ASSERT(lidp != NULL);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	if (bip->bli_flags & XFS_BLI_STALE) {
		/*
		 * If the buffer is already invalidated, then
		 * just return.
		 */
		ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
		ASSERT(XFS_BUF_ISSTALE(bp));
		ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
		ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF));
		ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
		ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
		ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
		xfs_buftrace("XFS_BINVAL RECUR", bp);
		xfs_buf_item_trace("BINVAL RECUR", bip);
		return;
	}

	/*
	 * Clear the dirty bit in the buffer and set the STALE flag
	 * in the buf log item.  The STALE flag will be used in
	 * xfs_buf_item_unpin() to determine if it should clean up
	 * when the last reference to the buf item is given up.
	 * We set the XFS_BLI_CANCEL flag in the buf log format structure
	 * and log the buf item.  This will be used at recovery time
	 * to determine that copies of the buffer in the log before
	 * this should not be replayed.
	 * We mark the item descriptor and the transaction dirty so
	 * that we'll hold the buffer until after the commit.
	 *
	 * Since we're invalidating the buffer, we also clear the state
	 * about which parts of the buffer have been logged.  We also
	 * clear the flag indicating that this is an inode buffer since
	 * the data in the buffer will no longer be valid.
	 *
	 * We set the stale bit in the buffer as well since we're getting
	 * rid of it.
	 */
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_STALE(bp);
	bip->bli_flags |= XFS_BLI_STALE;
	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
	bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
	bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
	memset((char *)(bip->bli_format.blf_data_map), 0,
	      (bip->bli_format.blf_map_size * sizeof(uint)));
	lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
	tp->t_flags |= XFS_TRANS_DIRTY;
	xfs_buftrace("XFS_BINVAL", bp);
	xfs_buf_item_trace("BINVAL", bip);
}
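
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * transaction that frees the blocks backing a metadata buffer
 * invalidates the buffer so a later flush cannot scribble over the
 * blocks once they are reallocated:
 *
 *	... free the extent covering bp within the same transaction ...
 *	xfs_trans_binval(tp, bp);
 */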

/*
 * This call is used to indicate that the buffer contains on-disk
 * inodes which must be handled specially during recovery.  They
 * require special handling because only the di_next_unlinked from
 * the inodes in the buffer should be recovered.  The rest of the
 * data in the buffer is logged via the inodes themselves.
 *
 * All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log
 * format structure so that we'll know what to do at recovery time.
 */
/* ARGSUSED */
void
xfs_trans_inode_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF;
}

/*
 * This call is used to indicate that the buffer is going to
 * be staled and was an inode buffer.  This means it gets
 * special processing during unpin - any inodes associated
 * with the buffer should be removed from the AIL.
 * There is also special processing during recovery: any replay
 * of the inodes in the buffer needs to be prevented, as the
 * buffer may have been reused.
 */
void
xfs_trans_stale_inode_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_flags |= XFS_BLI_STALE_INODE;
	bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))
		xfs_buf_iodone;
}


/*
 * Mark the buffer as being one which contains newly allocated
 * inodes.  We need to make sure that even if this buffer is
 * relogged as an 'inode buf' we still recover all of the inode
 * images in the face of a crash.  This works in coordination with
 * xfs_buf_item_committed() to ensure that the buffer remains in the
 * AIL at its original location even after it has been relogged.
 */
/* ARGSUSED */
void
xfs_trans_inode_alloc_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
}


/*
 * Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
 * dquots. However, unlike in inode buffer recovery, dquot buffers get
 * recovered in their entirety. (Hence, no XFS_BLI_DQUOT_ALLOC_BUF flag).
 * The only thing that makes dquot buffers different from regular
 * buffers is that we must not replay dquot bufs when recovering
 * if a _corresponding_ quotaoff has happened. We also have to distinguish
 * between usr dquot bufs and grp dquot bufs, because usr and grp quotas
 * can be turned off independently.
 */
/* ARGSUSED */
void
xfs_trans_dquot_buf(
	xfs_trans_t	*tp,
	xfs_buf_t	*bp,
	uint		type)
{
	xfs_buf_log_item_t	*bip;

	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
	ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
	ASSERT(type == XFS_BLI_UDQUOT_BUF ||
	       type == XFS_BLI_PDQUOT_BUF ||
	       type == XFS_BLI_GDQUOT_BUF);

	bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	bip->bli_format.blf_flags |= type;
}
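
/*
 * Usage sketch (illustrative only, not part of the original file): the
 * quota code tags a buffer holding a cluster of user dquots like this,
 * so recovery can skip the buffer if user quotas were since turned off:
 *
 *	xfs_trans_dquot_buf(tp, bp, XFS_BLI_UDQUOT_BUF);
 */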

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.  Only check the first, embedded
 * chunk, since we don't want to spend all day scanning large transactions.
 */
STATIC xfs_buf_t *
xfs_trans_buf_item_match(
	xfs_trans_t	*tp,
	xfs_buftarg_t	*target,
	xfs_daddr_t	blkno,
	int		len)
{
	xfs_log_item_chunk_t	*licp;
	xfs_log_item_desc_t	*lidp;
	xfs_buf_log_item_t	*blip;
	xfs_buf_t		*bp;
	int			i;

	bp = NULL;
	len = BBTOB(len);
	licp = &tp->t_items;
	if (!XFS_LIC_ARE_ALL_FREE(licp)) {
		for (i = 0; i < licp->lic_unused; i++) {
			/*
			 * Skip unoccupied slots.
			 */
			if (XFS_LIC_ISFREE(licp, i)) {
				continue;
			}

			lidp = XFS_LIC_SLOT(licp, i);
			blip = (xfs_buf_log_item_t *)lidp->lid_item;
			if (blip->bli_item.li_type != XFS_LI_BUF) {
				continue;
			}

			bp = blip->bli_buf;
			if ((XFS_BUF_TARGET(bp) == target) &&
			    (XFS_BUF_ADDR(bp) == blkno) &&
			    (XFS_BUF_COUNT(bp) == len)) {
				/*
				 * We found it.  Break out and
				 * return the pointer to the buffer.
				 */
				break;
			} else {
				bp = NULL;
			}
		}
	}
	return bp;
}

/*
 * Check to see if a buffer matching the given parameters is already
 * a part of the given transaction.  Check all the chunks; we
 * want to be thorough.
 */
STATIC xfs_buf_t *
xfs_trans_buf_item_match_all(
	xfs_trans_t	*tp,
	xfs_buftarg_t	*target,
	xfs_daddr_t	blkno,
	int		len)
{
	xfs_log_item_chunk_t	*licp;
	xfs_log_item_desc_t	*lidp;
	xfs_buf_log_item_t	*blip;
	xfs_buf_t		*bp;
	int			i;

	bp = NULL;
	len = BBTOB(len);
	for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
		if (XFS_LIC_ARE_ALL_FREE(licp)) {
			ASSERT(licp == &tp->t_items);
			ASSERT(licp->lic_next == NULL);
			return NULL;
		}
		for (i = 0; i < licp->lic_unused; i++) {
			/*
			 * Skip unoccupied slots.
			 */
			if (XFS_LIC_ISFREE(licp, i)) {
				continue;
			}

			lidp = XFS_LIC_SLOT(licp, i);
			blip = (xfs_buf_log_item_t *)lidp->lid_item;
			if (blip->bli_item.li_type != XFS_LI_BUF) {
				continue;
			}

			bp = blip->bli_buf;
			if ((XFS_BUF_TARGET(bp) == target) &&
			    (XFS_BUF_ADDR(bp) == blkno) &&
			    (XFS_BUF_COUNT(bp) == len)) {
				/*
				 * We found it.  Return the pointer
				 * to the buffer.
				 */
				return bp;
			}
		}
	}
	return NULL;
}