xref: /openbmc/linux/fs/xfs/xfs_buf.c (revision 1a48327c)
10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0
2c59d87c4SChristoph Hellwig /*
3c59d87c4SChristoph Hellwig  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4c59d87c4SChristoph Hellwig  * All Rights Reserved.
5c59d87c4SChristoph Hellwig  */
6c59d87c4SChristoph Hellwig #include "xfs.h"
7c59d87c4SChristoph Hellwig #include <linux/backing-dev.h>
86f643c57SShiyang Ruan #include <linux/dax.h>
9c59d87c4SChristoph Hellwig 
105467b34bSDarrick J. Wong #include "xfs_shared.h"
114fb6e8adSChristoph Hellwig #include "xfs_format.h"
12239880efSDave Chinner #include "xfs_log_format.h"
137fd36c44SDave Chinner #include "xfs_trans_resv.h"
14c59d87c4SChristoph Hellwig #include "xfs_mount.h"
15c59d87c4SChristoph Hellwig #include "xfs_trace.h"
16239880efSDave Chinner #include "xfs_log.h"
179fe5c77cSDave Chinner #include "xfs_log_recover.h"
1801728b44SDave Chinner #include "xfs_log_priv.h"
19f593bf14SDave Chinner #include "xfs_trans.h"
20f593bf14SDave Chinner #include "xfs_buf_item.h"
21e9e899a2SDarrick J. Wong #include "xfs_errortag.h"
227561d27eSBrian Foster #include "xfs_error.h"
239bbafc71SDave Chinner #include "xfs_ag.h"
24c59d87c4SChristoph Hellwig 
25231f91abSDave Chinner struct kmem_cache *xfs_buf_cache;
26c59d87c4SChristoph Hellwig 
2737fd1678SDave Chinner /*
2837fd1678SDave Chinner  * Locking orders
2937fd1678SDave Chinner  *
3037fd1678SDave Chinner  * xfs_buf_ioacct_inc:
3137fd1678SDave Chinner  * xfs_buf_ioacct_dec:
3237fd1678SDave Chinner  *	b_sema (caller holds)
3337fd1678SDave Chinner  *	  b_lock
3437fd1678SDave Chinner  *
3537fd1678SDave Chinner  * xfs_buf_stale:
3637fd1678SDave Chinner  *	b_sema (caller holds)
3737fd1678SDave Chinner  *	  b_lock
3837fd1678SDave Chinner  *	    lru_lock
3937fd1678SDave Chinner  *
4037fd1678SDave Chinner  * xfs_buf_rele:
4137fd1678SDave Chinner  *	b_lock
4237fd1678SDave Chinner  *	  pag_buf_lock
4337fd1678SDave Chinner  *	    lru_lock
4437fd1678SDave Chinner  *
4510fb9ac1SBrian Foster  * xfs_buftarg_drain_rele
4637fd1678SDave Chinner  *	lru_lock
4737fd1678SDave Chinner  *	  b_lock (trylock due to inversion)
4837fd1678SDave Chinner  *
4937fd1678SDave Chinner  * xfs_buftarg_isolate
5037fd1678SDave Chinner  *	lru_lock
5137fd1678SDave Chinner  *	  b_lock (trylock due to inversion)
5237fd1678SDave Chinner  */
53c59d87c4SChristoph Hellwig 
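/*
 * Illustrative sketch (hypothetical, not built): how the orderings above look
 * from a path that already owns b_sema, such as xfs_buf_stale(). The fields
 * and lock primitives are the real ones; only this helper is made up.
 */
#if 0
static void example_lock_nesting(struct xfs_buf *bp)
{
	/* caller holds b_sema via xfs_buf_lock(bp) */
	spin_lock(&bp->b_lock);		/* b_lock nests inside b_sema */
	/* ... update bp->b_state; lru_lock would nest inside b_lock ... */
	spin_unlock(&bp->b_lock);
}
#endif
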
5426e32875SChristoph Hellwig static int __xfs_buf_submit(struct xfs_buf *bp, bool wait);
5526e32875SChristoph Hellwig 
5626e32875SChristoph Hellwig static inline int
5726e32875SChristoph Hellwig xfs_buf_submit(
5826e32875SChristoph Hellwig 	struct xfs_buf		*bp)
5926e32875SChristoph Hellwig {
6026e32875SChristoph Hellwig 	return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
6126e32875SChristoph Hellwig }
6226e32875SChristoph Hellwig 
63c59d87c4SChristoph Hellwig static inline int
64c59d87c4SChristoph Hellwig xfs_buf_is_vmapped(
65c59d87c4SChristoph Hellwig 	struct xfs_buf	*bp)
66c59d87c4SChristoph Hellwig {
67c59d87c4SChristoph Hellwig 	/*
68c59d87c4SChristoph Hellwig 	 * Return true if the buffer is vmapped.
69c59d87c4SChristoph Hellwig 	 *
70611c9946SDave Chinner 	 * b_addr is null if the buffer is not mapped, but the code never vmaps
71611c9946SDave Chinner 	 * a single-page buffer in the first place, so the check has to test
72611c9946SDave Chinner 	 * both b_addr and bp->b_page_count > 1.
73c59d87c4SChristoph Hellwig 	 */
74611c9946SDave Chinner 	return bp->b_addr && bp->b_page_count > 1;
75c59d87c4SChristoph Hellwig }
76c59d87c4SChristoph Hellwig 
77c59d87c4SChristoph Hellwig static inline int
78c59d87c4SChristoph Hellwig xfs_buf_vmap_len(
79c59d87c4SChristoph Hellwig 	struct xfs_buf	*bp)
80c59d87c4SChristoph Hellwig {
8154cd3aa6SChristoph Hellwig 	return (bp->b_page_count * PAGE_SIZE);
82c59d87c4SChristoph Hellwig }
83c59d87c4SChristoph Hellwig 
84c59d87c4SChristoph Hellwig /*
859c7504aaSBrian Foster  * Bump the I/O in flight count on the buftarg if we haven't yet done so for
869c7504aaSBrian Foster  * this buffer. The count is incremented once per buffer (per hold cycle)
879c7504aaSBrian Foster  * because the corresponding decrement is deferred to buffer release. Buffers
889c7504aaSBrian Foster  * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
899c7504aaSBrian Foster  * tracking adds unnecessary overhead. This is used for synchronization purposes
9010fb9ac1SBrian Foster  * with unmount (see xfs_buftarg_drain()), so all we really need is a count of
919c7504aaSBrian Foster  * in-flight buffers.
929c7504aaSBrian Foster  *
939c7504aaSBrian Foster  * Buffers that are never released (e.g., superblock, iclog buffers) must set
949c7504aaSBrian Foster  * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count
959c7504aaSBrian Foster  * never reaches zero and unmount hangs indefinitely.
969c7504aaSBrian Foster  */
979c7504aaSBrian Foster static inline void
989c7504aaSBrian Foster xfs_buf_ioacct_inc(
999c7504aaSBrian Foster 	struct xfs_buf	*bp)
1009c7504aaSBrian Foster {
10163db7c81SBrian Foster 	if (bp->b_flags & XBF_NO_IOACCT)
1029c7504aaSBrian Foster 		return;
1039c7504aaSBrian Foster 
1049c7504aaSBrian Foster 	ASSERT(bp->b_flags & XBF_ASYNC);
10563db7c81SBrian Foster 	spin_lock(&bp->b_lock);
10663db7c81SBrian Foster 	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
10763db7c81SBrian Foster 		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
1089c7504aaSBrian Foster 		percpu_counter_inc(&bp->b_target->bt_io_count);
1099c7504aaSBrian Foster 	}
11063db7c81SBrian Foster 	spin_unlock(&bp->b_lock);
11163db7c81SBrian Foster }
1129c7504aaSBrian Foster 
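/*
 * Illustrative sketch (hypothetical helper): a buffer that is held for the
 * life of the mount and never released must opt out of in-flight accounting
 * before its first async submission, otherwise bt_io_count never returns to
 * zero and xfs_buftarg_drain() waits forever at unmount.
 */
#if 0
static void example_exempt_from_ioacct(struct xfs_buf *bp)
{
	bp->b_flags |= XBF_NO_IOACCT;	/* exclude from bt_io_count */
	/* ... submit I/O; the hold is retained indefinitely ... */
}
#endif
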
1139c7504aaSBrian Foster /*
1149c7504aaSBrian Foster  * Clear the in-flight state on a buffer about to be released to the LRU or
1159c7504aaSBrian Foster  * freed and unaccount from the buftarg.
1169c7504aaSBrian Foster  */
1179c7504aaSBrian Foster static inline void
11863db7c81SBrian Foster __xfs_buf_ioacct_dec(
11963db7c81SBrian Foster 	struct xfs_buf	*bp)
12063db7c81SBrian Foster {
12195989c46SBrian Foster 	lockdep_assert_held(&bp->b_lock);
12263db7c81SBrian Foster 
12363db7c81SBrian Foster 	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
12463db7c81SBrian Foster 		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
12563db7c81SBrian Foster 		percpu_counter_dec(&bp->b_target->bt_io_count);
12663db7c81SBrian Foster 	}
12763db7c81SBrian Foster }
12863db7c81SBrian Foster 
12963db7c81SBrian Foster static inline void
1309c7504aaSBrian Foster xfs_buf_ioacct_dec(
1319c7504aaSBrian Foster 	struct xfs_buf	*bp)
1329c7504aaSBrian Foster {
13363db7c81SBrian Foster 	spin_lock(&bp->b_lock);
13463db7c81SBrian Foster 	__xfs_buf_ioacct_dec(bp);
13563db7c81SBrian Foster 	spin_unlock(&bp->b_lock);
1369c7504aaSBrian Foster }
1379c7504aaSBrian Foster 
1389c7504aaSBrian Foster /*
139c59d87c4SChristoph Hellwig  * When we mark a buffer stale, we remove the buffer from the LRU and clear the
140c59d87c4SChristoph Hellwig  * b_lru_ref count so that the buffer is freed immediately when the buffer
141c59d87c4SChristoph Hellwig  * reference count falls to zero. If the buffer is already on the LRU, we need
142c59d87c4SChristoph Hellwig  * to remove the reference that LRU holds on the buffer.
143c59d87c4SChristoph Hellwig  *
144c59d87c4SChristoph Hellwig  * This prevents build-up of stale buffers on the LRU.
145c59d87c4SChristoph Hellwig  */
146c59d87c4SChristoph Hellwig void
147c59d87c4SChristoph Hellwig xfs_buf_stale(
148c59d87c4SChristoph Hellwig 	struct xfs_buf	*bp)
149c59d87c4SChristoph Hellwig {
15043ff2122SChristoph Hellwig 	ASSERT(xfs_buf_islocked(bp));
15143ff2122SChristoph Hellwig 
152c59d87c4SChristoph Hellwig 	bp->b_flags |= XBF_STALE;
15343ff2122SChristoph Hellwig 
15443ff2122SChristoph Hellwig 	/*
15543ff2122SChristoph Hellwig 	 * Clear the delwri status so that a delwri queue walker will not
15643ff2122SChristoph Hellwig 	 * flush this buffer to disk now that it is stale. The delwri queue has
15743ff2122SChristoph Hellwig 	 * a reference to the buffer, so this is safe to do.
15843ff2122SChristoph Hellwig 	 */
15943ff2122SChristoph Hellwig 	bp->b_flags &= ~_XBF_DELWRI_Q;
16043ff2122SChristoph Hellwig 
1619c7504aaSBrian Foster 	/*
1629c7504aaSBrian Foster 	 * Once the buffer is marked stale and unlocked, a subsequent lookup
1639c7504aaSBrian Foster 	 * could reset b_flags. There is no guarantee that the buffer is
1649c7504aaSBrian Foster 	 * unaccounted (released to LRU) before that occurs. Drop in-flight
1659c7504aaSBrian Foster 	 * status now to preserve accounting consistency.
1669c7504aaSBrian Foster 	 */
167a4082357SDave Chinner 	spin_lock(&bp->b_lock);
16863db7c81SBrian Foster 	__xfs_buf_ioacct_dec(bp);
16963db7c81SBrian Foster 
170a4082357SDave Chinner 	atomic_set(&bp->b_lru_ref, 0);
171a4082357SDave Chinner 	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
172e80dfa19SDave Chinner 	    (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
173c59d87c4SChristoph Hellwig 		atomic_dec(&bp->b_hold);
174e80dfa19SDave Chinner 
175c59d87c4SChristoph Hellwig 	ASSERT(atomic_read(&bp->b_hold) >= 1);
176a4082357SDave Chinner 	spin_unlock(&bp->b_lock);
177c59d87c4SChristoph Hellwig }
178c59d87c4SChristoph Hellwig 
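/*
 * Illustrative sketch (hypothetical helper): the usual invalidation pattern
 * for a buffer whose backing blocks have been freed. All three calls are the
 * real interfaces; xfs_buf_stale() asserts that the buffer is locked.
 */
#if 0
static void example_invalidate_buffer(struct xfs_buf *bp)
{
	xfs_buf_lock(bp);
	xfs_buf_stale(bp);	/* drop from the LRU as soon as possible */
	xfs_buf_relse(bp);	/* unlock and release the hold */
}
#endif
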
1793e85c868SDave Chinner static int
1803e85c868SDave Chinner xfs_buf_get_maps(
1813e85c868SDave Chinner 	struct xfs_buf		*bp,
1823e85c868SDave Chinner 	int			map_count)
1833e85c868SDave Chinner {
1843e85c868SDave Chinner 	ASSERT(bp->b_maps == NULL);
1853e85c868SDave Chinner 	bp->b_map_count = map_count;
1863e85c868SDave Chinner 
1873e85c868SDave Chinner 	if (map_count == 1) {
188f4b42421SMark Tinguely 		bp->b_maps = &bp->__b_map;
1893e85c868SDave Chinner 		return 0;
1903e85c868SDave Chinner 	}
1913e85c868SDave Chinner 
1923e85c868SDave Chinner 	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
1933e85c868SDave Chinner 				KM_NOFS);
1943e85c868SDave Chinner 	if (!bp->b_maps)
1952451337dSDave Chinner 		return -ENOMEM;
1963e85c868SDave Chinner 	return 0;
1973e85c868SDave Chinner }
1983e85c868SDave Chinner 
1993e85c868SDave Chinner /*
2003e85c868SDave Chinner  *	Frees b_maps if it was allocated separately from the buffer.
2013e85c868SDave Chinner  */
2023e85c868SDave Chinner static void
2033e85c868SDave Chinner xfs_buf_free_maps(
2043e85c868SDave Chinner 	struct xfs_buf	*bp)
2053e85c868SDave Chinner {
206f4b42421SMark Tinguely 	if (bp->b_maps != &bp->__b_map) {
2073e85c868SDave Chinner 		kmem_free(bp->b_maps);
2083e85c868SDave Chinner 		bp->b_maps = NULL;
2093e85c868SDave Chinner 	}
2103e85c868SDave Chinner }
2113e85c868SDave Chinner 
21232dff5e5SDarrick J. Wong static int
2133e85c868SDave Chinner _xfs_buf_alloc(
2144347b9d7SChristoph Hellwig 	struct xfs_buftarg	*target,
2153e85c868SDave Chinner 	struct xfs_buf_map	*map,
2163e85c868SDave Chinner 	int			nmaps,
21732dff5e5SDarrick J. Wong 	xfs_buf_flags_t		flags,
21832dff5e5SDarrick J. Wong 	struct xfs_buf		**bpp)
219c59d87c4SChristoph Hellwig {
2204347b9d7SChristoph Hellwig 	struct xfs_buf		*bp;
2213e85c868SDave Chinner 	int			error;
2223e85c868SDave Chinner 	int			i;
2234347b9d7SChristoph Hellwig 
22432dff5e5SDarrick J. Wong 	*bpp = NULL;
225182696fbSDarrick J. Wong 	bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL);
2264347b9d7SChristoph Hellwig 
227c59d87c4SChristoph Hellwig 	/*
22812bcb3f7SDave Chinner 	 * We don't want certain flags to appear in b_flags unless they are
22912bcb3f7SDave Chinner 	 * specifically set by later operations on the buffer.
230c59d87c4SChristoph Hellwig 	 */
231611c9946SDave Chinner 	flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD);
232c59d87c4SChristoph Hellwig 
233c59d87c4SChristoph Hellwig 	atomic_set(&bp->b_hold, 1);
234c59d87c4SChristoph Hellwig 	atomic_set(&bp->b_lru_ref, 1);
235c59d87c4SChristoph Hellwig 	init_completion(&bp->b_iowait);
236c59d87c4SChristoph Hellwig 	INIT_LIST_HEAD(&bp->b_lru);
237c59d87c4SChristoph Hellwig 	INIT_LIST_HEAD(&bp->b_list);
238643c8c05SCarlos Maiolino 	INIT_LIST_HEAD(&bp->b_li_list);
239c59d87c4SChristoph Hellwig 	sema_init(&bp->b_sema, 0); /* held, no waiters */
240a4082357SDave Chinner 	spin_lock_init(&bp->b_lock);
241c59d87c4SChristoph Hellwig 	bp->b_target = target;
242dbd329f1SChristoph Hellwig 	bp->b_mount = target->bt_mount;
2433e85c868SDave Chinner 	bp->b_flags = flags;
244de1cbee4SDave Chinner 
245c59d87c4SChristoph Hellwig 	/*
246aa0e8833SDave Chinner 	 * Set up the buffer maps and derive b_length from them. I/O routines
247aa0e8833SDave Chinner 	 * work against b_maps/b_length, which cover the whole buffer in most
248c59d87c4SChristoph Hellwig 	 * cases but may be reset (e.g. XFS recovery).
249c59d87c4SChristoph Hellwig 	 */
2503e85c868SDave Chinner 	error = xfs_buf_get_maps(bp, nmaps);
2513e85c868SDave Chinner 	if (error)  {
252182696fbSDarrick J. Wong 		kmem_cache_free(xfs_buf_cache, bp);
25332dff5e5SDarrick J. Wong 		return error;
2543e85c868SDave Chinner 	}
2553e85c868SDave Chinner 
2564c7f65aeSDave Chinner 	bp->b_rhash_key = map[0].bm_bn;
2573e85c868SDave Chinner 	bp->b_length = 0;
2583e85c868SDave Chinner 	for (i = 0; i < nmaps; i++) {
2593e85c868SDave Chinner 		bp->b_maps[i].bm_bn = map[i].bm_bn;
2603e85c868SDave Chinner 		bp->b_maps[i].bm_len = map[i].bm_len;
2613e85c868SDave Chinner 		bp->b_length += map[i].bm_len;
2623e85c868SDave Chinner 	}
2633e85c868SDave Chinner 
264c59d87c4SChristoph Hellwig 	atomic_set(&bp->b_pin_count, 0);
265c59d87c4SChristoph Hellwig 	init_waitqueue_head(&bp->b_waiters);
266c59d87c4SChristoph Hellwig 
267dbd329f1SChristoph Hellwig 	XFS_STATS_INC(bp->b_mount, xb_create);
268c59d87c4SChristoph Hellwig 	trace_xfs_buf_init(bp, _RET_IP_);
2694347b9d7SChristoph Hellwig 
27032dff5e5SDarrick J. Wong 	*bpp = bp;
27132dff5e5SDarrick J. Wong 	return 0;
272c59d87c4SChristoph Hellwig }
273c59d87c4SChristoph Hellwig 
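/*
 * Illustrative sketch: a discontiguous buffer is described by an array of
 * xfs_buf_map entries; _xfs_buf_alloc() copies each extent and sums bm_len
 * into b_length. The daddr/length values below are made-up examples.
 */
#if 0
static void example_two_extent_map(void)
{
	struct xfs_buf_map map[2] = {
		{ .bm_bn = 1024, .bm_len = 8 },	/* 8 BBs at daddr 1024 */
		{ .bm_bn = 4096, .bm_len = 8 },	/* 8 BBs at daddr 4096 */
	};

	/* pass map and nmaps == 2 to xfs_buf_get_map()/xfs_buf_read_map() */
}
#endif
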
274e7d236a6SDave Chinner static void
275e7d236a6SDave Chinner xfs_buf_free_pages(
276e8222613SDave Chinner 	struct xfs_buf	*bp)
277c59d87c4SChristoph Hellwig {
278e7d236a6SDave Chinner 	uint		i;
279e7d236a6SDave Chinner 
280e7d236a6SDave Chinner 	ASSERT(bp->b_flags & _XBF_PAGES);
281e7d236a6SDave Chinner 
282e7d236a6SDave Chinner 	if (xfs_buf_is_vmapped(bp))
28354cd3aa6SChristoph Hellwig 		vm_unmap_ram(bp->b_addr, bp->b_page_count);
284e7d236a6SDave Chinner 
285e7d236a6SDave Chinner 	for (i = 0; i < bp->b_page_count; i++) {
286e7d236a6SDave Chinner 		if (bp->b_pages[i])
287e7d236a6SDave Chinner 			__free_page(bp->b_pages[i]);
288e7d236a6SDave Chinner 	}
289c7b23b68SYosry Ahmed 	mm_account_reclaimed_pages(bp->b_page_count);
290e7d236a6SDave Chinner 
29102c51173SDave Chinner 	if (bp->b_pages != bp->b_page_array)
292c59d87c4SChristoph Hellwig 		kmem_free(bp->b_pages);
293c59d87c4SChristoph Hellwig 	bp->b_pages = NULL;
294e7d236a6SDave Chinner 	bp->b_flags &= ~_XBF_PAGES;
295c59d87c4SChristoph Hellwig }
296c59d87c4SChristoph Hellwig 
29725a40957SChristoph Hellwig static void
298298f3422SDave Chinner xfs_buf_free_callback(
299298f3422SDave Chinner 	struct callback_head	*cb)
300298f3422SDave Chinner {
301298f3422SDave Chinner 	struct xfs_buf		*bp = container_of(cb, struct xfs_buf, b_rcu);
302298f3422SDave Chinner 
303298f3422SDave Chinner 	xfs_buf_free_maps(bp);
304298f3422SDave Chinner 	kmem_cache_free(xfs_buf_cache, bp);
305298f3422SDave Chinner }
306298f3422SDave Chinner 
307298f3422SDave Chinner static void
308c59d87c4SChristoph Hellwig xfs_buf_free(
309e8222613SDave Chinner 	struct xfs_buf		*bp)
310c59d87c4SChristoph Hellwig {
311c59d87c4SChristoph Hellwig 	trace_xfs_buf_free(bp, _RET_IP_);
312c59d87c4SChristoph Hellwig 
313c59d87c4SChristoph Hellwig 	ASSERT(list_empty(&bp->b_lru));
314c59d87c4SChristoph Hellwig 
315e7d236a6SDave Chinner 	if (bp->b_flags & _XBF_PAGES)
316e7d236a6SDave Chinner 		xfs_buf_free_pages(bp);
317e7d236a6SDave Chinner 	else if (bp->b_flags & _XBF_KMEM)
318c59d87c4SChristoph Hellwig 		kmem_free(bp->b_addr);
319e7d236a6SDave Chinner 
320298f3422SDave Chinner 	call_rcu(&bp->b_rcu, xfs_buf_free_callback);
321c59d87c4SChristoph Hellwig }
322c59d87c4SChristoph Hellwig 
3230a683794SDave Chinner static int
3240a683794SDave Chinner xfs_buf_alloc_kmem(
325e8222613SDave Chinner 	struct xfs_buf	*bp,
3260a683794SDave Chinner 	xfs_buf_flags_t	flags)
327c59d87c4SChristoph Hellwig {
3280a683794SDave Chinner 	xfs_km_flags_t	kmflag_mask = KM_NOFS;
3298bcac744SDave Chinner 	size_t		size = BBTOB(bp->b_length);
3300a683794SDave Chinner 
3310a683794SDave Chinner 	/* Assure zeroed buffer for non-read cases. */
3320a683794SDave Chinner 	if (!(flags & XBF_READ))
3330a683794SDave Chinner 		kmflag_mask |= KM_ZERO;
3340a683794SDave Chinner 
33598fe2c3cSDave Chinner 	bp->b_addr = kmem_alloc(size, kmflag_mask);
3360a683794SDave Chinner 	if (!bp->b_addr)
3370a683794SDave Chinner 		return -ENOMEM;
338c59d87c4SChristoph Hellwig 
339795cac72SDave Chinner 	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
340c59d87c4SChristoph Hellwig 	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
341c59d87c4SChristoph Hellwig 		/* b_addr spans two pages - use alloc_page instead */
342c59d87c4SChristoph Hellwig 		kmem_free(bp->b_addr);
343c59d87c4SChristoph Hellwig 		bp->b_addr = NULL;
3440a683794SDave Chinner 		return -ENOMEM;
345c59d87c4SChristoph Hellwig 	}
346c59d87c4SChristoph Hellwig 	bp->b_offset = offset_in_page(bp->b_addr);
347c59d87c4SChristoph Hellwig 	bp->b_pages = bp->b_page_array;
348f8f9ee47SDave Chinner 	bp->b_pages[0] = kmem_to_page(bp->b_addr);
349c59d87c4SChristoph Hellwig 	bp->b_page_count = 1;
350611c9946SDave Chinner 	bp->b_flags |= _XBF_KMEM;
351c59d87c4SChristoph Hellwig 	return 0;
352c59d87c4SChristoph Hellwig }
353c59d87c4SChristoph Hellwig 
3540a683794SDave Chinner static int
3550a683794SDave Chinner xfs_buf_alloc_pages(
3560a683794SDave Chinner 	struct xfs_buf	*bp,
3570a683794SDave Chinner 	xfs_buf_flags_t	flags)
3580a683794SDave Chinner {
359289ae7b4SDave Chinner 	gfp_t		gfp_mask = __GFP_NOWARN;
360c9fa5630SDave Chinner 	long		filled = 0;
3610a683794SDave Chinner 
362289ae7b4SDave Chinner 	if (flags & XBF_READ_AHEAD)
363289ae7b4SDave Chinner 		gfp_mask |= __GFP_NORETRY;
364289ae7b4SDave Chinner 	else
365289ae7b4SDave Chinner 		gfp_mask |= GFP_NOFS;
366289ae7b4SDave Chinner 
36702c51173SDave Chinner 	/* Make sure that we have a page list */
368934d1076SChristoph Hellwig 	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
36902c51173SDave Chinner 	if (bp->b_page_count <= XB_PAGES) {
37002c51173SDave Chinner 		bp->b_pages = bp->b_page_array;
37102c51173SDave Chinner 	} else {
37202c51173SDave Chinner 		bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
37302c51173SDave Chinner 					gfp_mask);
37402c51173SDave Chinner 		if (!bp->b_pages)
37502c51173SDave Chinner 			return -ENOMEM;
37602c51173SDave Chinner 	}
37702c51173SDave Chinner 	bp->b_flags |= _XBF_PAGES;
37802c51173SDave Chinner 
3790a683794SDave Chinner 	/* Assure zeroed buffer for non-read cases. */
3800a683794SDave Chinner 	if (!(flags & XBF_READ))
3810a683794SDave Chinner 		gfp_mask |= __GFP_ZERO;
3820a683794SDave Chinner 
383c9fa5630SDave Chinner 	/*
384c9fa5630SDave Chinner 	 * Bulk filling of pages can take multiple calls. Not filling the entire
385c9fa5630SDave Chinner 	 * array is not an allocation failure, so don't back off if we get at
386c9fa5630SDave Chinner 	 * least one extra page.
387c9fa5630SDave Chinner 	 */
388c9fa5630SDave Chinner 	for (;;) {
389c9fa5630SDave Chinner 		long	last = filled;
390c9fa5630SDave Chinner 
391c9fa5630SDave Chinner 		filled = alloc_pages_bulk_array(gfp_mask, bp->b_page_count,
392c9fa5630SDave Chinner 						bp->b_pages);
393c9fa5630SDave Chinner 		if (filled == bp->b_page_count) {
394c9fa5630SDave Chinner 			XFS_STATS_INC(bp->b_mount, xb_page_found);
395c9fa5630SDave Chinner 			break;
396c9fa5630SDave Chinner 		}
397c9fa5630SDave Chinner 
398c9fa5630SDave Chinner 		if (filled != last)
399c9fa5630SDave Chinner 			continue;
400c9fa5630SDave Chinner 
401c59d87c4SChristoph Hellwig 		if (flags & XBF_READ_AHEAD) {
402e7d236a6SDave Chinner 			xfs_buf_free_pages(bp);
403e7d236a6SDave Chinner 			return -ENOMEM;
404c59d87c4SChristoph Hellwig 		}
405c59d87c4SChristoph Hellwig 
406dbd329f1SChristoph Hellwig 		XFS_STATS_INC(bp->b_mount, xb_page_retries);
4074034247aSNeilBrown 		memalloc_retry_wait(gfp_mask);
408c59d87c4SChristoph Hellwig 	}
409c59d87c4SChristoph Hellwig 	return 0;
410c59d87c4SChristoph Hellwig }
411c59d87c4SChristoph Hellwig 
412c59d87c4SChristoph Hellwig /*
413c59d87c4SChristoph Hellwig  *	Map buffer into kernel address-space if necessary.
414c59d87c4SChristoph Hellwig  */
415c59d87c4SChristoph Hellwig STATIC int
416c59d87c4SChristoph Hellwig _xfs_buf_map_pages(
417e8222613SDave Chinner 	struct xfs_buf		*bp,
418b9b3fe15SDave Chinner 	xfs_buf_flags_t		flags)
419c59d87c4SChristoph Hellwig {
420c59d87c4SChristoph Hellwig 	ASSERT(bp->b_flags & _XBF_PAGES);
421c59d87c4SChristoph Hellwig 	if (bp->b_page_count == 1) {
422c59d87c4SChristoph Hellwig 		/* A single page buffer is always mappable */
42354cd3aa6SChristoph Hellwig 		bp->b_addr = page_address(bp->b_pages[0]);
424611c9946SDave Chinner 	} else if (flags & XBF_UNMAPPED) {
425611c9946SDave Chinner 		bp->b_addr = NULL;
426611c9946SDave Chinner 	} else {
427c59d87c4SChristoph Hellwig 		int retried = 0;
4289ba1fb2cSMichal Hocko 		unsigned nofs_flag;
429c59d87c4SChristoph Hellwig 
430ae687e58SDave Chinner 		/*
431cf085a1bSJoe Perches 		 * vm_map_ram() will allocate auxiliary structures (e.g.
432ae687e58SDave Chinner 		 * pagetables) with GFP_KERNEL, yet we are likely to be under
433ae687e58SDave Chinner 		 * GFP_NOFS context here. Hence we need to tell memory reclaim
4349ba1fb2cSMichal Hocko 		 * that we are in such a context via PF_MEMALLOC_NOFS to prevent
435ae687e58SDave Chinner 		 * memory reclaim re-entering the filesystem here and
436ae687e58SDave Chinner 		 * potentially deadlocking.
437ae687e58SDave Chinner 		 */
4389ba1fb2cSMichal Hocko 		nofs_flag = memalloc_nofs_save();
439c59d87c4SChristoph Hellwig 		do {
440c59d87c4SChristoph Hellwig 			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
441d4efd79aSChristoph Hellwig 						-1);
442c59d87c4SChristoph Hellwig 			if (bp->b_addr)
443c59d87c4SChristoph Hellwig 				break;
444c59d87c4SChristoph Hellwig 			vm_unmap_aliases();
445c59d87c4SChristoph Hellwig 		} while (retried++ <= 1);
4469ba1fb2cSMichal Hocko 		memalloc_nofs_restore(nofs_flag);
447c59d87c4SChristoph Hellwig 
448c59d87c4SChristoph Hellwig 		if (!bp->b_addr)
449c59d87c4SChristoph Hellwig 			return -ENOMEM;
450c59d87c4SChristoph Hellwig 	}
451c59d87c4SChristoph Hellwig 
452c59d87c4SChristoph Hellwig 	return 0;
453c59d87c4SChristoph Hellwig }
454c59d87c4SChristoph Hellwig 
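/*
 * Illustrative sketch of the scoped-NOFS pattern used above: everything
 * allocated between save and restore is implicitly GFP_NOFS, so reclaim
 * cannot recurse into the filesystem. The allocation call is hypothetical.
 */
#if 0
static void example_scoped_nofs(void)
{
	unsigned int	nofs_flag = memalloc_nofs_save();

	/* allocations here will not re-enter the filesystem via reclaim */
	do_allocation_that_might_reclaim();	/* hypothetical callee */

	memalloc_nofs_restore(nofs_flag);
}
#endif
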
455c59d87c4SChristoph Hellwig /*
456c59d87c4SChristoph Hellwig  *	Finding and Reading Buffers
457c59d87c4SChristoph Hellwig  */
4586031e73aSLucas Stach static int
4596031e73aSLucas Stach _xfs_buf_obj_cmp(
4606031e73aSLucas Stach 	struct rhashtable_compare_arg	*arg,
4616031e73aSLucas Stach 	const void			*obj)
4626031e73aSLucas Stach {
4636031e73aSLucas Stach 	const struct xfs_buf_map	*map = arg->key;
4646031e73aSLucas Stach 	const struct xfs_buf		*bp = obj;
4656031e73aSLucas Stach 
4666031e73aSLucas Stach 	/*
4676031e73aSLucas Stach 	 * The key hashing in the lookup path depends on the key being the
4686031e73aSLucas Stach 	 * first element of the compare_arg, make sure to assert this.
4696031e73aSLucas Stach 	 */
4706031e73aSLucas Stach 	BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
4716031e73aSLucas Stach 
4724c7f65aeSDave Chinner 	if (bp->b_rhash_key != map->bm_bn)
4736031e73aSLucas Stach 		return 1;
4746031e73aSLucas Stach 
4756031e73aSLucas Stach 	if (unlikely(bp->b_length != map->bm_len)) {
4766031e73aSLucas Stach 		/*
4776031e73aSLucas Stach 		 * found a block number match. If the range doesn't
4786031e73aSLucas Stach 		 * match, the only way this is allowed is if the buffer
4796031e73aSLucas Stach 		 * in the cache is stale and the transaction that made
4806031e73aSLucas Stach 		 * it stale has not yet committed. i.e. we are
4816031e73aSLucas Stach 		 * reallocating a busy extent. Skip this buffer and
4826031e73aSLucas Stach 		 * continue searching for an exact match.
4836031e73aSLucas Stach 		 */
4849ed851f6SDarrick J. Wong 		if (!(map->bm_flags & XBM_LIVESCAN))
4856031e73aSLucas Stach 			ASSERT(bp->b_flags & XBF_STALE);
4866031e73aSLucas Stach 		return 1;
4876031e73aSLucas Stach 	}
4886031e73aSLucas Stach 	return 0;
4896031e73aSLucas Stach }
4906031e73aSLucas Stach 
4916031e73aSLucas Stach static const struct rhashtable_params xfs_buf_hash_params = {
4926031e73aSLucas Stach 	.min_size		= 32,	/* empty AGs have minimal footprint */
4936031e73aSLucas Stach 	.nelem_hint		= 16,
4946031e73aSLucas Stach 	.key_len		= sizeof(xfs_daddr_t),
4954c7f65aeSDave Chinner 	.key_offset		= offsetof(struct xfs_buf, b_rhash_key),
4966031e73aSLucas Stach 	.head_offset		= offsetof(struct xfs_buf, b_rhash_head),
4976031e73aSLucas Stach 	.automatic_shrinking	= true,
4986031e73aSLucas Stach 	.obj_cmpfn		= _xfs_buf_obj_cmp,
4996031e73aSLucas Stach };
5006031e73aSLucas Stach 
5016031e73aSLucas Stach int
5026031e73aSLucas Stach xfs_buf_hash_init(
5036031e73aSLucas Stach 	struct xfs_perag	*pag)
5046031e73aSLucas Stach {
5056031e73aSLucas Stach 	spin_lock_init(&pag->pag_buf_lock);
5066031e73aSLucas Stach 	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
5076031e73aSLucas Stach }
5086031e73aSLucas Stach 
5096031e73aSLucas Stach void
5106031e73aSLucas Stach xfs_buf_hash_destroy(
5116031e73aSLucas Stach 	struct xfs_perag	*pag)
5126031e73aSLucas Stach {
5136031e73aSLucas Stach 	rhashtable_destroy(&pag->pag_buf_hash);
5146031e73aSLucas Stach }
515c59d87c4SChristoph Hellwig 
516b027d4c9SDave Chinner static int
517de67dc57SDave Chinner xfs_buf_map_verify(
518e70b73f8SDave Chinner 	struct xfs_buftarg	*btp,
519de67dc57SDave Chinner 	struct xfs_buf_map	*map)
520c59d87c4SChristoph Hellwig {
52110616b80SDave Chinner 	xfs_daddr_t		eofs;
522c59d87c4SChristoph Hellwig 
523c59d87c4SChristoph Hellwig 	/* Check for IOs smaller than the sector size / not sector aligned */
524de67dc57SDave Chinner 	ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize));
525de67dc57SDave Chinner 	ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
526c59d87c4SChristoph Hellwig 
52710616b80SDave Chinner 	/*
52810616b80SDave Chinner 	 * Corrupted block numbers can get through to here, unfortunately, so we
52910616b80SDave Chinner 	 * have to check that the buffer falls within the filesystem bounds.
53010616b80SDave Chinner 	 */
53110616b80SDave Chinner 	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
532de67dc57SDave Chinner 	if (map->bm_bn < 0 || map->bm_bn >= eofs) {
53310616b80SDave Chinner 		xfs_alert(btp->bt_mount,
534c219b015SDarrick J. Wong 			  "%s: daddr 0x%llx out of range, EOFS 0x%llx",
535de67dc57SDave Chinner 			  __func__, map->bm_bn, eofs);
5367bc0dc27SDave Chinner 		WARN_ON(1);
537b027d4c9SDave Chinner 		return -EFSCORRUPTED;
53810616b80SDave Chinner 	}
539b027d4c9SDave Chinner 	return 0;
540de67dc57SDave Chinner }
541c59d87c4SChristoph Hellwig 
542de67dc57SDave Chinner static int
543de67dc57SDave Chinner xfs_buf_find_lock(
544de67dc57SDave Chinner 	struct xfs_buf          *bp,
545de67dc57SDave Chinner 	xfs_buf_flags_t		flags)
546de67dc57SDave Chinner {
547c59d87c4SChristoph Hellwig 	if (flags & XBF_TRYLOCK) {
548d8d9bbb0SDave Chinner 		if (!xfs_buf_trylock(bp)) {
549de67dc57SDave Chinner 			XFS_STATS_INC(bp->b_mount, xb_busy_locked);
550b027d4c9SDave Chinner 			return -EAGAIN;
551c59d87c4SChristoph Hellwig 		}
552d8d9bbb0SDave Chinner 	} else {
553c59d87c4SChristoph Hellwig 		xfs_buf_lock(bp);
554de67dc57SDave Chinner 		XFS_STATS_INC(bp->b_mount, xb_get_locked_waited);
555c59d87c4SChristoph Hellwig 	}
556c59d87c4SChristoph Hellwig 
557c59d87c4SChristoph Hellwig 	/*
558c59d87c4SChristoph Hellwig 	 * if the buffer is stale, clear all the external state associated with
559c59d87c4SChristoph Hellwig 	 * it. We need to keep flags such as how we allocated the buffer memory
560c59d87c4SChristoph Hellwig 	 * intact here.
561c59d87c4SChristoph Hellwig 	 */
562c59d87c4SChristoph Hellwig 	if (bp->b_flags & XBF_STALE) {
5639ed851f6SDarrick J. Wong 		if (flags & XBF_LIVESCAN) {
5649ed851f6SDarrick J. Wong 			xfs_buf_unlock(bp);
5659ed851f6SDarrick J. Wong 			return -ENOENT;
5669ed851f6SDarrick J. Wong 		}
567c59d87c4SChristoph Hellwig 		ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
568611c9946SDave Chinner 		bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
5691813dd64SDave Chinner 		bp->b_ops = NULL;
570c59d87c4SChristoph Hellwig 	}
571b027d4c9SDave Chinner 	return 0;
572c59d87c4SChristoph Hellwig }
573c59d87c4SChristoph Hellwig 
57434800080SDave Chinner static inline int
575de67dc57SDave Chinner xfs_buf_lookup(
576de67dc57SDave Chinner 	struct xfs_perag	*pag,
57734800080SDave Chinner 	struct xfs_buf_map	*map,
57834800080SDave Chinner 	xfs_buf_flags_t		flags,
57934800080SDave Chinner 	struct xfs_buf		**bpp)
5808925a3dcSDave Chinner {
581b027d4c9SDave Chinner 	struct xfs_buf          *bp;
582b027d4c9SDave Chinner 	int			error;
583b027d4c9SDave Chinner 
584298f3422SDave Chinner 	rcu_read_lock();
585de67dc57SDave Chinner 	bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params);
586298f3422SDave Chinner 	if (!bp || !atomic_inc_not_zero(&bp->b_hold)) {
587298f3422SDave Chinner 		rcu_read_unlock();
58834800080SDave Chinner 		return -ENOENT;
58934800080SDave Chinner 	}
590298f3422SDave Chinner 	rcu_read_unlock();
59134800080SDave Chinner 
59234800080SDave Chinner 	error = xfs_buf_find_lock(bp, flags);
59334800080SDave Chinner 	if (error) {
59434800080SDave Chinner 		xfs_buf_rele(bp);
59534800080SDave Chinner 		return error;
59634800080SDave Chinner 	}
59734800080SDave Chinner 
59834800080SDave Chinner 	trace_xfs_buf_find(bp, flags, _RET_IP_);
59934800080SDave Chinner 	*bpp = bp;
60034800080SDave Chinner 	return 0;
6018925a3dcSDave Chinner }
6028925a3dcSDave Chinner 
603c59d87c4SChristoph Hellwig /*
604de67dc57SDave Chinner  * Insert the new_bp into the hash table. This consumes the perag reference
60534800080SDave Chinner  * taken for the lookup regardless of the result of the insert.
606c59d87c4SChristoph Hellwig  */
607de67dc57SDave Chinner static int
608de67dc57SDave Chinner xfs_buf_find_insert(
609de67dc57SDave Chinner 	struct xfs_buftarg	*btp,
610de67dc57SDave Chinner 	struct xfs_perag	*pag,
61134800080SDave Chinner 	struct xfs_buf_map	*cmap,
6126dde2707SDave Chinner 	struct xfs_buf_map	*map,
6136dde2707SDave Chinner 	int			nmaps,
6143848b5f6SDarrick J. Wong 	xfs_buf_flags_t		flags,
6153848b5f6SDarrick J. Wong 	struct xfs_buf		**bpp)
616c59d87c4SChristoph Hellwig {
6173815832aSDave Chinner 	struct xfs_buf		*new_bp;
61834800080SDave Chinner 	struct xfs_buf		*bp;
6199bb38aa0SShaokun Zhang 	int			error;
620c59d87c4SChristoph Hellwig 
62134800080SDave Chinner 	error = _xfs_buf_alloc(btp, map, nmaps, flags, &new_bp);
62232dff5e5SDarrick J. Wong 	if (error)
62334800080SDave Chinner 		goto out_drop_pag;
624c59d87c4SChristoph Hellwig 
6258bcac744SDave Chinner 	/*
6268bcac744SDave Chinner 	 * For buffers that fit entirely within a single page, first attempt to
6278bcac744SDave Chinner 	 * allocate the memory from the heap to minimise memory usage. If we
6288bcac744SDave Chinner 	 * can't get heap memory for these small buffers, we fall back to using
6298bcac744SDave Chinner 	 * the page allocator.
6308bcac744SDave Chinner 	 */
6318bcac744SDave Chinner 	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
6328bcac744SDave Chinner 	    xfs_buf_alloc_kmem(new_bp, flags) < 0) {
6338bcac744SDave Chinner 		error = xfs_buf_alloc_pages(new_bp, flags);
634170041f7SChristoph Hellwig 		if (error)
635170041f7SChristoph Hellwig 			goto out_free_buf;
6368bcac744SDave Chinner 	}
6373815832aSDave Chinner 
63834800080SDave Chinner 	spin_lock(&pag->pag_buf_lock);
63932dd4f9cSDave Chinner 	bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash,
64032dd4f9cSDave Chinner 			&new_bp->b_rhash_head, xfs_buf_hash_params);
64132dd4f9cSDave Chinner 	if (IS_ERR(bp)) {
64232dd4f9cSDave Chinner 		error = PTR_ERR(bp);
64332dd4f9cSDave Chinner 		spin_unlock(&pag->pag_buf_lock);
644170041f7SChristoph Hellwig 		goto out_free_buf;
64532dd4f9cSDave Chinner 	}
64634800080SDave Chinner 	if (bp) {
64732dd4f9cSDave Chinner 		/* found an existing buffer */
64834800080SDave Chinner 		atomic_inc(&bp->b_hold);
64934800080SDave Chinner 		spin_unlock(&pag->pag_buf_lock);
65034800080SDave Chinner 		error = xfs_buf_find_lock(bp, flags);
6513848b5f6SDarrick J. Wong 		if (error)
65234800080SDave Chinner 			xfs_buf_rele(bp);
65334800080SDave Chinner 		else
65434800080SDave Chinner 			*bpp = bp;
655fe2429b0SDave Chinner 		goto out_free_buf;
65634800080SDave Chinner 	}
657fe2429b0SDave Chinner 
65832dd4f9cSDave Chinner 	/* The new buffer keeps the perag reference until it is freed. */
65934800080SDave Chinner 	new_bp->b_pag = pag;
66034800080SDave Chinner 	spin_unlock(&pag->pag_buf_lock);
66134800080SDave Chinner 	*bpp = new_bp;
66234800080SDave Chinner 	return 0;
66334800080SDave Chinner 
66434800080SDave Chinner out_free_buf:
665fe2429b0SDave Chinner 	xfs_buf_free(new_bp);
66634800080SDave Chinner out_drop_pag:
66734800080SDave Chinner 	xfs_perag_put(pag);
66834800080SDave Chinner 	return error;
66934800080SDave Chinner }
670c59d87c4SChristoph Hellwig 
67134800080SDave Chinner /*
67234800080SDave Chinner  * Assembles a buffer covering the specified range. The code is optimised for
67334800080SDave Chinner  * cache hits, as metadata intensive workloads will see 3 orders of magnitude
67434800080SDave Chinner  * more hits than misses.
67534800080SDave Chinner  */
67634800080SDave Chinner int
67734800080SDave Chinner xfs_buf_get_map(
67834800080SDave Chinner 	struct xfs_buftarg	*btp,
67934800080SDave Chinner 	struct xfs_buf_map	*map,
68034800080SDave Chinner 	int			nmaps,
68134800080SDave Chinner 	xfs_buf_flags_t		flags,
68234800080SDave Chinner 	struct xfs_buf		**bpp)
68334800080SDave Chinner {
68434800080SDave Chinner 	struct xfs_perag	*pag;
68534800080SDave Chinner 	struct xfs_buf		*bp = NULL;
68634800080SDave Chinner 	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
68734800080SDave Chinner 	int			error;
68834800080SDave Chinner 	int			i;
68934800080SDave Chinner 
6909ed851f6SDarrick J. Wong 	if (flags & XBF_LIVESCAN)
6919ed851f6SDarrick J. Wong 		cmap.bm_flags |= XBM_LIVESCAN;
69234800080SDave Chinner 	for (i = 0; i < nmaps; i++)
69334800080SDave Chinner 		cmap.bm_len += map[i].bm_len;
69434800080SDave Chinner 
69534800080SDave Chinner 	error = xfs_buf_map_verify(btp, &cmap);
69634800080SDave Chinner 	if (error)
69734800080SDave Chinner 		return error;
69834800080SDave Chinner 
69934800080SDave Chinner 	pag = xfs_perag_get(btp->bt_mount,
70034800080SDave Chinner 			    xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
70134800080SDave Chinner 
70234800080SDave Chinner 	error = xfs_buf_lookup(pag, &cmap, flags, &bp);
70334800080SDave Chinner 	if (error && error != -ENOENT)
70434800080SDave Chinner 		goto out_put_perag;
70534800080SDave Chinner 
70634800080SDave Chinner 	/* cache hits always outnumber misses by at least 10:1 */
70734800080SDave Chinner 	if (unlikely(!bp)) {
70834800080SDave Chinner 		XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
70934800080SDave Chinner 
71034800080SDave Chinner 		if (flags & XBF_INCORE)
71134800080SDave Chinner 			goto out_put_perag;
71234800080SDave Chinner 
71334800080SDave Chinner 		/* xfs_buf_find_insert() consumes the perag reference. */
71434800080SDave Chinner 		error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
71534800080SDave Chinner 				flags, &bp);
71634800080SDave Chinner 		if (error)
71734800080SDave Chinner 			return error;
71834800080SDave Chinner 	} else {
71934800080SDave Chinner 		XFS_STATS_INC(btp->bt_mount, xb_get_locked);
72034800080SDave Chinner 		xfs_perag_put(pag);
72134800080SDave Chinner 	}
72234800080SDave Chinner 
72334800080SDave Chinner 	/* We do not hold a perag reference anymore. */
724611c9946SDave Chinner 	if (!bp->b_addr) {
725c59d87c4SChristoph Hellwig 		error = _xfs_buf_map_pages(bp, flags);
726c59d87c4SChristoph Hellwig 		if (unlikely(error)) {
72734800080SDave Chinner 			xfs_warn_ratelimited(btp->bt_mount,
72893baa55aSDarrick J. Wong 				"%s: failed to map %u pages", __func__,
72993baa55aSDarrick J. Wong 				bp->b_page_count);
730a8acad70SDave Chinner 			xfs_buf_relse(bp);
7313848b5f6SDarrick J. Wong 			return error;
732c59d87c4SChristoph Hellwig 		}
733c59d87c4SChristoph Hellwig 	}
734c59d87c4SChristoph Hellwig 
735b79f4a1cSDave Chinner 	/*
736b79f4a1cSDave Chinner 	 * Clear b_error if this is a lookup from a caller that doesn't expect
737b79f4a1cSDave Chinner 	 * valid data to be found in the buffer.
738b79f4a1cSDave Chinner 	 */
739b79f4a1cSDave Chinner 	if (!(flags & XBF_READ))
740b79f4a1cSDave Chinner 		xfs_buf_ioerror(bp, 0);
741b79f4a1cSDave Chinner 
74234800080SDave Chinner 	XFS_STATS_INC(btp->bt_mount, xb_get);
743c59d87c4SChristoph Hellwig 	trace_xfs_buf_get(bp, flags, _RET_IP_);
7443848b5f6SDarrick J. Wong 	*bpp = bp;
7453848b5f6SDarrick J. Wong 	return 0;
74634800080SDave Chinner 
74734800080SDave Chinner out_put_perag:
74834800080SDave Chinner 	xfs_perag_put(pag);
749170041f7SChristoph Hellwig 	return error;
750c59d87c4SChristoph Hellwig }
751c59d87c4SChristoph Hellwig 
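/*
 * Illustrative sketch (hypothetical caller): the common single-extent lookup
 * through xfs_buf_get_map(). DEFINE_SINGLE_BUF_MAP and the buffer calls are
 * the real interfaces; daddr and numblks stand in for caller values.
 */
#if 0
static int example_get_buffer(struct xfs_buftarg *btp, xfs_daddr_t daddr,
		size_t numblks)
{
	DEFINE_SINGLE_BUF_MAP(map, daddr, numblks);
	struct xfs_buf		*bp;
	int			error;

	error = xfs_buf_get_map(btp, &map, 1, 0, &bp);
	if (error)
		return error;
	/* ... buffer is returned locked; use bp->b_addr ... */
	xfs_buf_relse(bp);
	return 0;
}
#endif
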
75226e32875SChristoph Hellwig int
753c59d87c4SChristoph Hellwig _xfs_buf_read(
754e8222613SDave Chinner 	struct xfs_buf		*bp,
755c59d87c4SChristoph Hellwig 	xfs_buf_flags_t		flags)
756c59d87c4SChristoph Hellwig {
75743ff2122SChristoph Hellwig 	ASSERT(!(flags & XBF_WRITE));
758f4b42421SMark Tinguely 	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);
759c59d87c4SChristoph Hellwig 
76026e32875SChristoph Hellwig 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
761c59d87c4SChristoph Hellwig 	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
762c59d87c4SChristoph Hellwig 
7636af88cdaSBrian Foster 	return xfs_buf_submit(bp);
764c59d87c4SChristoph Hellwig }
765c59d87c4SChristoph Hellwig 
7661aff5696SDarrick J. Wong /*
76775d02303SBrian Foster  * Reverify a buffer found in cache without an attached ->b_ops.
768add46b3bSDarrick J. Wong  *
76975d02303SBrian Foster  * If the caller passed an ops structure and the buffer doesn't have ops
77075d02303SBrian Foster  * assigned, set the ops and use it to verify the contents. If verification
77175d02303SBrian Foster  * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is
77275d02303SBrian Foster  * already in XBF_DONE state on entry.
773add46b3bSDarrick J. Wong  *
77475d02303SBrian Foster  * Under normal operations, every in-core buffer is verified on read I/O
77575d02303SBrian Foster  * completion. There are two scenarios that can lead to in-core buffers without
77675d02303SBrian Foster  * an assigned ->b_ops. The first is during log recovery of buffers on a V4
77775d02303SBrian Foster  * filesystem, though these buffers are purged at the end of recovery. The
77875d02303SBrian Foster  * other is online repair, which intentionally reads with a NULL buffer ops to
77975d02303SBrian Foster  * run several verifiers across an in-core buffer in order to establish buffer
78075d02303SBrian Foster  * type.  If repair can't establish that, the buffer will be left in memory
78175d02303SBrian Foster  * with NULL buffer ops.
7821aff5696SDarrick J. Wong  */
7831aff5696SDarrick J. Wong int
78475d02303SBrian Foster xfs_buf_reverify(
7851aff5696SDarrick J. Wong 	struct xfs_buf		*bp,
7861aff5696SDarrick J. Wong 	const struct xfs_buf_ops *ops)
7871aff5696SDarrick J. Wong {
7881aff5696SDarrick J. Wong 	ASSERT(bp->b_flags & XBF_DONE);
7891aff5696SDarrick J. Wong 	ASSERT(bp->b_error == 0);
7901aff5696SDarrick J. Wong 
7911aff5696SDarrick J. Wong 	if (!ops || bp->b_ops)
7921aff5696SDarrick J. Wong 		return 0;
7931aff5696SDarrick J. Wong 
7941aff5696SDarrick J. Wong 	bp->b_ops = ops;
7951aff5696SDarrick J. Wong 	bp->b_ops->verify_read(bp);
7961aff5696SDarrick J. Wong 	if (bp->b_error)
7971aff5696SDarrick J. Wong 		bp->b_flags &= ~XBF_DONE;
7981aff5696SDarrick J. Wong 	return bp->b_error;
7991aff5696SDarrick J. Wong }
8001aff5696SDarrick J. Wong 
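/*
 * Illustrative sketch (hypothetical caller): attaching a verifier to a cached
 * buffer that was read without one and re-checking it. xfs_sb_buf_ops is the
 * real superblock verifier, used purely as an example here.
 */
#if 0
static int example_attach_verifier(struct xfs_buf *bp)
{
	int	error = xfs_buf_reverify(bp, &xfs_sb_buf_ops);

	if (error)
		return error;	/* verifier rejected the contents */
	return 0;
}
#endif
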
8014ed8e27bSDarrick J. Wong int
8026dde2707SDave Chinner xfs_buf_read_map(
8036dde2707SDave Chinner 	struct xfs_buftarg	*target,
8046dde2707SDave Chinner 	struct xfs_buf_map	*map,
8056dde2707SDave Chinner 	int			nmaps,
806c3f8fc73SDave Chinner 	xfs_buf_flags_t		flags,
8074ed8e27bSDarrick J. Wong 	struct xfs_buf		**bpp,
808cdbcf82bSDarrick J. Wong 	const struct xfs_buf_ops *ops,
809cdbcf82bSDarrick J. Wong 	xfs_failaddr_t		fa)
810c59d87c4SChristoph Hellwig {
8116dde2707SDave Chinner 	struct xfs_buf		*bp;
8123848b5f6SDarrick J. Wong 	int			error;
813c59d87c4SChristoph Hellwig 
814c59d87c4SChristoph Hellwig 	flags |= XBF_READ;
8154ed8e27bSDarrick J. Wong 	*bpp = NULL;
816c59d87c4SChristoph Hellwig 
8173848b5f6SDarrick J. Wong 	error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
8183848b5f6SDarrick J. Wong 	if (error)
8194ed8e27bSDarrick J. Wong 		return error;
8201aff5696SDarrick J. Wong 
821c59d87c4SChristoph Hellwig 	trace_xfs_buf_read(bp, flags, _RET_IP_);
822c59d87c4SChristoph Hellwig 
823b0388bf1SDave Chinner 	if (!(bp->b_flags & XBF_DONE)) {
8244ed8e27bSDarrick J. Wong 		/* Initiate the buffer read and wait. */
825ff6d6af2SBill O'Donnell 		XFS_STATS_INC(target->bt_mount, xb_get_read);
8261813dd64SDave Chinner 		bp->b_ops = ops;
8274ed8e27bSDarrick J. Wong 		error = _xfs_buf_read(bp, flags);
8281aff5696SDarrick J. Wong 
8294ed8e27bSDarrick J. Wong 		/* Readahead iodone already dropped the buffer, so exit. */
8304ed8e27bSDarrick J. Wong 		if (flags & XBF_ASYNC)
8314ed8e27bSDarrick J. Wong 			return 0;
8324ed8e27bSDarrick J. Wong 	} else {
8334ed8e27bSDarrick J. Wong 		/* Buffer already read; all we need to do is check it. */
8344ed8e27bSDarrick J. Wong 		error = xfs_buf_reverify(bp, ops);
8351aff5696SDarrick J. Wong 
8364ed8e27bSDarrick J. Wong 		/* Readahead already finished; drop the buffer and exit. */
8371aff5696SDarrick J. Wong 		if (flags & XBF_ASYNC) {
838a8acad70SDave Chinner 			xfs_buf_relse(bp);
8394ed8e27bSDarrick J. Wong 			return 0;
840c59d87c4SChristoph Hellwig 		}
841c59d87c4SChristoph Hellwig 
8421aff5696SDarrick J. Wong 		/* We do not want read in the flags */
8431aff5696SDarrick J. Wong 		bp->b_flags &= ~XBF_READ;
8441aff5696SDarrick J. Wong 		ASSERT(bp->b_ops != NULL || ops == NULL);
8454ed8e27bSDarrick J. Wong 	}
8464ed8e27bSDarrick J. Wong 
8474ed8e27bSDarrick J. Wong 	/*
8484ed8e27bSDarrick J. Wong 	 * If we've had a read error, then the contents of the buffer are
8494ed8e27bSDarrick J. Wong 	 * invalid and should not be used. To ensure that a followup read tries
8504ed8e27bSDarrick J. Wong 	 * to pull the buffer from disk again, we clear the XBF_DONE flag and
8514ed8e27bSDarrick J. Wong 	 * mark the buffer stale. This ensures that anyone who has a current
8524ed8e27bSDarrick J. Wong 	 * reference to the buffer will interpret its contents correctly and
8534ed8e27bSDarrick J. Wong 	 * future cache lookups will also treat it as an empty, uninitialised
8544ed8e27bSDarrick J. Wong 	 * buffer.
8554ed8e27bSDarrick J. Wong 	 */
8564ed8e27bSDarrick J. Wong 	if (error) {
85701728b44SDave Chinner 		/*
85801728b44SDave Chinner 		 * Check against log shutdown for error reporting because
85901728b44SDave Chinner 		 * metadata writeback may require a read first and we need to
86001728b44SDave Chinner 		 * report errors in metadata writeback until the log is shut
86101728b44SDave Chinner 		 * down. High level transaction read functions already check
86201728b44SDave Chinner 		 * against mount shutdown, anyway, so we only need to be
86301728b44SDave Chinner 		 * concerned about low level IO interactions here.
86401728b44SDave Chinner 		 */
86501728b44SDave Chinner 		if (!xlog_is_shutdown(target->bt_mount->m_log))
866cdbcf82bSDarrick J. Wong 			xfs_buf_ioerror_alert(bp, fa);
8674ed8e27bSDarrick J. Wong 
8684ed8e27bSDarrick J. Wong 		bp->b_flags &= ~XBF_DONE;
8694ed8e27bSDarrick J. Wong 		xfs_buf_stale(bp);
8704ed8e27bSDarrick J. Wong 		xfs_buf_relse(bp);
8714ed8e27bSDarrick J. Wong 
8724ed8e27bSDarrick J. Wong 		/* bad CRC means corrupted metadata */
8734ed8e27bSDarrick J. Wong 		if (error == -EFSBADCRC)
8744ed8e27bSDarrick J. Wong 			error = -EFSCORRUPTED;
8754ed8e27bSDarrick J. Wong 		return error;
8764ed8e27bSDarrick J. Wong 	}
8774ed8e27bSDarrick J. Wong 
8784ed8e27bSDarrick J. Wong 	*bpp = bp;
8794ed8e27bSDarrick J. Wong 	return 0;
880c59d87c4SChristoph Hellwig }
881c59d87c4SChristoph Hellwig 
882c59d87c4SChristoph Hellwig /*
883c59d87c4SChristoph Hellwig  *	If we are not low on memory then do the readahead in a deadlock
884c59d87c4SChristoph Hellwig  *	safe manner.
885c59d87c4SChristoph Hellwig  */
886c59d87c4SChristoph Hellwig void
8876dde2707SDave Chinner xfs_buf_readahead_map(
8886dde2707SDave Chinner 	struct xfs_buftarg	*target,
8896dde2707SDave Chinner 	struct xfs_buf_map	*map,
890c3f8fc73SDave Chinner 	int			nmaps,
8911813dd64SDave Chinner 	const struct xfs_buf_ops *ops)
892c59d87c4SChristoph Hellwig {
8934ed8e27bSDarrick J. Wong 	struct xfs_buf		*bp;
8944ed8e27bSDarrick J. Wong 
8956dde2707SDave Chinner 	xfs_buf_read_map(target, map, nmaps,
896cdbcf82bSDarrick J. Wong 		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
897cdbcf82bSDarrick J. Wong 		     __this_address);
898c59d87c4SChristoph Hellwig }
899c59d87c4SChristoph Hellwig 
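/*
 * Illustrative sketch (hypothetical caller): issuing readahead for a single
 * extent. The call returns immediately; a later xfs_buf_read_map() of the
 * same range should find the buffer in, or on its way into, the cache.
 */
#if 0
static void example_readahead(struct xfs_buftarg *btp, xfs_daddr_t daddr,
		size_t numblks, const struct xfs_buf_ops *ops)
{
	DEFINE_SINGLE_BUF_MAP(map, daddr, numblks);

	xfs_buf_readahead_map(btp, &map, 1, ops);
}
#endif
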
900c59d87c4SChristoph Hellwig /*
901c59d87c4SChristoph Hellwig  * Read an uncached buffer from disk. Allocates and returns a locked
9024c7f65aeSDave Chinner  * buffer containing the disk contents or nothing. Uncached buffers always have
9034c7f65aeSDave Chinner  * a cache index of XFS_BUF_DADDR_NULL so we can easily determine if the buffer
9044c7f65aeSDave Chinner  * is cached or uncached during fault diagnosis.
905c59d87c4SChristoph Hellwig  */
906ba372674SDave Chinner int
907c59d87c4SChristoph Hellwig xfs_buf_read_uncached(
908c59d87c4SChristoph Hellwig 	struct xfs_buftarg	*target,
909c59d87c4SChristoph Hellwig 	xfs_daddr_t		daddr,
910e70b73f8SDave Chinner 	size_t			numblks,
911b9b3fe15SDave Chinner 	xfs_buf_flags_t		flags,
912ba372674SDave Chinner 	struct xfs_buf		**bpp,
9131813dd64SDave Chinner 	const struct xfs_buf_ops *ops)
914c59d87c4SChristoph Hellwig {
915eab4e633SDave Chinner 	struct xfs_buf		*bp;
9162842b6dbSDarrick J. Wong 	int			error;
917c59d87c4SChristoph Hellwig 
918ba372674SDave Chinner 	*bpp = NULL;
919ba372674SDave Chinner 
9202842b6dbSDarrick J. Wong 	error = xfs_buf_get_uncached(target, numblks, flags, &bp);
9212842b6dbSDarrick J. Wong 	if (error)
9222842b6dbSDarrick J. Wong 		return error;
923c59d87c4SChristoph Hellwig 
924c59d87c4SChristoph Hellwig 	/* set up the buffer for a read IO */
9253e85c868SDave Chinner 	ASSERT(bp->b_map_count == 1);
9264c7f65aeSDave Chinner 	bp->b_rhash_key = XFS_BUF_DADDR_NULL;
9273e85c868SDave Chinner 	bp->b_maps[0].bm_bn = daddr;
928cbb7baabSDave Chinner 	bp->b_flags |= XBF_READ;
9291813dd64SDave Chinner 	bp->b_ops = ops;
930c59d87c4SChristoph Hellwig 
9316af88cdaSBrian Foster 	xfs_buf_submit(bp);
932ba372674SDave Chinner 	if (bp->b_error) {
9332842b6dbSDarrick J. Wong 		error = bp->b_error;
93483a0adc3SChristoph Hellwig 		xfs_buf_relse(bp);
935ba372674SDave Chinner 		return error;
93683a0adc3SChristoph Hellwig 	}
937ba372674SDave Chinner 
938ba372674SDave Chinner 	*bpp = bp;
939ba372674SDave Chinner 	return 0;
940c59d87c4SChristoph Hellwig }
941c59d87c4SChristoph Hellwig 
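/*
 * Illustrative sketch (hypothetical caller): reading a block that must not be
 * inserted into the cache, e.g. probing during mount. XFS_SB_DADDR is used
 * only as a sample disk address.
 */
#if 0
static int example_read_uncached(struct xfs_buftarg *btp,
		const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	int			error;

	error = xfs_buf_read_uncached(btp, XFS_SB_DADDR, 1, 0, &bp, ops);
	if (error)
		return error;
	/* ... inspect bp->b_addr ... */
	xfs_buf_relse(bp);
	return 0;
}
#endif
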
9422842b6dbSDarrick J. Wong int
943c59d87c4SChristoph Hellwig xfs_buf_get_uncached(
944c59d87c4SChristoph Hellwig 	struct xfs_buftarg	*target,
945e70b73f8SDave Chinner 	size_t			numblks,
946b9b3fe15SDave Chinner 	xfs_buf_flags_t		flags,
9472842b6dbSDarrick J. Wong 	struct xfs_buf		**bpp)
948c59d87c4SChristoph Hellwig {
94907b5c5adSDave Chinner 	int			error;
9503e85c868SDave Chinner 	struct xfs_buf		*bp;
9513e85c868SDave Chinner 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
952c59d87c4SChristoph Hellwig 
9532842b6dbSDarrick J. Wong 	*bpp = NULL;
9542842b6dbSDarrick J. Wong 
955c891c30aSBrian Foster 	/* flags might contain irrelevant bits, pass only what we care about */
95632dff5e5SDarrick J. Wong 	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
95732dff5e5SDarrick J. Wong 	if (error)
95807b5c5adSDave Chinner 		return error;
959c59d87c4SChristoph Hellwig 
960934d1076SChristoph Hellwig 	error = xfs_buf_alloc_pages(bp, flags);
961c59d87c4SChristoph Hellwig 	if (error)
962c59d87c4SChristoph Hellwig 		goto fail_free_buf;
963c59d87c4SChristoph Hellwig 
964611c9946SDave Chinner 	error = _xfs_buf_map_pages(bp, 0);
965c59d87c4SChristoph Hellwig 	if (unlikely(error)) {
966c59d87c4SChristoph Hellwig 		xfs_warn(target->bt_mount,
96708e96e1aSEric Sandeen 			"%s: failed to map pages", __func__);
96807b5c5adSDave Chinner 		goto fail_free_buf;
969c59d87c4SChristoph Hellwig 	}
970c59d87c4SChristoph Hellwig 
971c59d87c4SChristoph Hellwig 	trace_xfs_buf_get_uncached(bp, _RET_IP_);
9722842b6dbSDarrick J. Wong 	*bpp = bp;
9732842b6dbSDarrick J. Wong 	return 0;
974c59d87c4SChristoph Hellwig 
975c59d87c4SChristoph Hellwig fail_free_buf:
97607b5c5adSDave Chinner 	xfs_buf_free(bp);
9772842b6dbSDarrick J. Wong 	return error;
978c59d87c4SChristoph Hellwig }
979c59d87c4SChristoph Hellwig 
980c59d87c4SChristoph Hellwig /*
981c59d87c4SChristoph Hellwig  *	Increment reference count on buffer, to hold the buffer concurrently
982c59d87c4SChristoph Hellwig  *	with another thread which may release (free) the buffer asynchronously.
983c59d87c4SChristoph Hellwig  *	Must hold the buffer already to call this function.
984c59d87c4SChristoph Hellwig  */
985c59d87c4SChristoph Hellwig void
986c59d87c4SChristoph Hellwig xfs_buf_hold(
987e8222613SDave Chinner 	struct xfs_buf		*bp)
988c59d87c4SChristoph Hellwig {
989c59d87c4SChristoph Hellwig 	trace_xfs_buf_hold(bp, _RET_IP_);
990c59d87c4SChristoph Hellwig 	atomic_inc(&bp->b_hold);
991c59d87c4SChristoph Hellwig }
992c59d87c4SChristoph Hellwig 
993c59d87c4SChristoph Hellwig /*
9949c7504aaSBrian Foster  * Release a hold on the specified buffer. If the hold count is 1, the buffer is
9959c7504aaSBrian Foster  * placed on the LRU or freed (depending on b_lru_ref).
996c59d87c4SChristoph Hellwig  */
997c59d87c4SChristoph Hellwig void
998c59d87c4SChristoph Hellwig xfs_buf_rele(
999e8222613SDave Chinner 	struct xfs_buf		*bp)
1000c59d87c4SChristoph Hellwig {
1001c59d87c4SChristoph Hellwig 	struct xfs_perag	*pag = bp->b_pag;
10029c7504aaSBrian Foster 	bool			release;
10039c7504aaSBrian Foster 	bool			freebuf = false;
1004c59d87c4SChristoph Hellwig 
1005c59d87c4SChristoph Hellwig 	trace_xfs_buf_rele(bp, _RET_IP_);
1006c59d87c4SChristoph Hellwig 
1007c59d87c4SChristoph Hellwig 	if (!pag) {
1008c59d87c4SChristoph Hellwig 		ASSERT(list_empty(&bp->b_lru));
10099c7504aaSBrian Foster 		if (atomic_dec_and_test(&bp->b_hold)) {
10109c7504aaSBrian Foster 			xfs_buf_ioacct_dec(bp);
1011c59d87c4SChristoph Hellwig 			xfs_buf_free(bp);
10129c7504aaSBrian Foster 		}
1013c59d87c4SChristoph Hellwig 		return;
1014c59d87c4SChristoph Hellwig 	}
1015c59d87c4SChristoph Hellwig 
1016c59d87c4SChristoph Hellwig 	ASSERT(atomic_read(&bp->b_hold) > 0);
10179c7504aaSBrian Foster 
101837fd1678SDave Chinner 	/*
101937fd1678SDave Chinner 	 * We grab the b_lock here first to serialise racing xfs_buf_rele()
102037fd1678SDave Chinner 	 * calls. The pag_buf_lock being taken on the last reference only
102137fd1678SDave Chinner 	 * serialises against racing lookups in xfs_buf_find(). IOWs, the second
102237fd1678SDave Chinner 	 * to last reference we drop here is not serialised against the last
102337fd1678SDave Chinner 	 * reference until we take bp->b_lock. Hence if we don't grab b_lock
102437fd1678SDave Chinner 	 * first, the last "release" reference can win the race to the lock and
102537fd1678SDave Chinner 	 * free the buffer before the second-to-last reference is processed,
102637fd1678SDave Chinner 	 * leading to a use-after-free scenario.
102737fd1678SDave Chinner 	 */
1028a4082357SDave Chinner 	spin_lock(&bp->b_lock);
102937fd1678SDave Chinner 	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
10309c7504aaSBrian Foster 	if (!release) {
10319c7504aaSBrian Foster 		/*
10329c7504aaSBrian Foster 		 * Drop the in-flight state if the buffer is already on the LRU
10339c7504aaSBrian Foster 		 * and it holds the only reference. This is racy because we
10349c7504aaSBrian Foster 		 * haven't acquired the pag lock, but the use of XFS_BSTATE_IN_FLIGHT
10359c7504aaSBrian Foster 		 * ensures the decrement occurs only once per-buf.
10369c7504aaSBrian Foster 		 */
10379c7504aaSBrian Foster 		if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru))
103863db7c81SBrian Foster 			__xfs_buf_ioacct_dec(bp);
10399c7504aaSBrian Foster 		goto out_unlock;
10409c7504aaSBrian Foster 	}
10419c7504aaSBrian Foster 
10429c7504aaSBrian Foster 	/* the last reference has been dropped ... */
104363db7c81SBrian Foster 	__xfs_buf_ioacct_dec(bp);
1044a4082357SDave Chinner 	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
1045a4082357SDave Chinner 		/*
10469c7504aaSBrian Foster 		 * If the buffer is added to the LRU take a new reference to the
10479c7504aaSBrian Foster 		 * buffer for the LRU and clear the (now stale) dispose list
10489c7504aaSBrian Foster 		 * state flag
1049a4082357SDave Chinner 		 */
1050a4082357SDave Chinner 		if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
1051a4082357SDave Chinner 			bp->b_state &= ~XFS_BSTATE_DISPOSE;
1052a4082357SDave Chinner 			atomic_inc(&bp->b_hold);
1053a4082357SDave Chinner 		}
1054c59d87c4SChristoph Hellwig 		spin_unlock(&pag->pag_buf_lock);
1055c59d87c4SChristoph Hellwig 	} else {
1056a4082357SDave Chinner 		/*
10579c7504aaSBrian Foster 		 * most of the time buffers will already be removed from the
10589c7504aaSBrian Foster 		 * LRU, so optimise that case by checking for the
10599c7504aaSBrian Foster 		 * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
10609c7504aaSBrian Foster 		 * was on was the disposal list
1061a4082357SDave Chinner 		 */
1062a4082357SDave Chinner 		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
1063a4082357SDave Chinner 			list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
1064a4082357SDave Chinner 		} else {
1065a4082357SDave Chinner 			ASSERT(list_empty(&bp->b_lru));
1066a4082357SDave Chinner 		}
1067a4082357SDave Chinner 
106843ff2122SChristoph Hellwig 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
10696031e73aSLucas Stach 		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
10706031e73aSLucas Stach 				       xfs_buf_hash_params);
1071c59d87c4SChristoph Hellwig 		spin_unlock(&pag->pag_buf_lock);
1072c59d87c4SChristoph Hellwig 		xfs_perag_put(pag);
10739c7504aaSBrian Foster 		freebuf = true;
10749c7504aaSBrian Foster 	}
10759c7504aaSBrian Foster 
10769c7504aaSBrian Foster out_unlock:
10779c7504aaSBrian Foster 	spin_unlock(&bp->b_lock);
10789c7504aaSBrian Foster 
10799c7504aaSBrian Foster 	if (freebuf)
1080c59d87c4SChristoph Hellwig 		xfs_buf_free(bp);
1081c59d87c4SChristoph Hellwig }
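/*
 * A rough usage sketch of how callers influence the LRU behaviour described
 * above, assuming xfs_buf_set_ref() and XFS_BTREE_REF as defined elsewhere
 * in XFS (illustrative only, not a verbatim call site):
 *
 *	xfs_buf_set_ref(bp, XFS_BTREE_REF);	(hint: keep this buffer cached longer)
 *	xfs_buf_relse(bp);			(unlock and drop the hold)
 *
 * The final xfs_buf_rele() of the hold count then decides, based on
 * b_lru_ref and XBF_STALE, whether the buffer parks on the LRU or is freed.
 */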
1082c59d87c4SChristoph Hellwig 
1083c59d87c4SChristoph Hellwig 
1084c59d87c4SChristoph Hellwig /*
1085c59d87c4SChristoph Hellwig  *	Lock a buffer object, if it is not already locked.
1086c59d87c4SChristoph Hellwig  *
1087c59d87c4SChristoph Hellwig  *	If we come across a stale, pinned, locked buffer, we know that we are
1088c59d87c4SChristoph Hellwig  *	being asked to lock a buffer that has been reallocated. Because it is
1089c59d87c4SChristoph Hellwig  *	pinned, we know that the log has not been pushed to disk and hence it
1090c59d87c4SChristoph Hellwig  *	will still be locked.  Rather than continuing to have trylock attempts
1091c59d87c4SChristoph Hellwig  *	fail until someone else pushes the log, push it ourselves before
1092c59d87c4SChristoph Hellwig  *	returning.  This means that the xfsaild will not get stuck trying
1093c59d87c4SChristoph Hellwig  *	to push on stale inode buffers.
1094c59d87c4SChristoph Hellwig  */
1095c59d87c4SChristoph Hellwig int
1096c59d87c4SChristoph Hellwig xfs_buf_trylock(
1097c59d87c4SChristoph Hellwig 	struct xfs_buf		*bp)
1098c59d87c4SChristoph Hellwig {
1099c59d87c4SChristoph Hellwig 	int			locked;
1100c59d87c4SChristoph Hellwig 
1101c59d87c4SChristoph Hellwig 	locked = down_trylock(&bp->b_sema) == 0;
1102fa6c668dSEric Sandeen 	if (locked)
1103c59d87c4SChristoph Hellwig 		trace_xfs_buf_trylock(bp, _RET_IP_);
1104fa6c668dSEric Sandeen 	else
1105479c6412SDarrick J. Wong 		trace_xfs_buf_trylock_fail(bp, _RET_IP_);
1106c59d87c4SChristoph Hellwig 	return locked;
1107c59d87c4SChristoph Hellwig }
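/*
 * Rough sketch of the trylock pattern the comment above describes, as seen
 * from a hypothetical item push handler (XFS_ITEM_LOCKED is assumed from the
 * transaction code; this is an illustration, not a verbatim caller):
 *
 *	if (!xfs_buf_trylock(bp))
 *		return XFS_ITEM_LOCKED;		(xfsaild will retry later)
 *	... operate on the locked buffer ...
 *	xfs_buf_unlock(bp);
 */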
1108c59d87c4SChristoph Hellwig 
1109c59d87c4SChristoph Hellwig /*
1110c59d87c4SChristoph Hellwig  *	Lock a buffer object.
1111c59d87c4SChristoph Hellwig  *
1112c59d87c4SChristoph Hellwig  *	If we come across a stale, pinned, locked buffer, we know that we
1113c59d87c4SChristoph Hellwig  *	are being asked to lock a buffer that has been reallocated. Because
1114c59d87c4SChristoph Hellwig  *	it is pinned, we know that the log has not been pushed to disk and
1115c59d87c4SChristoph Hellwig  *	hence it will still be locked. Rather than sleeping until someone
1116c59d87c4SChristoph Hellwig  *	else pushes the log, push it ourselves before trying to get the lock.
1117c59d87c4SChristoph Hellwig  */
1118c59d87c4SChristoph Hellwig void
1119c59d87c4SChristoph Hellwig xfs_buf_lock(
1120c59d87c4SChristoph Hellwig 	struct xfs_buf		*bp)
1121c59d87c4SChristoph Hellwig {
1122c59d87c4SChristoph Hellwig 	trace_xfs_buf_lock(bp, _RET_IP_);
1123c59d87c4SChristoph Hellwig 
1124c59d87c4SChristoph Hellwig 	if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
1125dbd329f1SChristoph Hellwig 		xfs_log_force(bp->b_mount, 0);
1126c59d87c4SChristoph Hellwig 	down(&bp->b_sema);
1127c59d87c4SChristoph Hellwig 
1128c59d87c4SChristoph Hellwig 	trace_xfs_buf_lock_done(bp, _RET_IP_);
1129c59d87c4SChristoph Hellwig }
1130c59d87c4SChristoph Hellwig 
1131c59d87c4SChristoph Hellwig void
1132c59d87c4SChristoph Hellwig xfs_buf_unlock(
1133c59d87c4SChristoph Hellwig 	struct xfs_buf		*bp)
1134c59d87c4SChristoph Hellwig {
113520e8a063SBrian Foster 	ASSERT(xfs_buf_islocked(bp));
113620e8a063SBrian Foster 
1137c59d87c4SChristoph Hellwig 	up(&bp->b_sema);
1138c59d87c4SChristoph Hellwig 	trace_xfs_buf_unlock(bp, _RET_IP_);
1139c59d87c4SChristoph Hellwig }
1140c59d87c4SChristoph Hellwig 
1141c59d87c4SChristoph Hellwig STATIC void
1142c59d87c4SChristoph Hellwig xfs_buf_wait_unpin(
1143e8222613SDave Chinner 	struct xfs_buf		*bp)
1144c59d87c4SChristoph Hellwig {
1145c59d87c4SChristoph Hellwig 	DECLARE_WAITQUEUE	(wait, current);
1146c59d87c4SChristoph Hellwig 
1147c59d87c4SChristoph Hellwig 	if (atomic_read(&bp->b_pin_count) == 0)
1148c59d87c4SChristoph Hellwig 		return;
1149c59d87c4SChristoph Hellwig 
1150c59d87c4SChristoph Hellwig 	add_wait_queue(&bp->b_waiters, &wait);
1151c59d87c4SChristoph Hellwig 	for (;;) {
1152c59d87c4SChristoph Hellwig 		set_current_state(TASK_UNINTERRUPTIBLE);
1153c59d87c4SChristoph Hellwig 		if (atomic_read(&bp->b_pin_count) == 0)
1154c59d87c4SChristoph Hellwig 			break;
1155c59d87c4SChristoph Hellwig 		io_schedule();
1156c59d87c4SChristoph Hellwig 	}
1157c59d87c4SChristoph Hellwig 	remove_wait_queue(&bp->b_waiters, &wait);
1158c59d87c4SChristoph Hellwig 	set_current_state(TASK_RUNNING);
1159c59d87c4SChristoph Hellwig }
1160c59d87c4SChristoph Hellwig 
1161f58d0ea9SChristoph Hellwig static void
1162f58d0ea9SChristoph Hellwig xfs_buf_ioerror_alert_ratelimited(
1163664ffb8aSChristoph Hellwig 	struct xfs_buf		*bp)
1164664ffb8aSChristoph Hellwig {
1165664ffb8aSChristoph Hellwig 	static unsigned long	lasttime;
1166664ffb8aSChristoph Hellwig 	static struct xfs_buftarg *lasttarg;
1167664ffb8aSChristoph Hellwig 
1168664ffb8aSChristoph Hellwig 	if (bp->b_target != lasttarg ||
1169664ffb8aSChristoph Hellwig 	    time_after(jiffies, (lasttime + 5*HZ))) {
1170664ffb8aSChristoph Hellwig 		lasttime = jiffies;
1171664ffb8aSChristoph Hellwig 		xfs_buf_ioerror_alert(bp, __this_address);
1172664ffb8aSChristoph Hellwig 	}
1173664ffb8aSChristoph Hellwig 	lasttarg = bp->b_target;
1174664ffb8aSChristoph Hellwig }
1175664ffb8aSChristoph Hellwig 
1176664ffb8aSChristoph Hellwig /*
1177664ffb8aSChristoph Hellwig  * Account for this latest trip around the retry handler, and decide if
1178664ffb8aSChristoph Hellwig  * we've failed enough times to constitute a permanent failure.
1179664ffb8aSChristoph Hellwig  */
1180664ffb8aSChristoph Hellwig static bool
1181664ffb8aSChristoph Hellwig xfs_buf_ioerror_permanent(
1182664ffb8aSChristoph Hellwig 	struct xfs_buf		*bp,
1183664ffb8aSChristoph Hellwig 	struct xfs_error_cfg	*cfg)
1184664ffb8aSChristoph Hellwig {
1185664ffb8aSChristoph Hellwig 	struct xfs_mount	*mp = bp->b_mount;
1186664ffb8aSChristoph Hellwig 
1187664ffb8aSChristoph Hellwig 	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
1188664ffb8aSChristoph Hellwig 	    ++bp->b_retries > cfg->max_retries)
1189664ffb8aSChristoph Hellwig 		return true;
1190664ffb8aSChristoph Hellwig 	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
1191664ffb8aSChristoph Hellwig 	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
1192664ffb8aSChristoph Hellwig 		return true;
1193664ffb8aSChristoph Hellwig 
1194664ffb8aSChristoph Hellwig 	/* At unmount we may treat errors differently */
11952e973b2cSDave Chinner 	if (xfs_is_unmounting(mp) && mp->m_fail_unmount)
1196664ffb8aSChristoph Hellwig 		return true;
1197664ffb8aSChristoph Hellwig 
1198664ffb8aSChristoph Hellwig 	return false;
1199664ffb8aSChristoph Hellwig }
1200664ffb8aSChristoph Hellwig 
1201664ffb8aSChristoph Hellwig /*
1202664ffb8aSChristoph Hellwig  * On a sync write or shutdown we just want to stale the buffer and let the
1203664ffb8aSChristoph Hellwig  * caller handle the error in bp->b_error appropriately.
1204664ffb8aSChristoph Hellwig  *
1205664ffb8aSChristoph Hellwig  * If the write was asynchronous then no one will be looking for the error.  If
1206664ffb8aSChristoph Hellwig  * this is the first failure of this type, clear the error state and write the
1207664ffb8aSChristoph Hellwig  * buffer out again. This means we always retry an async write failure at least
1208664ffb8aSChristoph Hellwig  * once, but we also need to set the buffer up to behave correctly now for
1209664ffb8aSChristoph Hellwig  * repeated failures.
1210664ffb8aSChristoph Hellwig  *
1211664ffb8aSChristoph Hellwig  * If we get repeated async write failures, then we take action according to the
1212664ffb8aSChristoph Hellwig  * error configuration we have been set up to use.
1213664ffb8aSChristoph Hellwig  *
121470796c6bSChristoph Hellwig  * Returns true if this function took care of error handling and the caller must
121570796c6bSChristoph Hellwig  * not touch the buffer again.  Returns false if the caller should proceed with
121670796c6bSChristoph Hellwig  * normal I/O completion handling.
1217664ffb8aSChristoph Hellwig  */
121870796c6bSChristoph Hellwig static bool
121970796c6bSChristoph Hellwig xfs_buf_ioend_handle_error(
1220664ffb8aSChristoph Hellwig 	struct xfs_buf		*bp)
1221664ffb8aSChristoph Hellwig {
1222664ffb8aSChristoph Hellwig 	struct xfs_mount	*mp = bp->b_mount;
1223664ffb8aSChristoph Hellwig 	struct xfs_error_cfg	*cfg;
1224664ffb8aSChristoph Hellwig 
1225f58d0ea9SChristoph Hellwig 	/*
122601728b44SDave Chinner 	 * If we've already shutdown the journal because of I/O errors, there's
122701728b44SDave Chinner 	 * no point in giving this a retry.
1228f58d0ea9SChristoph Hellwig 	 */
122901728b44SDave Chinner 	if (xlog_is_shutdown(mp->m_log))
1230f58d0ea9SChristoph Hellwig 		goto out_stale;
1231f58d0ea9SChristoph Hellwig 
1232f58d0ea9SChristoph Hellwig 	xfs_buf_ioerror_alert_ratelimited(bp);
1233f58d0ea9SChristoph Hellwig 
1234f58d0ea9SChristoph Hellwig 	/*
123522c10589SChristoph Hellwig 	 * We're not going to bother about retrying this during recovery.
123622c10589SChristoph Hellwig 	 * One strike!
123722c10589SChristoph Hellwig 	 */
123822c10589SChristoph Hellwig 	if (bp->b_flags & _XBF_LOGRECOVERY) {
123922c10589SChristoph Hellwig 		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
124022c10589SChristoph Hellwig 		return false;
124122c10589SChristoph Hellwig 	}
124222c10589SChristoph Hellwig 
124322c10589SChristoph Hellwig 	/*
1244f58d0ea9SChristoph Hellwig 	 * Synchronous writes will have callers process the error.
1245f58d0ea9SChristoph Hellwig 	 */
1246f58d0ea9SChristoph Hellwig 	if (!(bp->b_flags & XBF_ASYNC))
1247664ffb8aSChristoph Hellwig 		goto out_stale;
1248664ffb8aSChristoph Hellwig 
1249664ffb8aSChristoph Hellwig 	trace_xfs_buf_iodone_async(bp, _RET_IP_);
1250664ffb8aSChristoph Hellwig 
1251664ffb8aSChristoph Hellwig 	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
12523cc49884SChristoph Hellwig 	if (bp->b_last_error != bp->b_error ||
12533cc49884SChristoph Hellwig 	    !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) {
12543cc49884SChristoph Hellwig 		bp->b_last_error = bp->b_error;
12553cc49884SChristoph Hellwig 		if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
12563cc49884SChristoph Hellwig 		    !bp->b_first_retry_time)
12573cc49884SChristoph Hellwig 			bp->b_first_retry_time = jiffies;
12583cc49884SChristoph Hellwig 		goto resubmit;
1259664ffb8aSChristoph Hellwig 	}
1260664ffb8aSChristoph Hellwig 
1261664ffb8aSChristoph Hellwig 	/*
1262664ffb8aSChristoph Hellwig 	 * Permanent error - we need to trigger a shutdown if we haven't already
1263664ffb8aSChristoph Hellwig 	 * to indicate that inconsistency will result from this action.
1264664ffb8aSChristoph Hellwig 	 */
1265664ffb8aSChristoph Hellwig 	if (xfs_buf_ioerror_permanent(bp, cfg)) {
1266664ffb8aSChristoph Hellwig 		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1267664ffb8aSChristoph Hellwig 		goto out_stale;
1268664ffb8aSChristoph Hellwig 	}
1269664ffb8aSChristoph Hellwig 
1270664ffb8aSChristoph Hellwig 	/* Still considered a transient error. Caller will schedule retries. */
1271844c9358SChristoph Hellwig 	if (bp->b_flags & _XBF_INODES)
1272844c9358SChristoph Hellwig 		xfs_buf_inode_io_fail(bp);
1273844c9358SChristoph Hellwig 	else if (bp->b_flags & _XBF_DQUOTS)
1274844c9358SChristoph Hellwig 		xfs_buf_dquot_io_fail(bp);
1275844c9358SChristoph Hellwig 	else
1276844c9358SChristoph Hellwig 		ASSERT(list_empty(&bp->b_li_list));
1277844c9358SChristoph Hellwig 	xfs_buf_ioerror(bp, 0);
1278844c9358SChristoph Hellwig 	xfs_buf_relse(bp);
127970796c6bSChristoph Hellwig 	return true;
1280664ffb8aSChristoph Hellwig 
12813cc49884SChristoph Hellwig resubmit:
12823cc49884SChristoph Hellwig 	xfs_buf_ioerror(bp, 0);
128355b7d711SChristoph Hellwig 	bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
12843cc49884SChristoph Hellwig 	xfs_buf_submit(bp);
128570796c6bSChristoph Hellwig 	return true;
1286664ffb8aSChristoph Hellwig out_stale:
1287664ffb8aSChristoph Hellwig 	xfs_buf_stale(bp);
1288664ffb8aSChristoph Hellwig 	bp->b_flags |= XBF_DONE;
128955b7d711SChristoph Hellwig 	bp->b_flags &= ~XBF_WRITE;
1290664ffb8aSChristoph Hellwig 	trace_xfs_buf_error_relse(bp, _RET_IP_);
129170796c6bSChristoph Hellwig 	return false;
1292664ffb8aSChristoph Hellwig }
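/*
 * The retry decisions above are driven by the per-mount error configuration
 * returned by xfs_error_get_cfg().  As a rough illustration (sysfs paths
 * assumed from the XFS admin documentation, not verified here), the knobs
 * map onto struct xfs_error_cfg approximately like:
 *
 *	/sys/fs/xfs/<dev>/error/metadata/EIO/max_retries           -> cfg->max_retries
 *	/sys/fs/xfs/<dev>/error/metadata/EIO/retry_timeout_seconds -> cfg->retry_timeout
 */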
1293c59d87c4SChristoph Hellwig 
129476b2d323SChristoph Hellwig static void
1295e8aaba9aSDave Chinner xfs_buf_ioend(
1296e8aaba9aSDave Chinner 	struct xfs_buf	*bp)
1297c59d87c4SChristoph Hellwig {
1298e8aaba9aSDave Chinner 	trace_xfs_buf_iodone(bp, _RET_IP_);
12991813dd64SDave Chinner 
130061be9c52SDave Chinner 	/*
130161be9c52SDave Chinner 	 * Pull in IO completion errors now. We are guaranteed to be running
130261be9c52SDave Chinner 	 * single threaded, so we don't need the lock to read b_io_error.
130361be9c52SDave Chinner 	 */
130461be9c52SDave Chinner 	if (!bp->b_error && bp->b_io_error)
130561be9c52SDave Chinner 		xfs_buf_ioerror(bp, bp->b_io_error);
130661be9c52SDave Chinner 
130755b7d711SChristoph Hellwig 	if (bp->b_flags & XBF_READ) {
1308b01d1461SDave Chinner 		if (!bp->b_error && bp->b_ops)
13091813dd64SDave Chinner 			bp->b_ops->verify_read(bp);
1310b01d1461SDave Chinner 		if (!bp->b_error)
1311b01d1461SDave Chinner 			bp->b_flags |= XBF_DONE;
131223fb5a93SChristoph Hellwig 	} else {
1313b6983e80SBrian Foster 		if (!bp->b_error) {
1314b6983e80SBrian Foster 			bp->b_flags &= ~XBF_WRITE_FAIL;
1315e8aaba9aSDave Chinner 			bp->b_flags |= XBF_DONE;
1316b6983e80SBrian Foster 		}
1317c59d87c4SChristoph Hellwig 
131870796c6bSChristoph Hellwig 		if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
1319664ffb8aSChristoph Hellwig 			return;
1320664ffb8aSChristoph Hellwig 
1321664ffb8aSChristoph Hellwig 		/* clear the retry state */
1322664ffb8aSChristoph Hellwig 		bp->b_last_error = 0;
1323664ffb8aSChristoph Hellwig 		bp->b_retries = 0;
1324664ffb8aSChristoph Hellwig 		bp->b_first_retry_time = 0;
1325664ffb8aSChristoph Hellwig 
1326664ffb8aSChristoph Hellwig 		/*
1327664ffb8aSChristoph Hellwig 		 * Note that for things like remote attribute buffers, there may
1328664ffb8aSChristoph Hellwig 		 * not be a buffer log item here, so processing the buffer log
1329664ffb8aSChristoph Hellwig 		 * item must remain optional.
1330664ffb8aSChristoph Hellwig 		 */
1331664ffb8aSChristoph Hellwig 		if (bp->b_log_item)
1332664ffb8aSChristoph Hellwig 			xfs_buf_item_done(bp);
1333664ffb8aSChristoph Hellwig 
133423fb5a93SChristoph Hellwig 		if (bp->b_flags & _XBF_INODES)
1335f593bf14SDave Chinner 			xfs_buf_inode_iodone(bp);
133623fb5a93SChristoph Hellwig 		else if (bp->b_flags & _XBF_DQUOTS)
13370c7e5afbSDave Chinner 			xfs_buf_dquot_iodone(bp);
133822c10589SChristoph Hellwig 
13391813dd64SDave Chinner 	}
13406a7584b1SChristoph Hellwig 
134122c10589SChristoph Hellwig 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
134222c10589SChristoph Hellwig 			 _XBF_LOGRECOVERY);
134355b7d711SChristoph Hellwig 
13446a7584b1SChristoph Hellwig 	if (bp->b_flags & XBF_ASYNC)
13456a7584b1SChristoph Hellwig 		xfs_buf_relse(bp);
13466a7584b1SChristoph Hellwig 	else
13476a7584b1SChristoph Hellwig 		complete(&bp->b_iowait);
134823fb5a93SChristoph Hellwig }
1349c59d87c4SChristoph Hellwig 
1350e8aaba9aSDave Chinner static void
1351e8aaba9aSDave Chinner xfs_buf_ioend_work(
1352e8aaba9aSDave Chinner 	struct work_struct	*work)
1353c59d87c4SChristoph Hellwig {
1354e8aaba9aSDave Chinner 	struct xfs_buf		*bp =
1355e8222613SDave Chinner 		container_of(work, struct xfs_buf, b_ioend_work);
13561813dd64SDave Chinner 
1357e8aaba9aSDave Chinner 	xfs_buf_ioend(bp);
1358c59d87c4SChristoph Hellwig }
1359c59d87c4SChristoph Hellwig 
1360211fe1a4SAlexander Kuleshov static void
1361e8aaba9aSDave Chinner xfs_buf_ioend_async(
1362e8aaba9aSDave Chinner 	struct xfs_buf	*bp)
1363c59d87c4SChristoph Hellwig {
1364b29c70f5SBrian Foster 	INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
1365dbd329f1SChristoph Hellwig 	queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
1366c59d87c4SChristoph Hellwig }
1367c59d87c4SChristoph Hellwig 
1368c59d87c4SChristoph Hellwig void
136931ca03c9SDarrick J. Wong __xfs_buf_ioerror(
1370e8222613SDave Chinner 	struct xfs_buf		*bp,
137131ca03c9SDarrick J. Wong 	int			error,
137231ca03c9SDarrick J. Wong 	xfs_failaddr_t		failaddr)
1373c59d87c4SChristoph Hellwig {
13742451337dSDave Chinner 	ASSERT(error <= 0 && error >= -1000);
13752451337dSDave Chinner 	bp->b_error = error;
137631ca03c9SDarrick J. Wong 	trace_xfs_buf_ioerror(bp, error, failaddr);
1377c59d87c4SChristoph Hellwig }
1378c59d87c4SChristoph Hellwig 
1379901796afSChristoph Hellwig void
1380901796afSChristoph Hellwig xfs_buf_ioerror_alert(
1381901796afSChristoph Hellwig 	struct xfs_buf		*bp,
1382cdbcf82bSDarrick J. Wong 	xfs_failaddr_t		func)
1383901796afSChristoph Hellwig {
1384f9bccfccSBrian Foster 	xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error",
1385cdbcf82bSDarrick J. Wong 		"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
138604fcad80SDave Chinner 				  func, (uint64_t)xfs_buf_daddr(bp),
1387f9bccfccSBrian Foster 				  bp->b_length, -bp->b_error);
1388901796afSChristoph Hellwig }
1389901796afSChristoph Hellwig 
139054b3b1f6SBrian Foster /*
139154b3b1f6SBrian Foster  * To simulate an I/O failure, the buffer must be locked and held with at least
139254b3b1f6SBrian Foster  * three references. The LRU reference is dropped by the stale call. The buf
139354b3b1f6SBrian Foster  * item reference is dropped via ioend processing. The third reference is owned
139454b3b1f6SBrian Foster  * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
139554b3b1f6SBrian Foster  */
139654b3b1f6SBrian Foster void
139754b3b1f6SBrian Foster xfs_buf_ioend_fail(
139854b3b1f6SBrian Foster 	struct xfs_buf	*bp)
139954b3b1f6SBrian Foster {
140054b3b1f6SBrian Foster 	bp->b_flags &= ~XBF_DONE;
140154b3b1f6SBrian Foster 	xfs_buf_stale(bp);
140254b3b1f6SBrian Foster 	xfs_buf_ioerror(bp, -EIO);
140354b3b1f6SBrian Foster 	xfs_buf_ioend(bp);
1404c59d87c4SChristoph Hellwig }
1405c59d87c4SChristoph Hellwig 
1406a2dcf5dfSChristoph Hellwig int
1407a2dcf5dfSChristoph Hellwig xfs_bwrite(
1408a2dcf5dfSChristoph Hellwig 	struct xfs_buf		*bp)
1409a2dcf5dfSChristoph Hellwig {
1410a2dcf5dfSChristoph Hellwig 	int			error;
1411a2dcf5dfSChristoph Hellwig 
1412a2dcf5dfSChristoph Hellwig 	ASSERT(xfs_buf_islocked(bp));
1413a2dcf5dfSChristoph Hellwig 
1414a2dcf5dfSChristoph Hellwig 	bp->b_flags |= XBF_WRITE;
141527187754SDave Chinner 	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q |
1416b6983e80SBrian Foster 			 XBF_DONE);
1417a2dcf5dfSChristoph Hellwig 
14186af88cdaSBrian Foster 	error = xfs_buf_submit(bp);
1419dbd329f1SChristoph Hellwig 	if (error)
1420dbd329f1SChristoph Hellwig 		xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
1421a2dcf5dfSChristoph Hellwig 	return error;
1422a2dcf5dfSChristoph Hellwig }
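/*
 * Minimal caller sketch for the synchronous write above (assumed pattern,
 * not a verbatim call site): the buffer must be locked on entry and is still
 * locked and held on return, so the caller releases it afterwards:
 *
 *	error = xfs_bwrite(bp);
 *	xfs_buf_relse(bp);
 *	if (error)
 *		return error;
 */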
1423a2dcf5dfSChristoph Hellwig 
14249bdd9bd6SBrian Foster static void
1425c59d87c4SChristoph Hellwig xfs_buf_bio_end_io(
14264246a0b6SChristoph Hellwig 	struct bio		*bio)
1427c59d87c4SChristoph Hellwig {
14289bdd9bd6SBrian Foster 	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;
1429c59d87c4SChristoph Hellwig 
14307376d745SBrian Foster 	if (!bio->bi_status &&
14317376d745SBrian Foster 	    (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
143243dc0aa8SBrian Foster 	    XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
14337376d745SBrian Foster 		bio->bi_status = BLK_STS_IOERR;
1434c59d87c4SChristoph Hellwig 
143537eb17e6SDave Chinner 	/*
143637eb17e6SDave Chinner 	 * don't overwrite existing errors - otherwise we can lose errors on
143737eb17e6SDave Chinner 	 * buffers that require multiple bios to complete.
143837eb17e6SDave Chinner 	 */
14394e4cbee9SChristoph Hellwig 	if (bio->bi_status) {
14404e4cbee9SChristoph Hellwig 		int error = blk_status_to_errno(bio->bi_status);
14414e4cbee9SChristoph Hellwig 
14424e4cbee9SChristoph Hellwig 		cmpxchg(&bp->b_io_error, 0, error);
14434e4cbee9SChristoph Hellwig 	}
1444c59d87c4SChristoph Hellwig 
144537eb17e6SDave Chinner 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
1446c59d87c4SChristoph Hellwig 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
1447c59d87c4SChristoph Hellwig 
1448e8aaba9aSDave Chinner 	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
1449e8aaba9aSDave Chinner 		xfs_buf_ioend_async(bp);
1450c59d87c4SChristoph Hellwig 	bio_put(bio);
1451c59d87c4SChristoph Hellwig }
1452c59d87c4SChristoph Hellwig 
14533e85c868SDave Chinner static void
14543e85c868SDave Chinner xfs_buf_ioapply_map(
14553e85c868SDave Chinner 	struct xfs_buf	*bp,
14563e85c868SDave Chinner 	int		map,
14573e85c868SDave Chinner 	int		*buf_offset,
14583e85c868SDave Chinner 	int		*count,
1459d03025aeSBart Van Assche 	blk_opf_t	op)
1460c59d87c4SChristoph Hellwig {
14613e85c868SDave Chinner 	int		page_index;
14625f7136dbSMatthew Wilcox (Oracle) 	unsigned int	total_nr_pages = bp->b_page_count;
14633e85c868SDave Chinner 	int		nr_pages;
1464c59d87c4SChristoph Hellwig 	struct bio	*bio;
14653e85c868SDave Chinner 	sector_t	sector =  bp->b_maps[map].bm_bn;
14663e85c868SDave Chinner 	int		size;
14673e85c868SDave Chinner 	int		offset;
1468c59d87c4SChristoph Hellwig 
14693e85c868SDave Chinner 	/* skip the pages in the buffer before the start offset */
14703e85c868SDave Chinner 	page_index = 0;
14713e85c868SDave Chinner 	offset = *buf_offset;
14723e85c868SDave Chinner 	while (offset >= PAGE_SIZE) {
14733e85c868SDave Chinner 		page_index++;
14743e85c868SDave Chinner 		offset -= PAGE_SIZE;
1475c59d87c4SChristoph Hellwig 	}
1476c59d87c4SChristoph Hellwig 
14773e85c868SDave Chinner 	/*
14783e85c868SDave Chinner 	 * Limit the IO size to the length of the current vector, and update the
14793e85c868SDave Chinner 	 * remaining IO count for the next time around.
14803e85c868SDave Chinner 	 */
14813e85c868SDave Chinner 	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
14823e85c868SDave Chinner 	*count -= size;
14833e85c868SDave Chinner 	*buf_offset += size;
1484c59d87c4SChristoph Hellwig 
1485c59d87c4SChristoph Hellwig next_chunk:
1486c59d87c4SChristoph Hellwig 	atomic_inc(&bp->b_io_remaining);
14875f7136dbSMatthew Wilcox (Oracle) 	nr_pages = bio_max_segs(total_nr_pages);
1488c59d87c4SChristoph Hellwig 
148907888c66SChristoph Hellwig 	bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
14904f024f37SKent Overstreet 	bio->bi_iter.bi_sector = sector;
1491c59d87c4SChristoph Hellwig 	bio->bi_end_io = xfs_buf_bio_end_io;
1492c59d87c4SChristoph Hellwig 	bio->bi_private = bp;
1493c59d87c4SChristoph Hellwig 
14943e85c868SDave Chinner 	for (; size && nr_pages; nr_pages--, page_index++) {
1495c59d87c4SChristoph Hellwig 		int	rbytes, nbytes = PAGE_SIZE - offset;
1496c59d87c4SChristoph Hellwig 
1497c59d87c4SChristoph Hellwig 		if (nbytes > size)
1498c59d87c4SChristoph Hellwig 			nbytes = size;
1499c59d87c4SChristoph Hellwig 
15003e85c868SDave Chinner 		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
15013e85c868SDave Chinner 				      offset);
1502c59d87c4SChristoph Hellwig 		if (rbytes < nbytes)
1503c59d87c4SChristoph Hellwig 			break;
1504c59d87c4SChristoph Hellwig 
1505c59d87c4SChristoph Hellwig 		offset = 0;
1506aa0e8833SDave Chinner 		sector += BTOBB(nbytes);
1507c59d87c4SChristoph Hellwig 		size -= nbytes;
1508c59d87c4SChristoph Hellwig 		total_nr_pages--;
1509c59d87c4SChristoph Hellwig 	}
1510c59d87c4SChristoph Hellwig 
15114f024f37SKent Overstreet 	if (likely(bio->bi_iter.bi_size)) {
1512c59d87c4SChristoph Hellwig 		if (xfs_buf_is_vmapped(bp)) {
1513c59d87c4SChristoph Hellwig 			flush_kernel_vmap_range(bp->b_addr,
1514c59d87c4SChristoph Hellwig 						xfs_buf_vmap_len(bp));
1515c59d87c4SChristoph Hellwig 		}
15164e49ea4aSMike Christie 		submit_bio(bio);
1517c59d87c4SChristoph Hellwig 		if (size)
1518c59d87c4SChristoph Hellwig 			goto next_chunk;
1519c59d87c4SChristoph Hellwig 	} else {
152037eb17e6SDave Chinner 		/*
152137eb17e6SDave Chinner 		 * This is guaranteed not to be the last io reference count
1522595bff75SDave Chinner 		 * because the caller (xfs_buf_submit) holds a count itself.
152337eb17e6SDave Chinner 		 */
152437eb17e6SDave Chinner 		atomic_dec(&bp->b_io_remaining);
15252451337dSDave Chinner 		xfs_buf_ioerror(bp, -EIO);
1526c59d87c4SChristoph Hellwig 		bio_put(bio);
1527c59d87c4SChristoph Hellwig 	}
15283e85c868SDave Chinner 
15293e85c868SDave Chinner }
15303e85c868SDave Chinner 
15313e85c868SDave Chinner STATIC void
15323e85c868SDave Chinner _xfs_buf_ioapply(
15333e85c868SDave Chinner 	struct xfs_buf	*bp)
15343e85c868SDave Chinner {
15353e85c868SDave Chinner 	struct blk_plug	plug;
1536d03025aeSBart Van Assche 	blk_opf_t	op;
15373e85c868SDave Chinner 	int		offset;
15383e85c868SDave Chinner 	int		size;
15393e85c868SDave Chinner 	int		i;
15403e85c868SDave Chinner 
1541c163f9a1SDave Chinner 	/*
1542c163f9a1SDave Chinner 	 * Make sure we capture only current IO errors rather than stale errors
1543c163f9a1SDave Chinner 	 * left over from previous use of the buffer (e.g. failed readahead).
1544c163f9a1SDave Chinner 	 */
1545c163f9a1SDave Chinner 	bp->b_error = 0;
1546c163f9a1SDave Chinner 
15473e85c868SDave Chinner 	if (bp->b_flags & XBF_WRITE) {
154850bfcd0cSMike Christie 		op = REQ_OP_WRITE;
15491813dd64SDave Chinner 
15501813dd64SDave Chinner 		/*
15511813dd64SDave Chinner 		 * Run the write verifier callback function if it exists. If
15521813dd64SDave Chinner 		 * this function fails it will mark the buffer with an error and
15531813dd64SDave Chinner 		 * the IO should not be dispatched.
15541813dd64SDave Chinner 		 */
15551813dd64SDave Chinner 		if (bp->b_ops) {
15561813dd64SDave Chinner 			bp->b_ops->verify_write(bp);
15571813dd64SDave Chinner 			if (bp->b_error) {
1558dbd329f1SChristoph Hellwig 				xfs_force_shutdown(bp->b_mount,
15591813dd64SDave Chinner 						   SHUTDOWN_CORRUPT_INCORE);
15601813dd64SDave Chinner 				return;
15611813dd64SDave Chinner 			}
15624c7f65aeSDave Chinner 		} else if (bp->b_rhash_key != XFS_BUF_DADDR_NULL) {
1563dbd329f1SChristoph Hellwig 			struct xfs_mount *mp = bp->b_mount;
1564400b9d88SDave Chinner 
1565400b9d88SDave Chinner 			/*
1566400b9d88SDave Chinner 			 * non-crc filesystems don't attach verifiers during
1567400b9d88SDave Chinner 			 * log recovery, so don't warn for such filesystems.
1568400b9d88SDave Chinner 			 */
156938c26bfdSDave Chinner 			if (xfs_has_crc(mp)) {
1570400b9d88SDave Chinner 				xfs_warn(mp,
1571c219b015SDarrick J. Wong 					"%s: no buf ops on daddr 0x%llx len %d",
15724c7f65aeSDave Chinner 					__func__, xfs_buf_daddr(bp),
15734c7f65aeSDave Chinner 					bp->b_length);
15749c712a13SDarrick J. Wong 				xfs_hex_dump(bp->b_addr,
15759c712a13SDarrick J. Wong 						XFS_CORRUPTION_DUMP_LEN);
1576400b9d88SDave Chinner 				dump_stack();
1577400b9d88SDave Chinner 			}
15781813dd64SDave Chinner 		}
15793e85c868SDave Chinner 	} else {
158050bfcd0cSMike Christie 		op = REQ_OP_READ;
15812123ef85SChristoph Hellwig 		if (bp->b_flags & XBF_READ_AHEAD)
15822123ef85SChristoph Hellwig 			op |= REQ_RAHEAD;
15833e85c868SDave Chinner 	}
15843e85c868SDave Chinner 
15853e85c868SDave Chinner 	/* we only use the buffer cache for meta-data */
15862123ef85SChristoph Hellwig 	op |= REQ_META;
15873e85c868SDave Chinner 
15883e85c868SDave Chinner 	/*
15893e85c868SDave Chinner 	 * Walk all the vectors issuing IO on them. Set up the initial offset
15903e85c868SDave Chinner 	 * into the buffer and the desired IO size before we start -
15913e85c868SDave Chinner 	 * xfs_buf_ioapply_map() will modify them appropriately for each
15923e85c868SDave Chinner 	 * subsequent call.
15933e85c868SDave Chinner 	 */
15943e85c868SDave Chinner 	offset = bp->b_offset;
15958124b9b6SChristoph Hellwig 	size = BBTOB(bp->b_length);
15963e85c868SDave Chinner 	blk_start_plug(&plug);
15973e85c868SDave Chinner 	for (i = 0; i < bp->b_map_count; i++) {
15982123ef85SChristoph Hellwig 		xfs_buf_ioapply_map(bp, i, &offset, &size, op);
15993e85c868SDave Chinner 		if (bp->b_error)
16003e85c868SDave Chinner 			break;
16013e85c868SDave Chinner 		if (size <= 0)
16023e85c868SDave Chinner 			break;	/* all done */
16033e85c868SDave Chinner 	}
16043e85c868SDave Chinner 	blk_finish_plug(&plug);
1605c59d87c4SChristoph Hellwig }
1606c59d87c4SChristoph Hellwig 
1607595bff75SDave Chinner /*
1608bb00b6f1SBrian Foster  * Wait for I/O completion of a sync buffer and return the I/O error code.
1609595bff75SDave Chinner  */
1610eaebb515SBrian Foster static int
1611bb00b6f1SBrian Foster xfs_buf_iowait(
1612595bff75SDave Chinner 	struct xfs_buf	*bp)
1613c59d87c4SChristoph Hellwig {
1614bb00b6f1SBrian Foster 	ASSERT(!(bp->b_flags & XBF_ASYNC));
1615bb00b6f1SBrian Foster 
1616bb00b6f1SBrian Foster 	trace_xfs_buf_iowait(bp, _RET_IP_);
1617bb00b6f1SBrian Foster 	wait_for_completion(&bp->b_iowait);
1618bb00b6f1SBrian Foster 	trace_xfs_buf_iowait_done(bp, _RET_IP_);
1619bb00b6f1SBrian Foster 
1620bb00b6f1SBrian Foster 	return bp->b_error;
1621bb00b6f1SBrian Foster }
1622bb00b6f1SBrian Foster 
1623bb00b6f1SBrian Foster /*
1624bb00b6f1SBrian Foster  * Buffer I/O submission path, read or write. Asynchronous submission transfers
1625bb00b6f1SBrian Foster  * the buffer lock ownership and the current reference to the IO. It is not
1626bb00b6f1SBrian Foster  * safe to reference the buffer after a call to this function unless the caller
1627bb00b6f1SBrian Foster  * holds an additional reference itself.
1628bb00b6f1SBrian Foster  */
162926e32875SChristoph Hellwig static int
1630bb00b6f1SBrian Foster __xfs_buf_submit(
1631bb00b6f1SBrian Foster 	struct xfs_buf	*bp,
1632bb00b6f1SBrian Foster 	bool		wait)
1633bb00b6f1SBrian Foster {
1634bb00b6f1SBrian Foster 	int		error = 0;
1635bb00b6f1SBrian Foster 
1636595bff75SDave Chinner 	trace_xfs_buf_submit(bp, _RET_IP_);
1637c59d87c4SChristoph Hellwig 
163843ff2122SChristoph Hellwig 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
1639595bff75SDave Chinner 
164001728b44SDave Chinner 	/*
164101728b44SDave Chinner 	 * On log shutdown we stale and complete the buffer immediately. We can
164201728b44SDave Chinner 	 * be called to read the superblock before the log has been set up, so
164301728b44SDave Chinner 	 * be careful checking the log state.
164401728b44SDave Chinner 	 *
164501728b44SDave Chinner 	 * Checking the mount shutdown state here can result in the log tail
164601728b44SDave Chinner 	 * moving inappropriately on disk as the log may not yet be shut down.
164701728b44SDave Chinner 	 * i.e. failing this buffer on mount shutdown can remove it from the AIL
164801728b44SDave Chinner 	 * and move the tail of the log forwards without having written this
164901728b44SDave Chinner 	 * buffer to disk. This corrupts the log tail state in memory, and
165001728b44SDave Chinner 	 * because the log may not be shut down yet, it can then be propagated
165101728b44SDave Chinner 	 * to disk before the log is shutdown. Hence we check log shutdown
165201728b44SDave Chinner 	 * state here rather than mount state to avoid corrupting the log tail
165301728b44SDave Chinner 	 * on shutdown.
165401728b44SDave Chinner 	 */
165501728b44SDave Chinner 	if (bp->b_mount->m_log &&
165601728b44SDave Chinner 	    xlog_is_shutdown(bp->b_mount->m_log)) {
165754b3b1f6SBrian Foster 		xfs_buf_ioend_fail(bp);
1658eaebb515SBrian Foster 		return -EIO;
1659595bff75SDave Chinner 	}
1660c59d87c4SChristoph Hellwig 
1661bb00b6f1SBrian Foster 	/*
1662bb00b6f1SBrian Foster 	 * Grab a reference so the buffer does not go away underneath us. For
1663bb00b6f1SBrian Foster 	 * async buffers, I/O completion drops the callers reference, which
1664bb00b6f1SBrian Foster 	 * could occur before submission returns.
1665bb00b6f1SBrian Foster 	 */
1666bb00b6f1SBrian Foster 	xfs_buf_hold(bp);
1667bb00b6f1SBrian Foster 
1668375ec69dSChristoph Hellwig 	if (bp->b_flags & XBF_WRITE)
1669c59d87c4SChristoph Hellwig 		xfs_buf_wait_unpin(bp);
1670c59d87c4SChristoph Hellwig 
167161be9c52SDave Chinner 	/* clear the internal error state to avoid spurious errors */
167261be9c52SDave Chinner 	bp->b_io_error = 0;
167361be9c52SDave Chinner 
16748d6c1210SEric Sandeen 	/*
1675eaebb515SBrian Foster 	 * Set the count to 1 initially, so that an I/O completion callout
1676eaebb515SBrian Foster 	 * which happens before we have started all the I/O cannot call
1677eaebb515SBrian Foster 	 * xfs_buf_ioend too early.
1678eaebb515SBrian Foster 	 */
1679eaebb515SBrian Foster 	atomic_set(&bp->b_io_remaining, 1);
1680eaebb515SBrian Foster 	if (bp->b_flags & XBF_ASYNC)
1681eaebb515SBrian Foster 		xfs_buf_ioacct_inc(bp);
1682eaebb515SBrian Foster 	_xfs_buf_ioapply(bp);
1683eaebb515SBrian Foster 
1684eaebb515SBrian Foster 	/*
1685eaebb515SBrian Foster 	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
1686eaebb515SBrian Foster 	 * reference we took above. If we drop it to zero, run completion so
1687eaebb515SBrian Foster 	 * that we don't return to the caller with completion still pending.
1688eaebb515SBrian Foster 	 */
1689eaebb515SBrian Foster 	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
1690eaebb515SBrian Foster 		if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
1691eaebb515SBrian Foster 			xfs_buf_ioend(bp);
1692eaebb515SBrian Foster 		else
1693eaebb515SBrian Foster 			xfs_buf_ioend_async(bp);
1694eaebb515SBrian Foster 	}
1695eaebb515SBrian Foster 
16966af88cdaSBrian Foster 	if (wait)
16976af88cdaSBrian Foster 		error = xfs_buf_iowait(bp);
1698bb00b6f1SBrian Foster 
1699595bff75SDave Chinner 	/*
17006af88cdaSBrian Foster 	 * Release the hold that keeps the buffer referenced for the entire
17016af88cdaSBrian Foster 	 * I/O. Note that if the buffer is async, it is not safe to reference
17026af88cdaSBrian Foster 	 * after this release.
1703595bff75SDave Chinner 	 */
1704595bff75SDave Chinner 	xfs_buf_rele(bp);
1705595bff75SDave Chinner 	return error;
1706c59d87c4SChristoph Hellwig }
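/*
 * Summary sketch of reference and lock ownership for the two submission
 * modes handled above (assumed typical callers, not verbatim code):
 *
 *	sync:	error = xfs_buf_submit(bp);	(waits for completion)
 *		... caller still owns the lock and reference ...
 *		xfs_buf_relse(bp);
 *
 *	async:	bp->b_flags |= XBF_ASYNC;
 *		xfs_buf_submit(bp);
 *		... completion owns the lock and reference; do not touch bp
 *		    again unless an extra hold was taken beforehand ...
 */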
1707c59d87c4SChristoph Hellwig 
170888ee2df7SChristoph Hellwig void *
1709c59d87c4SChristoph Hellwig xfs_buf_offset(
171088ee2df7SChristoph Hellwig 	struct xfs_buf		*bp,
1711c59d87c4SChristoph Hellwig 	size_t			offset)
1712c59d87c4SChristoph Hellwig {
1713c59d87c4SChristoph Hellwig 	struct page		*page;
1714c59d87c4SChristoph Hellwig 
1715611c9946SDave Chinner 	if (bp->b_addr)
1716c59d87c4SChristoph Hellwig 		return bp->b_addr + offset;
1717c59d87c4SChristoph Hellwig 
1718c59d87c4SChristoph Hellwig 	page = bp->b_pages[offset >> PAGE_SHIFT];
171988ee2df7SChristoph Hellwig 	return page_address(page) + (offset & (PAGE_SIZE-1));
1720c59d87c4SChristoph Hellwig }
1721c59d87c4SChristoph Hellwig 
1722c59d87c4SChristoph Hellwig void
1723f9a196eeSChristoph Hellwig xfs_buf_zero(
1724f9a196eeSChristoph Hellwig 	struct xfs_buf		*bp,
1725f9a196eeSChristoph Hellwig 	size_t			boff,
1726f9a196eeSChristoph Hellwig 	size_t			bsize)
1727c59d87c4SChristoph Hellwig {
1728795cac72SDave Chinner 	size_t			bend;
1729c59d87c4SChristoph Hellwig 
1730c59d87c4SChristoph Hellwig 	bend = boff + bsize;
1731c59d87c4SChristoph Hellwig 	while (boff < bend) {
1732795cac72SDave Chinner 		struct page	*page;
1733795cac72SDave Chinner 		int		page_index, page_offset, csize;
1734c59d87c4SChristoph Hellwig 
1735795cac72SDave Chinner 		page_index = (boff + bp->b_offset) >> PAGE_SHIFT;
1736795cac72SDave Chinner 		page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
1737795cac72SDave Chinner 		page = bp->b_pages[page_index];
1738795cac72SDave Chinner 		csize = min_t(size_t, PAGE_SIZE - page_offset,
17398124b9b6SChristoph Hellwig 				      BBTOB(bp->b_length) - boff);
1740795cac72SDave Chinner 
1741795cac72SDave Chinner 		ASSERT((csize + page_offset) <= PAGE_SIZE);
1742c59d87c4SChristoph Hellwig 
1743795cac72SDave Chinner 		memset(page_address(page) + page_offset, 0, csize);
1744c59d87c4SChristoph Hellwig 
1745c59d87c4SChristoph Hellwig 		boff += csize;
1746c59d87c4SChristoph Hellwig 	}
1747c59d87c4SChristoph Hellwig }
1748c59d87c4SChristoph Hellwig 
1749c59d87c4SChristoph Hellwig /*
17508d57c216SDarrick J. Wong  * Log a message about and stale a buffer that a caller has decided is corrupt.
17518d57c216SDarrick J. Wong  *
17528d57c216SDarrick J. Wong  * This function should be called for the kinds of metadata corruption that
17538d57c216SDarrick J. Wong  * cannot be detected by a verifier, such as incorrect inter-block relationship
17548d57c216SDarrick J. Wong  * data.  Do /not/ call this function from a verifier function.
17558d57c216SDarrick J. Wong  *
17568d57c216SDarrick J. Wong  * The buffer must be XBF_DONE prior to the call.  Afterwards, the buffer will
17578d57c216SDarrick J. Wong  * be marked stale, but b_error will not be set.  The caller is responsible for
17588d57c216SDarrick J. Wong  * releasing the buffer or fixing it.
17598d57c216SDarrick J. Wong  */
17608d57c216SDarrick J. Wong void
17618d57c216SDarrick J. Wong __xfs_buf_mark_corrupt(
17628d57c216SDarrick J. Wong 	struct xfs_buf		*bp,
17638d57c216SDarrick J. Wong 	xfs_failaddr_t		fa)
17648d57c216SDarrick J. Wong {
17658d57c216SDarrick J. Wong 	ASSERT(bp->b_flags & XBF_DONE);
17668d57c216SDarrick J. Wong 
1767e83cf875SDarrick J. Wong 	xfs_buf_corruption_error(bp, fa);
17688d57c216SDarrick J. Wong 	xfs_buf_stale(bp);
17698d57c216SDarrick J. Wong }
17708d57c216SDarrick J. Wong 
17718d57c216SDarrick J. Wong /*
1772c59d87c4SChristoph Hellwig  *	Handling of buffer targets (buftargs).
1773c59d87c4SChristoph Hellwig  */
1774c59d87c4SChristoph Hellwig 
1775c59d87c4SChristoph Hellwig /*
1776c59d87c4SChristoph Hellwig  * Wait for any bufs with callbacks that have been submitted but have not yet
1777c59d87c4SChristoph Hellwig  * returned. These buffers will have an elevated hold count, so wait on those
1778c59d87c4SChristoph Hellwig  * while freeing all the buffers only held by the LRU.
1779c59d87c4SChristoph Hellwig  */
1780e80dfa19SDave Chinner static enum lru_status
178110fb9ac1SBrian Foster xfs_buftarg_drain_rele(
1782e80dfa19SDave Chinner 	struct list_head	*item,
17833f97b163SVladimir Davydov 	struct list_lru_one	*lru,
1784e80dfa19SDave Chinner 	spinlock_t		*lru_lock,
1785e80dfa19SDave Chinner 	void			*arg)
1786c59d87c4SChristoph Hellwig 
1787e80dfa19SDave Chinner {
1788e80dfa19SDave Chinner 	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
1789a4082357SDave Chinner 	struct list_head	*dispose = arg;
1790e80dfa19SDave Chinner 
1791c59d87c4SChristoph Hellwig 	if (atomic_read(&bp->b_hold) > 1) {
1792a4082357SDave Chinner 		/* need to wait, so skip it this pass */
179310fb9ac1SBrian Foster 		trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
1794a4082357SDave Chinner 		return LRU_SKIP;
1795a4082357SDave Chinner 	}
1796a4082357SDave Chinner 	if (!spin_trylock(&bp->b_lock))
1797a4082357SDave Chinner 		return LRU_SKIP;
1798a4082357SDave Chinner 
1799c59d87c4SChristoph Hellwig 	/*
180090802ed9SPaul Bolle 	 * clear the LRU reference count so the buffer doesn't get
1801c59d87c4SChristoph Hellwig 	 * ignored in xfs_buf_rele().
1802c59d87c4SChristoph Hellwig 	 */
1803c59d87c4SChristoph Hellwig 	atomic_set(&bp->b_lru_ref, 0);
1804a4082357SDave Chinner 	bp->b_state |= XFS_BSTATE_DISPOSE;
18053f97b163SVladimir Davydov 	list_lru_isolate_move(lru, item, dispose);
1806a4082357SDave Chinner 	spin_unlock(&bp->b_lock);
1807a4082357SDave Chinner 	return LRU_REMOVED;
1808e80dfa19SDave Chinner }
1809e80dfa19SDave Chinner 
18108321ddb2SBrian Foster /*
18118321ddb2SBrian Foster  * Wait for outstanding I/O on the buftarg to complete.
18128321ddb2SBrian Foster  */
1813e80dfa19SDave Chinner void
18148321ddb2SBrian Foster xfs_buftarg_wait(
1815e80dfa19SDave Chinner 	struct xfs_buftarg	*btp)
1816c59d87c4SChristoph Hellwig {
181785bec546SDave Chinner 	/*
18189c7504aaSBrian Foster 	 * First wait on the buftarg I/O count for all in-flight buffers to be
18199c7504aaSBrian Foster 	 * released. This is critical as new buffers do not make the LRU until
18209c7504aaSBrian Foster 	 * they are released.
18219c7504aaSBrian Foster 	 *
18229c7504aaSBrian Foster 	 * Next, flush the buffer workqueue to ensure all completion processing
18239c7504aaSBrian Foster 	 * has finished. Just waiting on buffer locks is not sufficient for
18249c7504aaSBrian Foster 	 * async IO as the reference count held over IO is not released until
18259c7504aaSBrian Foster 	 * after the buffer lock is dropped. Hence we need to ensure here that
18269c7504aaSBrian Foster 	 * all reference counts have been dropped before we start walking the
18279c7504aaSBrian Foster 	 * LRU list.
182885bec546SDave Chinner 	 */
18299c7504aaSBrian Foster 	while (percpu_counter_sum(&btp->bt_io_count))
18309c7504aaSBrian Foster 		delay(100);
1831800b2694SBrian Foster 	flush_workqueue(btp->bt_mount->m_buf_workqueue);
18328321ddb2SBrian Foster }
18338321ddb2SBrian Foster 
18348321ddb2SBrian Foster void
18358321ddb2SBrian Foster xfs_buftarg_drain(
18368321ddb2SBrian Foster 	struct xfs_buftarg	*btp)
18378321ddb2SBrian Foster {
18388321ddb2SBrian Foster 	LIST_HEAD(dispose);
18398321ddb2SBrian Foster 	int			loop = 0;
18408321ddb2SBrian Foster 	bool			write_fail = false;
18418321ddb2SBrian Foster 
18428321ddb2SBrian Foster 	xfs_buftarg_wait(btp);
184385bec546SDave Chinner 
1844a4082357SDave Chinner 	/* loop until there is nothing left on the lru list. */
1845a4082357SDave Chinner 	while (list_lru_count(&btp->bt_lru)) {
184610fb9ac1SBrian Foster 		list_lru_walk(&btp->bt_lru, xfs_buftarg_drain_rele,
1847a4082357SDave Chinner 			      &dispose, LONG_MAX);
1848a4082357SDave Chinner 
1849a4082357SDave Chinner 		while (!list_empty(&dispose)) {
1850a4082357SDave Chinner 			struct xfs_buf *bp;
1851a4082357SDave Chinner 			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1852a4082357SDave Chinner 			list_del_init(&bp->b_lru);
1853ac8809f9SDave Chinner 			if (bp->b_flags & XBF_WRITE_FAIL) {
185461948b6fSBrian Foster 				write_fail = true;
185561948b6fSBrian Foster 				xfs_buf_alert_ratelimited(bp,
185661948b6fSBrian Foster 					"XFS: Corruption Alert",
1857c219b015SDarrick J. Wong "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
18584c7f65aeSDave Chinner 					(long long)xfs_buf_daddr(bp));
1859ac8809f9SDave Chinner 			}
1860a4082357SDave Chinner 			xfs_buf_rele(bp);
1861a4082357SDave Chinner 		}
1862a4082357SDave Chinner 		if (loop++ != 0)
1863a4082357SDave Chinner 			delay(100);
1864a4082357SDave Chinner 	}
186561948b6fSBrian Foster 
186661948b6fSBrian Foster 	/*
186761948b6fSBrian Foster 	 * If one or more failed buffers were freed, that means dirty metadata
186861948b6fSBrian Foster 	 * was thrown away. This should only ever happen after I/O completion
186961948b6fSBrian Foster 	 * handling has elevated I/O error(s) to permanent failures and shuts
187001728b44SDave Chinner 	 * down the journal.
187161948b6fSBrian Foster 	 */
187261948b6fSBrian Foster 	if (write_fail) {
187301728b44SDave Chinner 		ASSERT(xlog_is_shutdown(btp->bt_mount->m_log));
187461948b6fSBrian Foster 		xfs_alert(btp->bt_mount,
187561948b6fSBrian Foster 	      "Please run xfs_repair to determine the extent of the problem.");
187661948b6fSBrian Foster 	}
1877e80dfa19SDave Chinner }
1878c59d87c4SChristoph Hellwig 
1879e80dfa19SDave Chinner static enum lru_status
1880e80dfa19SDave Chinner xfs_buftarg_isolate(
1881e80dfa19SDave Chinner 	struct list_head	*item,
18823f97b163SVladimir Davydov 	struct list_lru_one	*lru,
1883e80dfa19SDave Chinner 	spinlock_t		*lru_lock,
1884e80dfa19SDave Chinner 	void			*arg)
1885e80dfa19SDave Chinner {
1886e80dfa19SDave Chinner 	struct xfs_buf		*bp = container_of(item, struct xfs_buf, b_lru);
1887e80dfa19SDave Chinner 	struct list_head	*dispose = arg;
1888c59d87c4SChristoph Hellwig 
1889c59d87c4SChristoph Hellwig 	/*
1890a4082357SDave Chinner 	 * we are inverting the lru lock/bp->b_lock here, so use a trylock.
1891a4082357SDave Chinner 	 * If we fail to get the lock, just skip it.
1892a4082357SDave Chinner 	 */
1893a4082357SDave Chinner 	if (!spin_trylock(&bp->b_lock))
1894a4082357SDave Chinner 		return LRU_SKIP;
1895a4082357SDave Chinner 	/*
1896c59d87c4SChristoph Hellwig 	 * Decrement the b_lru_ref count unless the value is already
1897c59d87c4SChristoph Hellwig 	 * zero. If the value is already zero, we need to reclaim the
1898c59d87c4SChristoph Hellwig 	 * buffer, otherwise it gets another trip through the LRU.
1899c59d87c4SChristoph Hellwig 	 */
190019957a18SVratislav Bendel 	if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
1901a4082357SDave Chinner 		spin_unlock(&bp->b_lock);
1902e80dfa19SDave Chinner 		return LRU_ROTATE;
1903a4082357SDave Chinner 	}
1904e80dfa19SDave Chinner 
1905a4082357SDave Chinner 	bp->b_state |= XFS_BSTATE_DISPOSE;
19063f97b163SVladimir Davydov 	list_lru_isolate_move(lru, item, dispose);
1907a4082357SDave Chinner 	spin_unlock(&bp->b_lock);
1908e80dfa19SDave Chinner 	return LRU_REMOVED;
1909c59d87c4SChristoph Hellwig }
1910c59d87c4SChristoph Hellwig 
1911addbda40SAndrew Morton static unsigned long
1912e80dfa19SDave Chinner xfs_buftarg_shrink_scan(
1913e80dfa19SDave Chinner 	struct shrinker		*shrink,
1914e80dfa19SDave Chinner 	struct shrink_control	*sc)
1915e80dfa19SDave Chinner {
1916e80dfa19SDave Chinner 	struct xfs_buftarg	*btp = container_of(shrink,
1917e80dfa19SDave Chinner 					struct xfs_buftarg, bt_shrinker);
1918e80dfa19SDave Chinner 	LIST_HEAD(dispose);
1919addbda40SAndrew Morton 	unsigned long		freed;
1920e80dfa19SDave Chinner 
1921503c358cSVladimir Davydov 	freed = list_lru_shrink_walk(&btp->bt_lru, sc,
1922503c358cSVladimir Davydov 				     xfs_buftarg_isolate, &dispose);
1923c59d87c4SChristoph Hellwig 
1924c59d87c4SChristoph Hellwig 	while (!list_empty(&dispose)) {
1925e80dfa19SDave Chinner 		struct xfs_buf *bp;
1926c59d87c4SChristoph Hellwig 		bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
1927c59d87c4SChristoph Hellwig 		list_del_init(&bp->b_lru);
1928c59d87c4SChristoph Hellwig 		xfs_buf_rele(bp);
1929c59d87c4SChristoph Hellwig 	}
1930c59d87c4SChristoph Hellwig 
1931e80dfa19SDave Chinner 	return freed;
1932e80dfa19SDave Chinner }
1933e80dfa19SDave Chinner 
1934addbda40SAndrew Morton static unsigned long
1935e80dfa19SDave Chinner xfs_buftarg_shrink_count(
1936e80dfa19SDave Chinner 	struct shrinker		*shrink,
1937e80dfa19SDave Chinner 	struct shrink_control	*sc)
1938e80dfa19SDave Chinner {
1939e80dfa19SDave Chinner 	struct xfs_buftarg	*btp = container_of(shrink,
1940e80dfa19SDave Chinner 					struct xfs_buftarg, bt_shrinker);
1941503c358cSVladimir Davydov 	return list_lru_shrink_count(&btp->bt_lru, sc);
1942c59d87c4SChristoph Hellwig }
1943c59d87c4SChristoph Hellwig 
1944c59d87c4SChristoph Hellwig void
1945c59d87c4SChristoph Hellwig xfs_free_buftarg(
1946c59d87c4SChristoph Hellwig 	struct xfs_buftarg	*btp)
1947c59d87c4SChristoph Hellwig {
194841233576SChristoph Hellwig 	struct block_device	*bdev = btp->bt_bdev;
194941233576SChristoph Hellwig 
1950c59d87c4SChristoph Hellwig 	unregister_shrinker(&btp->bt_shrinker);
19519c7504aaSBrian Foster 	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
19529c7504aaSBrian Foster 	percpu_counter_destroy(&btp->bt_io_count);
1953f5e1dd34SGlauber Costa 	list_lru_destroy(&btp->bt_lru);
1954c59d87c4SChristoph Hellwig 
19556f643c57SShiyang Ruan 	fs_put_dax(btp->bt_daxdev, btp->bt_mount);
195641233576SChristoph Hellwig 	/* the main block device is closed by kill_block_super */
195741233576SChristoph Hellwig 	if (bdev != btp->bt_mount->m_super->s_bdev)
19582ea6f689SChristoph Hellwig 		blkdev_put(bdev, btp->bt_mount->m_super);
1959c59d87c4SChristoph Hellwig 
1960c59d87c4SChristoph Hellwig 	kmem_free(btp);
1961c59d87c4SChristoph Hellwig }
1962c59d87c4SChristoph Hellwig 
19633fefdeeeSEric Sandeen int
19643fefdeeeSEric Sandeen xfs_setsize_buftarg(
1965c59d87c4SChristoph Hellwig 	xfs_buftarg_t		*btp,
19663fefdeeeSEric Sandeen 	unsigned int		sectorsize)
1967c59d87c4SChristoph Hellwig {
19687c71ee78SEric Sandeen 	/* Set up metadata sector size info */
19696da54179SEric Sandeen 	btp->bt_meta_sectorsize = sectorsize;
19706da54179SEric Sandeen 	btp->bt_meta_sectormask = sectorsize - 1;
1971c59d87c4SChristoph Hellwig 
1972c59d87c4SChristoph Hellwig 	if (set_blocksize(btp->bt_bdev, sectorsize)) {
1973c59d87c4SChristoph Hellwig 		xfs_warn(btp->bt_mount,
1974a1c6f057SDmitry Monakhov 			"Cannot set_blocksize to %u on device %pg",
1975a1c6f057SDmitry Monakhov 			sectorsize, btp->bt_bdev);
19762451337dSDave Chinner 		return -EINVAL;
1977c59d87c4SChristoph Hellwig 	}
1978c59d87c4SChristoph Hellwig 
19797c71ee78SEric Sandeen 	/* Set up device logical sector size mask */
19807c71ee78SEric Sandeen 	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
19817c71ee78SEric Sandeen 	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
19827c71ee78SEric Sandeen 
1983c59d87c4SChristoph Hellwig 	return 0;
1984c59d87c4SChristoph Hellwig }
1985c59d87c4SChristoph Hellwig 
1986c59d87c4SChristoph Hellwig /*
1987c59d87c4SChristoph Hellwig  * When allocating the initial buffer target we have not yet
1988c59d87c4SChristoph Hellwig  * read in the superblock, so don't know what sized sectors
19898b4ad79cSZhi Yong Wu  * are being used at this early stage.  Play safe.
1990c59d87c4SChristoph Hellwig  */
1991c59d87c4SChristoph Hellwig STATIC int
1992c59d87c4SChristoph Hellwig xfs_setsize_buftarg_early(
1993c59d87c4SChristoph Hellwig 	xfs_buftarg_t		*btp,
1994c59d87c4SChristoph Hellwig 	struct block_device	*bdev)
1995c59d87c4SChristoph Hellwig {
1996a96c4151SEric Sandeen 	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));
1997c59d87c4SChristoph Hellwig }
1998c59d87c4SChristoph Hellwig 
19995b5abbefSChristoph Hellwig struct xfs_buftarg *
2000c59d87c4SChristoph Hellwig xfs_alloc_buftarg(
2001c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp,
20025b5abbefSChristoph Hellwig 	struct block_device	*bdev)
2003c59d87c4SChristoph Hellwig {
2004c59d87c4SChristoph Hellwig 	xfs_buftarg_t		*btp;
20056f643c57SShiyang Ruan 	const struct dax_holder_operations *ops = NULL;
2006c59d87c4SChristoph Hellwig 
20076f643c57SShiyang Ruan #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
20086f643c57SShiyang Ruan 	ops = &xfs_dax_holder_operations;
20096f643c57SShiyang Ruan #endif
2010707e0ddaSTetsuo Handa 	btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
2011c59d87c4SChristoph Hellwig 
2012c59d87c4SChristoph Hellwig 	btp->bt_mount = mp;
2013c59d87c4SChristoph Hellwig 	btp->bt_dev =  bdev->bd_dev;
2014c59d87c4SChristoph Hellwig 	btp->bt_bdev = bdev;
20156f643c57SShiyang Ruan 	btp->bt_daxdev = fs_dax_get_by_bdev(bdev, &btp->bt_dax_part_off,
20166f643c57SShiyang Ruan 					    mp, ops);
2017c59d87c4SChristoph Hellwig 
2018f9bccfccSBrian Foster 	/*
2019f9bccfccSBrian Foster 	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
2020f9bccfccSBrian Foster 	 * per 30 seconds so as to not spam logs too much on repeated errors.
2021f9bccfccSBrian Foster 	 */
2022f9bccfccSBrian Foster 	ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
2023f9bccfccSBrian Foster 			     DEFAULT_RATELIMIT_BURST);
2024f9bccfccSBrian Foster 
2025c59d87c4SChristoph Hellwig 	if (xfs_setsize_buftarg_early(btp, bdev))
2026d210a987SMichal Hocko 		goto error_free;
20275ca302c8SGlauber Costa 
20285ca302c8SGlauber Costa 	if (list_lru_init(&btp->bt_lru))
2029d210a987SMichal Hocko 		goto error_free;
20305ca302c8SGlauber Costa 
20319c7504aaSBrian Foster 	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
2032d210a987SMichal Hocko 		goto error_lru;
20339c7504aaSBrian Foster 
2034e80dfa19SDave Chinner 	btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
2035e80dfa19SDave Chinner 	btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
2036c59d87c4SChristoph Hellwig 	btp->bt_shrinker.seeks = DEFAULT_SEEKS;
2037e80dfa19SDave Chinner 	btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
2038e33c267aSRoman Gushchin 	if (register_shrinker(&btp->bt_shrinker, "xfs-buf:%s",
2039e33c267aSRoman Gushchin 			      mp->m_super->s_id))
2040d210a987SMichal Hocko 		goto error_pcpu;
2041c59d87c4SChristoph Hellwig 	return btp;
2042c59d87c4SChristoph Hellwig 
2043d210a987SMichal Hocko error_pcpu:
2044d210a987SMichal Hocko 	percpu_counter_destroy(&btp->bt_io_count);
2045d210a987SMichal Hocko error_lru:
2046d210a987SMichal Hocko 	list_lru_destroy(&btp->bt_lru);
2047d210a987SMichal Hocko error_free:
2048c59d87c4SChristoph Hellwig 	kmem_free(btp);
2049c59d87c4SChristoph Hellwig 	return NULL;
2050c59d87c4SChristoph Hellwig }
2051c59d87c4SChristoph Hellwig 
20521a48327cSDarrick J. Wong static inline void
20531a48327cSDarrick J. Wong xfs_buf_list_del(
20541a48327cSDarrick J. Wong 	struct xfs_buf		*bp)
20551a48327cSDarrick J. Wong {
20561a48327cSDarrick J. Wong 	list_del_init(&bp->b_list);
20571a48327cSDarrick J. Wong 	wake_up_var(&bp->b_list);
20581a48327cSDarrick J. Wong }
20591a48327cSDarrick J. Wong 
206043ff2122SChristoph Hellwig /*
206120e8a063SBrian Foster  * Cancel a delayed write list.
206220e8a063SBrian Foster  *
206320e8a063SBrian Foster  * Remove each buffer from the list, clear the delwri queue flag and drop the
206420e8a063SBrian Foster  * associated buffer reference.
206520e8a063SBrian Foster  */
206620e8a063SBrian Foster void
206720e8a063SBrian Foster xfs_buf_delwri_cancel(
206820e8a063SBrian Foster 	struct list_head	*list)
206920e8a063SBrian Foster {
207020e8a063SBrian Foster 	struct xfs_buf		*bp;
207120e8a063SBrian Foster 
207220e8a063SBrian Foster 	while (!list_empty(list)) {
207320e8a063SBrian Foster 		bp = list_first_entry(list, struct xfs_buf, b_list);
207420e8a063SBrian Foster 
207520e8a063SBrian Foster 		xfs_buf_lock(bp);
207620e8a063SBrian Foster 		bp->b_flags &= ~_XBF_DELWRI_Q;
20771a48327cSDarrick J. Wong 		xfs_buf_list_del(bp);
207820e8a063SBrian Foster 		xfs_buf_relse(bp);
207920e8a063SBrian Foster 	}
208020e8a063SBrian Foster }
208120e8a063SBrian Foster 
208220e8a063SBrian Foster /*
208343ff2122SChristoph Hellwig  * Add a buffer to the delayed write list.
208443ff2122SChristoph Hellwig  *
208543ff2122SChristoph Hellwig  * This queues a buffer for writeout if it hasn't already been.  Note that
208643ff2122SChristoph Hellwig  * neither this routine nor the buffer list submission functions perform
208743ff2122SChristoph Hellwig  * any internal synchronization.  It is expected that the lists are thread-local
208843ff2122SChristoph Hellwig  * to the callers.
208943ff2122SChristoph Hellwig  *
209043ff2122SChristoph Hellwig  * Returns true if we queued up the buffer, or false if it already had
209143ff2122SChristoph Hellwig  * been on the buffer list.
209243ff2122SChristoph Hellwig  */
209343ff2122SChristoph Hellwig bool
209443ff2122SChristoph Hellwig xfs_buf_delwri_queue(
209543ff2122SChristoph Hellwig 	struct xfs_buf		*bp,
209643ff2122SChristoph Hellwig 	struct list_head	*list)
209743ff2122SChristoph Hellwig {
209843ff2122SChristoph Hellwig 	ASSERT(xfs_buf_islocked(bp));
209943ff2122SChristoph Hellwig 	ASSERT(!(bp->b_flags & XBF_READ));
2100c59d87c4SChristoph Hellwig 
2101c59d87c4SChristoph Hellwig 	/*
210243ff2122SChristoph Hellwig 	 * If the buffer is already marked delwri it is already queued up
210343ff2122SChristoph Hellwig 	 * by someone else for immediate writeout.  Just ignore it in that
210443ff2122SChristoph Hellwig 	 * case.
2105c59d87c4SChristoph Hellwig 	 */
210643ff2122SChristoph Hellwig 	if (bp->b_flags & _XBF_DELWRI_Q) {
210743ff2122SChristoph Hellwig 		trace_xfs_buf_delwri_queued(bp, _RET_IP_);
210843ff2122SChristoph Hellwig 		return false;
210943ff2122SChristoph Hellwig 	}
2110c59d87c4SChristoph Hellwig 
2111c59d87c4SChristoph Hellwig 	trace_xfs_buf_delwri_queue(bp, _RET_IP_);
2112c59d87c4SChristoph Hellwig 
211343ff2122SChristoph Hellwig 	/*
211443ff2122SChristoph Hellwig 	 * If a buffer gets written out synchronously or marked stale while it
211543ff2122SChristoph Hellwig 	 * is on a delwri list we lazily remove it. To do this, the other party
211643ff2122SChristoph Hellwig 	 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone.
211743ff2122SChristoph Hellwig 	 * It remains referenced and on the list.  In a rare corner case it
211843ff2122SChristoph Hellwig 	 * might get re-added to a delwri list after the synchronous writeout,
211943ff2122SChristoph Hellwig 	 * in which case we just need to re-add the flag here.
212043ff2122SChristoph Hellwig 	 */
212143ff2122SChristoph Hellwig 	bp->b_flags |= _XBF_DELWRI_Q;
212243ff2122SChristoph Hellwig 	if (list_empty(&bp->b_list)) {
21235a8ee6baSChristoph Hellwig 		atomic_inc(&bp->b_hold);
212443ff2122SChristoph Hellwig 		list_add_tail(&bp->b_list, list);
2125c59d87c4SChristoph Hellwig 	}
2126c59d87c4SChristoph Hellwig 
212743ff2122SChristoph Hellwig 	return true;
2128c59d87c4SChristoph Hellwig }
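
/*
 * Illustrative sketch, not upstream code: because queueing takes its own
 * hold on the buffer, a caller that only needed the buffer long enough to
 * dirty and queue it can unlock and drop its reference straight away; the
 * delwri list keeps the buffer alive until it is submitted or cancelled.
 * The helper name is hypothetical.
 */
static void
xfs_example_queue_and_release(
	struct xfs_buf		*bp,
	struct list_head	*buffer_list)
{
	ASSERT(xfs_buf_islocked(bp));

	xfs_buf_delwri_queue(bp, buffer_list);

	/* unlock and drop our reference; the delwri queue still holds one */
	xfs_buf_relse(bp);
}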
2129c59d87c4SChristoph Hellwig 
2130c59d87c4SChristoph Hellwig /*
21311a48327cSDarrick J. Wong  * Queue a buffer to this delwri list as part of a data integrity operation.
21321a48327cSDarrick J. Wong  * If the buffer is on any other delwri list, we'll wait for that to clear
21331a48327cSDarrick J. Wong  * so that the caller can submit the buffer for IO and wait for the result.
21341a48327cSDarrick J. Wong  * Callers must ensure the buffer is not already on the list.
21351a48327cSDarrick J. Wong  */
21361a48327cSDarrick J. Wong void
xfs_buf_delwri_queue_here(struct xfs_buf * bp,struct list_head * buffer_list)21371a48327cSDarrick J. Wong xfs_buf_delwri_queue_here(
21381a48327cSDarrick J. Wong 	struct xfs_buf		*bp,
21391a48327cSDarrick J. Wong 	struct list_head	*buffer_list)
21401a48327cSDarrick J. Wong {
21411a48327cSDarrick J. Wong 	/*
21421a48327cSDarrick J. Wong 	 * We need this buffer to end up on the /caller's/ delwri list, not any
21431a48327cSDarrick J. Wong 	 * old list.  The buffer could still be on another list, e.g. if it was
21441a48327cSDarrick J. Wong 	 * marked stale (which clears DELWRI_Q) after the AIL queued it to its
21451a48327cSDarrick J. Wong 	 * list but before the AIL had a chance to submit that list.
21461a48327cSDarrick J. Wong 	 */
21471a48327cSDarrick J. Wong 	while (!list_empty(&bp->b_list)) {
21481a48327cSDarrick J. Wong 		xfs_buf_unlock(bp);
21491a48327cSDarrick J. Wong 		wait_var_event(&bp->b_list, list_empty(&bp->b_list));
21501a48327cSDarrick J. Wong 		xfs_buf_lock(bp);
21511a48327cSDarrick J. Wong 	}
21521a48327cSDarrick J. Wong 
21531a48327cSDarrick J. Wong 	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
21541a48327cSDarrick J. Wong 
21551a48327cSDarrick J. Wong 	xfs_buf_delwri_queue(bp, buffer_list);
21561a48327cSDarrick J. Wong }
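
/*
 * Illustrative sketch, not upstream code: an integrity-minded caller that
 * must be certain the buffer lands on its own list before submitting it
 * synchronously.  With plain xfs_buf_delwri_queue() a buffer still sitting
 * on someone else's delwri list would be skipped and the submit below would
 * not wait for it.  The helper name is hypothetical.
 */
static int
xfs_example_write_now(
	struct xfs_buf		*bp)
{
	LIST_HEAD		(buffer_list);

	xfs_buf_lock(bp);
	xfs_buf_delwri_queue_here(bp, &buffer_list);
	xfs_buf_unlock(bp);

	return xfs_buf_delwri_submit(&buffer_list);
}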
21571a48327cSDarrick J. Wong 
21581a48327cSDarrick J. Wong /*
2159c59d87c4SChristoph Hellwig  * The compare function is more complex than it needs to be because
2160c59d87c4SChristoph Hellwig  * the return value is only 32 bits wide and we are doing comparisons
2161c59d87c4SChristoph Hellwig  * on 64-bit values.
2162c59d87c4SChristoph Hellwig  */
2163c59d87c4SChristoph Hellwig static int
xfs_buf_cmp(void * priv,const struct list_head * a,const struct list_head * b)2164c59d87c4SChristoph Hellwig xfs_buf_cmp(
2165c59d87c4SChristoph Hellwig 	void			*priv,
21664f0f586bSSami Tolvanen 	const struct list_head	*a,
21674f0f586bSSami Tolvanen 	const struct list_head	*b)
2168c59d87c4SChristoph Hellwig {
2169c59d87c4SChristoph Hellwig 	struct xfs_buf	*ap = container_of(a, struct xfs_buf, b_list);
2170c59d87c4SChristoph Hellwig 	struct xfs_buf	*bp = container_of(b, struct xfs_buf, b_list);
2171c59d87c4SChristoph Hellwig 	xfs_daddr_t		diff;
2172c59d87c4SChristoph Hellwig 
2173f4b42421SMark Tinguely 	diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn;
2174c59d87c4SChristoph Hellwig 	if (diff < 0)
2175c59d87c4SChristoph Hellwig 		return -1;
2176c59d87c4SChristoph Hellwig 	if (diff > 0)
2177c59d87c4SChristoph Hellwig 		return 1;
2178c59d87c4SChristoph Hellwig 	return 0;
2179c59d87c4SChristoph Hellwig }
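
/*
 * Worked example for the -1/0/+1 mapping above (editorial note): the block
 * number delta is a 64-bit xfs_daddr_t, so simply returning "(int)diff"
 * would truncate it.  Two buffers exactly 2^32 sectors apart produce a diff
 * of 0x100000000, whose low 32 bits are zero, and the truncated value would
 * tell list_sort() that the buffers compare equal.
 */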
2180c59d87c4SChristoph Hellwig 
218126f1fe85SDave Chinner /*
2182e339dd8dSBrian Foster  * Submit buffers for write. If wait_list is specified, the buffers are
2183e339dd8dSBrian Foster  * submitted using sync I/O and placed on the wait list such that the caller can
2184e339dd8dSBrian Foster  * iowait each buffer. Otherwise async I/O is used and the buffers are released
2185e339dd8dSBrian Foster  * at I/O completion time. In either case, buffers remain locked until I/O
2186e339dd8dSBrian Foster  * completes and the buffer is released from the queue.
218726f1fe85SDave Chinner  */
218843ff2122SChristoph Hellwig static int
xfs_buf_delwri_submit_buffers(struct list_head * buffer_list,struct list_head * wait_list)218926f1fe85SDave Chinner xfs_buf_delwri_submit_buffers(
219043ff2122SChristoph Hellwig 	struct list_head	*buffer_list,
219126f1fe85SDave Chinner 	struct list_head	*wait_list)
2192c59d87c4SChristoph Hellwig {
219343ff2122SChristoph Hellwig 	struct xfs_buf		*bp, *n;
219443ff2122SChristoph Hellwig 	int			pinned = 0;
219526f1fe85SDave Chinner 	struct blk_plug		plug;
2196c59d87c4SChristoph Hellwig 
219726f1fe85SDave Chinner 	list_sort(NULL, buffer_list, xfs_buf_cmp);
219826f1fe85SDave Chinner 
219926f1fe85SDave Chinner 	blk_start_plug(&plug);
220043ff2122SChristoph Hellwig 	list_for_each_entry_safe(bp, n, buffer_list, b_list) {
220126f1fe85SDave Chinner 		if (!wait_list) {
2202dbd0f529SDave Chinner 			if (!xfs_buf_trylock(bp))
2203dbd0f529SDave Chinner 				continue;
220443ff2122SChristoph Hellwig 			if (xfs_buf_ispinned(bp)) {
2205dbd0f529SDave Chinner 				xfs_buf_unlock(bp);
220643ff2122SChristoph Hellwig 				pinned++;
220743ff2122SChristoph Hellwig 				continue;
2208c59d87c4SChristoph Hellwig 			}
220943ff2122SChristoph Hellwig 		} else {
221043ff2122SChristoph Hellwig 			xfs_buf_lock(bp);
2211c59d87c4SChristoph Hellwig 		}
2212c59d87c4SChristoph Hellwig 
2213c59d87c4SChristoph Hellwig 		/*
221443ff2122SChristoph Hellwig 		 * Someone else might have written the buffer synchronously or
221543ff2122SChristoph Hellwig 		 * marked it stale in the meantime.  In that case only the
221643ff2122SChristoph Hellwig 		 * _XBF_DELWRI_Q flag got cleared, and we have to drop the
221743ff2122SChristoph Hellwig 		 * reference and remove it from the list here.
221843ff2122SChristoph Hellwig 		 */
221943ff2122SChristoph Hellwig 		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
22201a48327cSDarrick J. Wong 			xfs_buf_list_del(bp);
222143ff2122SChristoph Hellwig 			xfs_buf_relse(bp);
222243ff2122SChristoph Hellwig 			continue;
222343ff2122SChristoph Hellwig 		}
222443ff2122SChristoph Hellwig 
222543ff2122SChristoph Hellwig 		trace_xfs_buf_delwri_split(bp, _RET_IP_);
222643ff2122SChristoph Hellwig 
2227cf53e99dSDave Chinner 		/*
2228e339dd8dSBrian Foster 		 * If we have a wait list, each buffer (and associated delwri
2229e339dd8dSBrian Foster 		 * queue reference) transfers to it and is submitted
2230e339dd8dSBrian Foster 		 * synchronously. Otherwise, drop the buffer from the delwri
2231e339dd8dSBrian Foster 		 * queue and submit async.
2232cf53e99dSDave Chinner 		 */
2233b6983e80SBrian Foster 		bp->b_flags &= ~_XBF_DELWRI_Q;
2234e339dd8dSBrian Foster 		bp->b_flags |= XBF_WRITE;
223526f1fe85SDave Chinner 		if (wait_list) {
2236e339dd8dSBrian Foster 			bp->b_flags &= ~XBF_ASYNC;
223726f1fe85SDave Chinner 			list_move_tail(&bp->b_list, wait_list);
2238e339dd8dSBrian Foster 		} else {
2239e339dd8dSBrian Foster 			bp->b_flags |= XBF_ASYNC;
22401a48327cSDarrick J. Wong 			xfs_buf_list_del(bp);
224143ff2122SChristoph Hellwig 		}
22426af88cdaSBrian Foster 		__xfs_buf_submit(bp, false);
2243e339dd8dSBrian Foster 	}
224443ff2122SChristoph Hellwig 	blk_finish_plug(&plug);
224543ff2122SChristoph Hellwig 
224643ff2122SChristoph Hellwig 	return pinned;
224743ff2122SChristoph Hellwig }
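
/*
 * Design note (editorial): the list_sort() above orders the buffers by disk
 * address and the blk_{start,finish}_plug() pair batches the resulting bio
 * submissions, giving the block layer a chance to merge adjacent buffers
 * into larger I/Os before they are issued.
 */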
224843ff2122SChristoph Hellwig 
224943ff2122SChristoph Hellwig /*
225043ff2122SChristoph Hellwig  * Write out a buffer list asynchronously.
225143ff2122SChristoph Hellwig  *
225243ff2122SChristoph Hellwig  * This will take the @buffer_list, write all non-locked and non-pinned buffers
225343ff2122SChristoph Hellwig  * out and not wait for I/O completion on any of the buffers.  This interface
225443ff2122SChristoph Hellwig  * is only safely usable for callers that can track I/O completion by higher
225543ff2122SChristoph Hellwig  * level means, e.g. AIL pushing, as the submitted buffers are consumed from
225643ff2122SChristoph Hellwig  * @buffer_list by this function.
2257efc3289cSBrian Foster  *
2258efc3289cSBrian Foster  * Note: this function will skip buffers it would block on, and in doing so
2259efc3289cSBrian Foster  * leaves them on @buffer_list so they can be retried on a later pass. As such,
2260efc3289cSBrian Foster  * it is up to the caller to ensure that the buffer list is fully submitted or
2261efc3289cSBrian Foster  * cancelled appropriately when they are finished with the list. Failure to
2262efc3289cSBrian Foster  * cancel or resubmit the list until it is empty will result in leaked buffers
2263efc3289cSBrian Foster  * at unmount time.
2264c59d87c4SChristoph Hellwig  */
2265c59d87c4SChristoph Hellwig int
xfs_buf_delwri_submit_nowait(struct list_head * buffer_list)226643ff2122SChristoph Hellwig xfs_buf_delwri_submit_nowait(
226743ff2122SChristoph Hellwig 	struct list_head	*buffer_list)
2268c59d87c4SChristoph Hellwig {
226926f1fe85SDave Chinner 	return xfs_buf_delwri_submit_buffers(buffer_list, NULL);
227043ff2122SChristoph Hellwig }
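
/*
 * Illustrative sketch, not upstream code: the async variant can leave
 * locked or pinned buffers on @buffer_list, so a long-running caller has to
 * keep resubmitting until the list drains, or cancel whatever is left
 * before tearing down.  Proper backoff between passes (and any log forcing
 * needed to unpin buffers) is deliberately elided; the helper name and the
 * stopping flag are hypothetical.
 */
static void
xfs_example_push_until_empty(
	struct list_head	*buffer_list,
	bool			*stopping)
{
	while (!list_empty(buffer_list)) {
		if (*stopping) {
			/* drop the queue references instead of leaking them */
			xfs_buf_delwri_cancel(buffer_list);
			return;
		}
		/* locked or pinned buffers stay on the list for a later pass */
		xfs_buf_delwri_submit_nowait(buffer_list);
		cond_resched();
	}
}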
2271c59d87c4SChristoph Hellwig 
2272c59d87c4SChristoph Hellwig /*
227343ff2122SChristoph Hellwig  * Write out a buffer list synchronously.
227443ff2122SChristoph Hellwig  *
227543ff2122SChristoph Hellwig  * This will take the @buffer_list, write all buffers out and wait for I/O
227643ff2122SChristoph Hellwig  * completion on all of the buffers. @buffer_list is consumed by the function,
227743ff2122SChristoph Hellwig  * so callers must have some other way of tracking buffers if they require such
227843ff2122SChristoph Hellwig  * functionality.
2279c59d87c4SChristoph Hellwig  */
228043ff2122SChristoph Hellwig int
xfs_buf_delwri_submit(struct list_head * buffer_list)228143ff2122SChristoph Hellwig xfs_buf_delwri_submit(
228243ff2122SChristoph Hellwig 	struct list_head	*buffer_list)
228343ff2122SChristoph Hellwig {
228426f1fe85SDave Chinner 	LIST_HEAD		(wait_list);
228543ff2122SChristoph Hellwig 	int			error = 0, error2;
228643ff2122SChristoph Hellwig 	struct xfs_buf		*bp;
2287c59d87c4SChristoph Hellwig 
228826f1fe85SDave Chinner 	xfs_buf_delwri_submit_buffers(buffer_list, &wait_list);
2289c59d87c4SChristoph Hellwig 
2290c59d87c4SChristoph Hellwig 	/* Wait for IO to complete. */
229126f1fe85SDave Chinner 	while (!list_empty(&wait_list)) {
229226f1fe85SDave Chinner 		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
2293c59d87c4SChristoph Hellwig 
22941a48327cSDarrick J. Wong 		xfs_buf_list_del(bp);
2295cf53e99dSDave Chinner 
2296e339dd8dSBrian Foster 		/*
2297e339dd8dSBrian Foster 		 * Wait on the locked buffer, check for errors and unlock and
2298e339dd8dSBrian Foster 		 * release the delwri queue reference.
2299e339dd8dSBrian Foster 		 */
2300e339dd8dSBrian Foster 		error2 = xfs_buf_iowait(bp);
2301c59d87c4SChristoph Hellwig 		xfs_buf_relse(bp);
230243ff2122SChristoph Hellwig 		if (!error)
230343ff2122SChristoph Hellwig 			error = error2;
2304c59d87c4SChristoph Hellwig 	}
2305c59d87c4SChristoph Hellwig 
230643ff2122SChristoph Hellwig 	return error;
2307c59d87c4SChristoph Hellwig }
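
/*
 * Editorial note on the error handling above: every buffer on the wait list
 * is iowaited and released even after a failure has been seen; only the
 * first error encountered is reported back to the caller.
 */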
2308c59d87c4SChristoph Hellwig 
23097912e7feSBrian Foster /*
23107912e7feSBrian Foster  * Push a single buffer on a delwri queue.
23117912e7feSBrian Foster  *
23127912e7feSBrian Foster  * The purpose of this function is to submit a single buffer of a delwri queue
23137912e7feSBrian Foster  * and return with the buffer still on the original queue. The waiting delwri
23147912e7feSBrian Foster  * buffer submission infrastructure guarantees transfer of the delwri queue
23157912e7feSBrian Foster  * buffer reference to a temporary wait list. We reuse this infrastructure to
23167912e7feSBrian Foster  * transfer the buffer back to the original queue.
23177912e7feSBrian Foster  *
23187912e7feSBrian Foster  * Note the buffer transitions from the queued state to the submitted and
23197912e7feSBrian Foster  * wait-listed state and back to the queued state during this call. The buffer
23207912e7feSBrian Foster  * locking and queue management logic between _delwri_pushbuf() and
23217912e7feSBrian Foster  * _delwri_queue() guarantees that the buffer cannot be queued to another list
23227912e7feSBrian Foster  * before returning.
23237912e7feSBrian Foster  */
23247912e7feSBrian Foster int
xfs_buf_delwri_pushbuf(struct xfs_buf * bp,struct list_head * buffer_list)23257912e7feSBrian Foster xfs_buf_delwri_pushbuf(
23267912e7feSBrian Foster 	struct xfs_buf		*bp,
23277912e7feSBrian Foster 	struct list_head	*buffer_list)
23287912e7feSBrian Foster {
23297912e7feSBrian Foster 	LIST_HEAD		(submit_list);
23307912e7feSBrian Foster 	int			error;
23317912e7feSBrian Foster 
23327912e7feSBrian Foster 	ASSERT(bp->b_flags & _XBF_DELWRI_Q);
23337912e7feSBrian Foster 
23347912e7feSBrian Foster 	trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
23357912e7feSBrian Foster 
23367912e7feSBrian Foster 	/*
23377912e7feSBrian Foster 	 * Isolate the buffer to a new local list so we can submit it for I/O
23387912e7feSBrian Foster 	 * independently from the rest of the original list.
23397912e7feSBrian Foster 	 */
23407912e7feSBrian Foster 	xfs_buf_lock(bp);
23417912e7feSBrian Foster 	list_move(&bp->b_list, &submit_list);
23427912e7feSBrian Foster 	xfs_buf_unlock(bp);
23437912e7feSBrian Foster 
23447912e7feSBrian Foster 	/*
23457912e7feSBrian Foster 	 * Delwri submission clears the DELWRI_Q buffer flag and returns with
2346e339dd8dSBrian Foster 	 * the buffer on the wait list with the original reference. Rather than
23477912e7feSBrian Foster 	 * bounce the buffer from a local wait list back to the original list
23487912e7feSBrian Foster 	 * after I/O completion, reuse the original list as the wait list.
23497912e7feSBrian Foster 	 */
23507912e7feSBrian Foster 	xfs_buf_delwri_submit_buffers(&submit_list, buffer_list);
23517912e7feSBrian Foster 
23527912e7feSBrian Foster 	/*
2353e339dd8dSBrian Foster 	 * The buffer is now locked, under I/O and wait listed on the original
2354e339dd8dSBrian Foster 	 * delwri queue. Wait for I/O completion, restore the DELWRI_Q flag and
2355e339dd8dSBrian Foster 	 * return with the buffer unlocked and on the original queue.
23567912e7feSBrian Foster 	 */
2357e339dd8dSBrian Foster 	error = xfs_buf_iowait(bp);
23587912e7feSBrian Foster 	bp->b_flags |= _XBF_DELWRI_Q;
23597912e7feSBrian Foster 	xfs_buf_unlock(bp);
23607912e7feSBrian Foster 
23617912e7feSBrian Foster 	return error;
23627912e7feSBrian Foster }
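
/*
 * Illustrative sketch, not upstream code: a caller that needs one buffer
 * from its delwri queue written out immediately, e.g. because it cannot
 * make progress until that buffer is stable on disk, while keeping the
 * buffer queued for the eventual full list submission.  The helper name and
 * calling convention are hypothetical.
 */
static int
xfs_example_flush_one(
	struct xfs_buf		*bp,		/* held and locked by caller */
	struct list_head	*buffer_list)	/* bp is queued on this list */
{
	int			error;

	/*
	 * xfs_buf_delwri_pushbuf() takes the buffer lock itself, so drop our
	 * lock first; the caller's hold keeps bp alive across the call.
	 */
	xfs_buf_unlock(bp);
	error = xfs_buf_delwri_pushbuf(bp, buffer_list);

	/* bp is now unlocked and back on @buffer_list with _XBF_DELWRI_Q set */
	return error;
}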
23637912e7feSBrian Foster 
xfs_buf_set_ref(struct xfs_buf * bp,int lru_ref)23647561d27eSBrian Foster void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
23657561d27eSBrian Foster {
23667561d27eSBrian Foster 	/*
23677561d27eSBrian Foster 	 * Set the lru reference count to 0 if the error injection tag is set.
23687561d27eSBrian Foster 	 * This allows userspace to disrupt buffer caching for debug/testing
23697561d27eSBrian Foster 	 * purposes.
23707561d27eSBrian Foster 	 */
2371dbd329f1SChristoph Hellwig 	if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
23727561d27eSBrian Foster 		lru_ref = 0;
23737561d27eSBrian Foster 
23747561d27eSBrian Foster 	atomic_set(&bp->b_lru_ref, lru_ref);
23757561d27eSBrian Foster }
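
/*
 * Illustrative sketch, not upstream code: callers raise b_lru_ref on
 * buffers they expect to reuse soon, so the buftarg shrinker has to see the
 * buffer several times before it can reclaim it from the LRU.  The value
 * used here is an arbitrary example; real callers pass per-structure
 * reference constants defined elsewhere in XFS.
 */
static void
xfs_example_mark_hot(
	struct xfs_buf		*bp)
{
	/* survive a few shrinker passes instead of just one */
	xfs_buf_set_ref(bp, 3);
}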
23768473fee3SBrian Foster 
23778473fee3SBrian Foster /*
23788473fee3SBrian Foster  * Verify an on-disk magic value against the magic value specified in the
23798473fee3SBrian Foster  * verifier structure. The verifier magic is in disk byte order so the caller is
23808473fee3SBrian Foster  * expected to pass the value directly from disk.
23818473fee3SBrian Foster  */
23828473fee3SBrian Foster bool
xfs_verify_magic(struct xfs_buf * bp,__be32 dmagic)23838473fee3SBrian Foster xfs_verify_magic(
23848473fee3SBrian Foster 	struct xfs_buf		*bp,
238515baadf7SDarrick J. Wong 	__be32			dmagic)
23868473fee3SBrian Foster {
2387dbd329f1SChristoph Hellwig 	struct xfs_mount	*mp = bp->b_mount;
23888473fee3SBrian Foster 	int			idx;
23898473fee3SBrian Foster 
239038c26bfdSDave Chinner 	idx = xfs_has_crc(mp);
239114ed8688SDenis Efremov 	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
23928473fee3SBrian Foster 		return false;
23938473fee3SBrian Foster 	return dmagic == bp->b_ops->magic[idx];
23948473fee3SBrian Foster }
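
/*
 * Illustrative sketch, not upstream code: a read verifier passes the raw
 * on-disk magic field straight through and lets xfs_verify_magic() select
 * the expected value for the filesystem's format from b_ops->magic[].
 * struct xfs_agf and its agf_magicnum field come from xfs_format.h; the
 * helper name is hypothetical.
 */
static bool
xfs_example_check_agf_magic(
	struct xfs_buf		*bp)
{
	struct xfs_agf		*agf = bp->b_addr;

	return xfs_verify_magic(bp, agf->agf_magicnum);
}
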
239515baadf7SDarrick J. Wong /*
239615baadf7SDarrick J. Wong  * Verify a 16-bit on-disk magic value against the magic value specified in
239715baadf7SDarrick J. Wong  * the verifier structure. The verifier magic is in disk byte order so the
239815baadf7SDarrick J. Wong  * caller is expected to pass the value directly from disk.
239915baadf7SDarrick J. Wong  */
240015baadf7SDarrick J. Wong bool
xfs_verify_magic16(struct xfs_buf * bp,__be16 dmagic)240115baadf7SDarrick J. Wong xfs_verify_magic16(
240215baadf7SDarrick J. Wong 	struct xfs_buf		*bp,
240315baadf7SDarrick J. Wong 	__be16			dmagic)
240415baadf7SDarrick J. Wong {
2405dbd329f1SChristoph Hellwig 	struct xfs_mount	*mp = bp->b_mount;
240615baadf7SDarrick J. Wong 	int			idx;
240715baadf7SDarrick J. Wong 
240838c26bfdSDave Chinner 	idx = xfs_has_crc(mp);
240914ed8688SDenis Efremov 	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
241015baadf7SDarrick J. Wong 		return false;
241115baadf7SDarrick J. Wong 	return dmagic == bp->b_ops->magic16[idx];
241215baadf7SDarrick J. Wong }
2413