xref: /openbmc/linux/fs/xfs/xfs_aops.c (revision 09138ba68c1487a42c400485e999386a74911dbc)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
7c59d87c4SChristoph Hellwig #include "xfs.h"
870a9883cSDave Chinner #include "xfs_shared.h"
9239880efSDave Chinner #include "xfs_format.h"
10239880efSDave Chinner #include "xfs_log_format.h"
11239880efSDave Chinner #include "xfs_trans_resv.h"
12c59d87c4SChristoph Hellwig #include "xfs_mount.h"
13c59d87c4SChristoph Hellwig #include "xfs_inode.h"
14239880efSDave Chinner #include "xfs_trans.h"
15c59d87c4SChristoph Hellwig #include "xfs_iomap.h"
16c59d87c4SChristoph Hellwig #include "xfs_trace.h"
17c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
1868988114SDave Chinner #include "xfs_bmap_util.h"
19ef473667SDarrick J. Wong #include "xfs_reflink.h"
20c2beff99SDarrick J. Wong #include "xfs_errortag.h"
21c2beff99SDarrick J. Wong #include "xfs_error.h"
22c59d87c4SChristoph Hellwig 
23fbcc0256SDave Chinner struct xfs_writepage_ctx {
24598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx ctx;
25d9252d52SBrian Foster 	unsigned int		data_seq;
26e666aa37SChristoph Hellwig 	unsigned int		cow_seq;
27fbcc0256SDave Chinner };
28fbcc0256SDave Chinner 
29598ecfbaSChristoph Hellwig static inline struct xfs_writepage_ctx *
XFS_WPC(struct iomap_writepage_ctx * ctx)30598ecfbaSChristoph Hellwig XFS_WPC(struct iomap_writepage_ctx *ctx)
31598ecfbaSChristoph Hellwig {
32598ecfbaSChristoph Hellwig 	return container_of(ctx, struct xfs_writepage_ctx, ctx);
33598ecfbaSChristoph Hellwig }
34598ecfbaSChristoph Hellwig 
35c59d87c4SChristoph Hellwig /*
36fc0063c4SChristoph Hellwig  * Fast and loose check if this write could update the on-disk inode size.
37fc0063c4SChristoph Hellwig  */
xfs_ioend_is_append(struct iomap_ioend * ioend)38598ecfbaSChristoph Hellwig static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
39fc0063c4SChristoph Hellwig {
40fc0063c4SChristoph Hellwig 	return ioend->io_offset + ioend->io_size >
4113d2c10bSChristoph Hellwig 		XFS_I(ioend->io_inode)->i_disk_size;
42fc0063c4SChristoph Hellwig }
43fc0063c4SChristoph Hellwig 
44fc0063c4SChristoph Hellwig /*
452813d682SChristoph Hellwig  * Update on-disk file size now that data has been written to disk.
46c59d87c4SChristoph Hellwig  */
47e7a3d7e7SBrian Foster int
xfs_setfilesize(struct xfs_inode * ip,xfs_off_t offset,size_t size)48e7a3d7e7SBrian Foster xfs_setfilesize(
492ba66237SChristoph Hellwig 	struct xfs_inode	*ip,
502ba66237SChristoph Hellwig 	xfs_off_t		offset,
512ba66237SChristoph Hellwig 	size_t			size)
52c59d87c4SChristoph Hellwig {
53e7a3d7e7SBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
54e7a3d7e7SBrian Foster 	struct xfs_trans	*tp;
55c59d87c4SChristoph Hellwig 	xfs_fsize_t		isize;
56e7a3d7e7SBrian Foster 	int			error;
57e7a3d7e7SBrian Foster 
58e7a3d7e7SBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
59e7a3d7e7SBrian Foster 	if (error)
60e7a3d7e7SBrian Foster 		return error;
61c59d87c4SChristoph Hellwig 
62aa6bf01dSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
632ba66237SChristoph Hellwig 	isize = xfs_new_eof(ip, offset + size);
64281627dfSChristoph Hellwig 	if (!isize) {
65281627dfSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
664906e215SChristoph Hellwig 		xfs_trans_cancel(tp);
67281627dfSChristoph Hellwig 		return 0;
68c59d87c4SChristoph Hellwig 	}
69c59d87c4SChristoph Hellwig 
702ba66237SChristoph Hellwig 	trace_xfs_setfilesize(ip, offset, size);
71281627dfSChristoph Hellwig 
7213d2c10bSChristoph Hellwig 	ip->i_disk_size = isize;
73281627dfSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
74281627dfSChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
75281627dfSChristoph Hellwig 
7670393313SChristoph Hellwig 	return xfs_trans_commit(tp);
77c59d87c4SChristoph Hellwig }
78c59d87c4SChristoph Hellwig 
79c59d87c4SChristoph Hellwig /*
80c59d87c4SChristoph Hellwig  * IO write completion.
81c59d87c4SChristoph Hellwig  */
82c59d87c4SChristoph Hellwig STATIC void
xfs_end_ioend(struct iomap_ioend * ioend)83cb357bf3SDarrick J. Wong xfs_end_ioend(
84598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend)
85c59d87c4SChristoph Hellwig {
86c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
875ca5916bSBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
88787eb485SChristoph Hellwig 	xfs_off_t		offset = ioend->io_offset;
89787eb485SChristoph Hellwig 	size_t			size = ioend->io_size;
9073d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
914e4cbee9SChristoph Hellwig 	int			error;
92c59d87c4SChristoph Hellwig 
93af055e37SBrian Foster 	/*
9473d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
9573d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
9673d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
9773d30d48SChristoph Hellwig 	 */
9873d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
9973d30d48SChristoph Hellwig 
10073d30d48SChristoph Hellwig 	/*
101f9dd7ba4SBhaskar Chowdhury 	 * Just clean up the in-memory structures if the fs has been shut down.
102af055e37SBrian Foster 	 */
1035ca5916bSBrian Foster 	if (xfs_is_shutdown(mp)) {
1040e51a8e1SChristoph Hellwig 		error = -EIO;
10543caeb18SDarrick J. Wong 		goto done;
10643caeb18SDarrick J. Wong 	}
10743caeb18SDarrick J. Wong 
10843caeb18SDarrick J. Wong 	/*
1095ca5916bSBrian Foster 	 * Clean up all COW blocks and underlying data fork delalloc blocks on
1105ca5916bSBrian Foster 	 * I/O error. The delalloc punch is required because this ioend was
1115ca5916bSBrian Foster 	 * mapped to blocks in the COW fork and the associated pages are no
1125ca5916bSBrian Foster 	 * longer dirty. If we don't remove delalloc blocks here, they become
1135ca5916bSBrian Foster 	 * stale and can corrupt free space accounting on unmount.
114c59d87c4SChristoph Hellwig 	 */
1154e4cbee9SChristoph Hellwig 	error = blk_status_to_errno(ioend->io_bio->bi_status);
116787eb485SChristoph Hellwig 	if (unlikely(error)) {
1175ca5916bSBrian Foster 		if (ioend->io_flags & IOMAP_F_SHARED) {
118787eb485SChristoph Hellwig 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
1197348b322SDave Chinner 			xfs_bmap_punch_delalloc_range(ip, offset,
1207348b322SDave Chinner 					offset + size);
1215ca5916bSBrian Foster 		}
1225cb13dcdSZhaohongjiang 		goto done;
123787eb485SChristoph Hellwig 	}
124787eb485SChristoph Hellwig 
125787eb485SChristoph Hellwig 	/*
126787eb485SChristoph Hellwig 	 * Success: commit the COW or unwritten blocks if needed.
127787eb485SChristoph Hellwig 	 */
128760fea8bSChristoph Hellwig 	if (ioend->io_flags & IOMAP_F_SHARED)
129787eb485SChristoph Hellwig 		error = xfs_reflink_end_cow(ip, offset, size);
1304e087a3bSChristoph Hellwig 	else if (ioend->io_type == IOMAP_UNWRITTEN)
131ee70daabSEryu Guan 		error = xfs_iomap_write_unwritten(ip, offset, size, false);
13284803fb7SChristoph Hellwig 
1337cd3099fSBrian Foster 	if (!error && xfs_ioend_is_append(ioend))
1347cd3099fSBrian Foster 		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
13504f658eeSChristoph Hellwig done:
136598ecfbaSChristoph Hellwig 	iomap_finish_ioends(ioend, error);
13773d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
1383994fc48SDarrick J. Wong }
1393994fc48SDarrick J. Wong 
140ebb7fb15SDave Chinner /*
141ebb7fb15SDave Chinner  * Finish all pending IO completions that require transactional modifications.
142ebb7fb15SDave Chinner  *
143ebb7fb15SDave Chinner  * We try to merge physical and logically contiguous ioends before completion to
144ebb7fb15SDave Chinner  * minimise the number of transactions we need to perform during IO completion.
145ebb7fb15SDave Chinner  * Both unwritten extent conversion and COW remapping need to iterate and modify
146ebb7fb15SDave Chinner  * one physical extent at a time, so we gain nothing by merging physically
147ebb7fb15SDave Chinner  * discontiguous extents here.
148ebb7fb15SDave Chinner  *
149ebb7fb15SDave Chinner  * The ioend chain length that we can be processing here is largely unbound in
150ebb7fb15SDave Chinner  * length and we may have to perform significant amounts of work on each ioend
151ebb7fb15SDave Chinner  * to complete it. Hence we have to be careful about holding the CPU for too
152ebb7fb15SDave Chinner  * long in this loop.
153ebb7fb15SDave Chinner  */
154cb357bf3SDarrick J. Wong void
xfs_end_io(struct work_struct * work)155cb357bf3SDarrick J. Wong xfs_end_io(
156cb357bf3SDarrick J. Wong 	struct work_struct	*work)
157cb357bf3SDarrick J. Wong {
158433dad94SChristoph Hellwig 	struct xfs_inode	*ip =
159433dad94SChristoph Hellwig 		container_of(work, struct xfs_inode, i_ioend_work);
160598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend;
161433dad94SChristoph Hellwig 	struct list_head	tmp;
162cb357bf3SDarrick J. Wong 	unsigned long		flags;
163cb357bf3SDarrick J. Wong 
164cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
165433dad94SChristoph Hellwig 	list_replace_init(&ip->i_ioend_list, &tmp);
166cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
167cb357bf3SDarrick J. Wong 
168598ecfbaSChristoph Hellwig 	iomap_sort_ioends(&tmp);
169598ecfbaSChristoph Hellwig 	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
170433dad94SChristoph Hellwig 			io_list))) {
171cb357bf3SDarrick J. Wong 		list_del_init(&ioend->io_list);
1726e552494SBrian Foster 		iomap_ioend_try_merge(ioend, &tmp);
173cb357bf3SDarrick J. Wong 		xfs_end_ioend(ioend);
174ebb7fb15SDave Chinner 		cond_resched();
175cb357bf3SDarrick J. Wong 	}
176cb357bf3SDarrick J. Wong }
177cb357bf3SDarrick J. Wong 
1780e51a8e1SChristoph Hellwig STATIC void
xfs_end_bio(struct bio * bio)1790e51a8e1SChristoph Hellwig xfs_end_bio(
1800e51a8e1SChristoph Hellwig 	struct bio		*bio)
181c59d87c4SChristoph Hellwig {
182598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend = bio->bi_private;
183cb357bf3SDarrick J. Wong 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
184cb357bf3SDarrick J. Wong 	unsigned long		flags;
185c59d87c4SChristoph Hellwig 
186cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
187cb357bf3SDarrick J. Wong 	if (list_empty(&ip->i_ioend_list))
188598ecfbaSChristoph Hellwig 		WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
189cb357bf3SDarrick J. Wong 					 &ip->i_ioend_work));
190cb357bf3SDarrick J. Wong 	list_add_tail(&ioend->io_list, &ip->i_ioend_list);
191cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
192c59d87c4SChristoph Hellwig }
193c59d87c4SChristoph Hellwig 
194d9252d52SBrian Foster /*
195d9252d52SBrian Foster  * Fast revalidation of the cached writeback mapping. Return true if the current
196d9252d52SBrian Foster  * mapping is valid, false otherwise.
197d9252d52SBrian Foster  */
198d9252d52SBrian Foster static bool
xfs_imap_valid(struct iomap_writepage_ctx * wpc,struct xfs_inode * ip,loff_t offset)199d9252d52SBrian Foster xfs_imap_valid(
200598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx	*wpc,
201d9252d52SBrian Foster 	struct xfs_inode		*ip,
2024e087a3bSChristoph Hellwig 	loff_t				offset)
203d9252d52SBrian Foster {
2044e087a3bSChristoph Hellwig 	if (offset < wpc->iomap.offset ||
2054e087a3bSChristoph Hellwig 	    offset >= wpc->iomap.offset + wpc->iomap.length)
206d9252d52SBrian Foster 		return false;
207d9252d52SBrian Foster 	/*
208d9252d52SBrian Foster 	 * If this is a COW mapping, it is sufficient to check that the mapping
209d9252d52SBrian Foster 	 * covers the offset. Be careful to check this first because the caller
210d9252d52SBrian Foster 	 * can revalidate a COW mapping without updating the data seqno.
211d9252d52SBrian Foster 	 */
212760fea8bSChristoph Hellwig 	if (wpc->iomap.flags & IOMAP_F_SHARED)
213d9252d52SBrian Foster 		return true;
214d9252d52SBrian Foster 
215d9252d52SBrian Foster 	/*
216d9252d52SBrian Foster 	 * This is not a COW mapping. Check the sequence number of the data fork
217d9252d52SBrian Foster 	 * because concurrent changes could have invalidated the extent. Check
218d9252d52SBrian Foster 	 * the COW fork because concurrent changes since the last time we
219d9252d52SBrian Foster 	 * checked (and found nothing at this offset) could have added
220d9252d52SBrian Foster 	 * overlapping blocks.
221d9252d52SBrian Foster 	 */
222c2beff99SDarrick J. Wong 	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) {
223c2beff99SDarrick J. Wong 		trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap,
224c2beff99SDarrick J. Wong 				XFS_WPC(wpc)->data_seq, XFS_DATA_FORK);
225d9252d52SBrian Foster 		return false;
226c2beff99SDarrick J. Wong 	}
227d9252d52SBrian Foster 	if (xfs_inode_has_cow_data(ip) &&
228c2beff99SDarrick J. Wong 	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) {
229c2beff99SDarrick J. Wong 		trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap,
230c2beff99SDarrick J. Wong 				XFS_WPC(wpc)->cow_seq, XFS_COW_FORK);
231d9252d52SBrian Foster 		return false;
232c2beff99SDarrick J. Wong 	}
233d9252d52SBrian Foster 	return true;
234d9252d52SBrian Foster }
235d9252d52SBrian Foster 
236598ecfbaSChristoph Hellwig static int
xfs_map_blocks(struct iomap_writepage_ctx * wpc,struct inode * inode,loff_t offset)237c59d87c4SChristoph Hellwig xfs_map_blocks(
238598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx *wpc,
239c59d87c4SChristoph Hellwig 	struct inode		*inode,
2405c665e5bSChristoph Hellwig 	loff_t			offset)
241c59d87c4SChristoph Hellwig {
242c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
243c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
24493407472SFabian Frederick 	ssize_t			count = i_blocksize(inode);
245b4e29032SChristoph Hellwig 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
246b4e29032SChristoph Hellwig 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
247c2f09217SDarrick J. Wong 	xfs_fileoff_t		cow_fsb;
248c2f09217SDarrick J. Wong 	int			whichfork;
2495c665e5bSChristoph Hellwig 	struct xfs_bmbt_irec	imap;
250060d4eaaSChristoph Hellwig 	struct xfs_iext_cursor	icur;
2517588cbeeSChristoph Hellwig 	int			retries = 0;
252c59d87c4SChristoph Hellwig 	int			error = 0;
253*36081fd0SZhang Yi 	unsigned int		*seq;
254c59d87c4SChristoph Hellwig 
25575c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
256d9252d52SBrian Foster 		return -EIO;
257d9252d52SBrian Foster 
258c2beff99SDarrick J. Wong 	XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS);
259c2beff99SDarrick J. Wong 
260889c65b3SChristoph Hellwig 	/*
261889c65b3SChristoph Hellwig 	 * COW fork blocks can overlap data fork blocks even if the blocks
262889c65b3SChristoph Hellwig 	 * aren't shared.  COW I/O always takes precedent, so we must always
263889c65b3SChristoph Hellwig 	 * check for overlap on reflink inodes unless the mapping is already a
264e666aa37SChristoph Hellwig 	 * COW one, or the COW fork hasn't changed from the last time we looked
265e666aa37SChristoph Hellwig 	 * at it.
266e666aa37SChristoph Hellwig 	 *
267e666aa37SChristoph Hellwig 	 * It's safe to check the COW fork if_seq here without the ILOCK because
268e666aa37SChristoph Hellwig 	 * we've indirectly protected against concurrent updates: writeback has
269e666aa37SChristoph Hellwig 	 * the page locked, which prevents concurrent invalidations by reflink
270e666aa37SChristoph Hellwig 	 * and directio and prevents concurrent buffered writes to the same
271e666aa37SChristoph Hellwig 	 * page.  Changes to if_seq always happen under i_lock, which protects
272e666aa37SChristoph Hellwig 	 * against concurrent updates and provides a memory barrier on the way
273e666aa37SChristoph Hellwig 	 * out that ensures that we always see the current value.
274889c65b3SChristoph Hellwig 	 */
2754e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset))
276889c65b3SChristoph Hellwig 		return 0;
277889c65b3SChristoph Hellwig 
278889c65b3SChristoph Hellwig 	/*
279889c65b3SChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
280889c65b3SChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
281889c65b3SChristoph Hellwig 	 * into real extents.  If we return without a valid map, it means we
282889c65b3SChristoph Hellwig 	 * landed in a hole and we skip the block.
283889c65b3SChristoph Hellwig 	 */
2847588cbeeSChristoph Hellwig retry:
285c2f09217SDarrick J. Wong 	cow_fsb = NULLFILEOFF;
286c2f09217SDarrick J. Wong 	whichfork = XFS_DATA_FORK;
287c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
288b2197a36SChristoph Hellwig 	ASSERT(!xfs_need_iread_extents(&ip->i_df));
289060d4eaaSChristoph Hellwig 
290060d4eaaSChristoph Hellwig 	/*
291060d4eaaSChristoph Hellwig 	 * Check if this is offset is covered by a COW extents, and if yes use
292060d4eaaSChristoph Hellwig 	 * it directly instead of looking up anything in the data fork.
293060d4eaaSChristoph Hellwig 	 */
29451d62690SChristoph Hellwig 	if (xfs_inode_has_cow_data(ip) &&
295e666aa37SChristoph Hellwig 	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
296e666aa37SChristoph Hellwig 		cow_fsb = imap.br_startoff;
297e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
298598ecfbaSChristoph Hellwig 		XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
2995c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
300be225fecSChristoph Hellwig 
301760fea8bSChristoph Hellwig 		whichfork = XFS_COW_FORK;
3025c665e5bSChristoph Hellwig 		goto allocate_blocks;
3035c665e5bSChristoph Hellwig 	}
3045c665e5bSChristoph Hellwig 
3055c665e5bSChristoph Hellwig 	/*
306d9252d52SBrian Foster 	 * No COW extent overlap. Revalidate now that we may have updated
307d9252d52SBrian Foster 	 * ->cow_seq. If the data mapping is still valid, we're done.
3085c665e5bSChristoph Hellwig 	 */
3094e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset)) {
3105c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
3115c665e5bSChristoph Hellwig 		return 0;
3125c665e5bSChristoph Hellwig 	}
3135c665e5bSChristoph Hellwig 
3145c665e5bSChristoph Hellwig 	/*
3155c665e5bSChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
3165c665e5bSChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
3175c665e5bSChristoph Hellwig 	 * into real extents.
3185c665e5bSChristoph Hellwig 	 */
3193345746eSChristoph Hellwig 	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
3203345746eSChristoph Hellwig 		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
321598ecfbaSChristoph Hellwig 	XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
322c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
323c59d87c4SChristoph Hellwig 
32412df89f2SChristoph Hellwig 	/* landed in a hole or beyond EOF? */
3253345746eSChristoph Hellwig 	if (imap.br_startoff > offset_fsb) {
3263345746eSChristoph Hellwig 		imap.br_blockcount = imap.br_startoff - offset_fsb;
3275c665e5bSChristoph Hellwig 		imap.br_startoff = offset_fsb;
3285c665e5bSChristoph Hellwig 		imap.br_startblock = HOLESTARTBLOCK;
329be225fecSChristoph Hellwig 		imap.br_state = XFS_EXT_NORM;
33012df89f2SChristoph Hellwig 	}
33112df89f2SChristoph Hellwig 
332e666aa37SChristoph Hellwig 	/*
33312df89f2SChristoph Hellwig 	 * Truncate to the next COW extent if there is one.  This is the only
33412df89f2SChristoph Hellwig 	 * opportunity to do this because we can skip COW fork lookups for the
33512df89f2SChristoph Hellwig 	 * subsequent blocks in the mapping; however, the requirement to treat
33612df89f2SChristoph Hellwig 	 * the COW range separately remains.
337e666aa37SChristoph Hellwig 	 */
338e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF &&
339e666aa37SChristoph Hellwig 	    cow_fsb < imap.br_startoff + imap.br_blockcount)
340e666aa37SChristoph Hellwig 		imap.br_blockcount = cow_fsb - imap.br_startoff;
341e666aa37SChristoph Hellwig 
342be225fecSChristoph Hellwig 	/* got a delalloc extent? */
34312df89f2SChristoph Hellwig 	if (imap.br_startblock != HOLESTARTBLOCK &&
34412df89f2SChristoph Hellwig 	    isnullstartblock(imap.br_startblock))
3455c665e5bSChristoph Hellwig 		goto allocate_blocks;
346e2f6ad46SDave Chinner 
347304a68b9SDave Chinner 	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
348760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
3495c665e5bSChristoph Hellwig 	return 0;
3505c665e5bSChristoph Hellwig allocate_blocks:
351*36081fd0SZhang Yi 	/*
352*36081fd0SZhang Yi 	 * Convert a dellalloc extent to a real one. The current page is held
353*36081fd0SZhang Yi 	 * locked so nothing could have removed the block backing offset_fsb,
354*36081fd0SZhang Yi 	 * although it could have moved from the COW to the data fork by another
355*36081fd0SZhang Yi 	 * thread.
356*36081fd0SZhang Yi 	 */
357*36081fd0SZhang Yi 	if (whichfork == XFS_COW_FORK)
358*36081fd0SZhang Yi 		seq = &XFS_WPC(wpc)->cow_seq;
359*36081fd0SZhang Yi 	else
360*36081fd0SZhang Yi 		seq = &XFS_WPC(wpc)->data_seq;
361*36081fd0SZhang Yi 
362*36081fd0SZhang Yi 	error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
363*36081fd0SZhang Yi 				&wpc->iomap, seq);
3647588cbeeSChristoph Hellwig 	if (error) {
3657588cbeeSChristoph Hellwig 		/*
3667588cbeeSChristoph Hellwig 		 * If we failed to find the extent in the COW fork we might have
3677588cbeeSChristoph Hellwig 		 * raced with a COW to data fork conversion or truncate.
3687588cbeeSChristoph Hellwig 		 * Restart the lookup to catch the extent in the data fork for
3697588cbeeSChristoph Hellwig 		 * the former case, but prevent additional retries to avoid
3707588cbeeSChristoph Hellwig 		 * looping forever for the latter case.
3717588cbeeSChristoph Hellwig 		 */
372760fea8bSChristoph Hellwig 		if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
3737588cbeeSChristoph Hellwig 			goto retry;
3747588cbeeSChristoph Hellwig 		ASSERT(error != -EAGAIN);
3755c665e5bSChristoph Hellwig 		return error;
3767588cbeeSChristoph Hellwig 	}
3774ad765edSChristoph Hellwig 
3784ad765edSChristoph Hellwig 	/*
3794ad765edSChristoph Hellwig 	 * Due to merging the return real extent might be larger than the
3804ad765edSChristoph Hellwig 	 * original delalloc one.  Trim the return extent to the next COW
3814ad765edSChristoph Hellwig 	 * boundary again to force a re-lookup.
3824ad765edSChristoph Hellwig 	 */
383760fea8bSChristoph Hellwig 	if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
3844e087a3bSChristoph Hellwig 		loff_t		cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
3854ad765edSChristoph Hellwig 
3864e087a3bSChristoph Hellwig 		if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
3874e087a3bSChristoph Hellwig 			wpc->iomap.length = cow_offset - wpc->iomap.offset;
3884e087a3bSChristoph Hellwig 	}
3894e087a3bSChristoph Hellwig 
3904e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset <= offset);
3914e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
392760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
393c59d87c4SChristoph Hellwig 	return 0;
394c59d87c4SChristoph Hellwig }
395c59d87c4SChristoph Hellwig 
396598ecfbaSChristoph Hellwig static int
xfs_prepare_ioend(struct iomap_ioend * ioend,int status)397598ecfbaSChristoph Hellwig xfs_prepare_ioend(
398598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend,
399e10de372SDave Chinner 	int			status)
400c59d87c4SChristoph Hellwig {
40173d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
40273d30d48SChristoph Hellwig 
40373d30d48SChristoph Hellwig 	/*
40473d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
40573d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
40673d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
40773d30d48SChristoph Hellwig 	 */
40873d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
40973d30d48SChristoph Hellwig 
4105eda4300SDarrick J. Wong 	/* Convert CoW extents to regular */
411760fea8bSChristoph Hellwig 	if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
4125eda4300SDarrick J. Wong 		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4135eda4300SDarrick J. Wong 				ioend->io_offset, ioend->io_size);
4145eda4300SDarrick J. Wong 	}
4155eda4300SDarrick J. Wong 
41673d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
41773d30d48SChristoph Hellwig 
4187adb8f14SBrian Foster 	/* send ioends that might require a transaction to the completion wq */
4197adb8f14SBrian Foster 	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
4207adb8f14SBrian Foster 	    (ioend->io_flags & IOMAP_F_SHARED))
4210e51a8e1SChristoph Hellwig 		ioend->io_bio->bi_end_io = xfs_end_bio;
422e10de372SDave Chinner 	return status;
4237bf7f352SDave Chinner }
4247bf7f352SDave Chinner 
425c59d87c4SChristoph Hellwig /*
4268ac5b996SDave Chinner  * If the folio has delalloc blocks on it, the caller is asking us to punch them
4278ac5b996SDave Chinner  * out. If we don't, we can leave a stale delalloc mapping covered by a clean
4288ac5b996SDave Chinner  * page that needs to be dirtied again before the delalloc mapping can be
4298ac5b996SDave Chinner  * converted. This stale delalloc mapping can trip up a later direct I/O read
4308ac5b996SDave Chinner  * operation on the same region.
431c59d87c4SChristoph Hellwig  *
4328ac5b996SDave Chinner  * We prevent this by truncating away the delalloc regions on the folio. Because
43382cb1417SChristoph Hellwig  * they are delalloc, we can do this without needing a transaction. Indeed - if
43482cb1417SChristoph Hellwig  * we get ENOSPC errors, we have to be able to do this truncation without a
4358ac5b996SDave Chinner  * transaction as there is no space left for block reservation (typically why
4368ac5b996SDave Chinner  * we see a ENOSPC in writeback).
437c59d87c4SChristoph Hellwig  */
438598ecfbaSChristoph Hellwig static void
xfs_discard_folio(struct folio * folio,loff_t pos)4396e478521SMatthew Wilcox (Oracle) xfs_discard_folio(
4406e478521SMatthew Wilcox (Oracle) 	struct folio		*folio,
4416e478521SMatthew Wilcox (Oracle) 	loff_t			pos)
442c59d87c4SChristoph Hellwig {
4437348b322SDave Chinner 	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
44403625721SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
44503625721SChristoph Hellwig 	int			error;
446c59d87c4SChristoph Hellwig 
44775c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
448e9c3a8e8SDarrick J. Wong 		return;
449c59d87c4SChristoph Hellwig 
4504ab45e25SChristoph Hellwig 	xfs_alert_ratelimited(mp,
4516e478521SMatthew Wilcox (Oracle) 		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
4526e478521SMatthew Wilcox (Oracle) 			folio, ip->i_ino, pos);
453c59d87c4SChristoph Hellwig 
4548ac5b996SDave Chinner 	/*
455c1950a11SZizhen Pang 	 * The end of the punch range is always the offset of the first
4568ac5b996SDave Chinner 	 * byte of the next folio. Hence the end offset is only dependent on the
4578ac5b996SDave Chinner 	 * folio itself and not the start offset that is passed in.
4588ac5b996SDave Chinner 	 */
4597348b322SDave Chinner 	error = xfs_bmap_punch_delalloc_range(ip, pos,
4608ac5b996SDave Chinner 				folio_pos(folio) + folio_size(folio));
4617348b322SDave Chinner 
46275c8c50fSDave Chinner 	if (error && !xfs_is_shutdown(mp))
46303625721SChristoph Hellwig 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
464c59d87c4SChristoph Hellwig }
465c59d87c4SChristoph Hellwig 
466598ecfbaSChristoph Hellwig static const struct iomap_writeback_ops xfs_writeback_ops = {
467598ecfbaSChristoph Hellwig 	.map_blocks		= xfs_map_blocks,
468598ecfbaSChristoph Hellwig 	.prepare_ioend		= xfs_prepare_ioend,
4696e478521SMatthew Wilcox (Oracle) 	.discard_folio		= xfs_discard_folio,
470598ecfbaSChristoph Hellwig };
471c59d87c4SChristoph Hellwig 
472c59d87c4SChristoph Hellwig STATIC int
xfs_vm_writepages(struct address_space * mapping,struct writeback_control * wbc)473c59d87c4SChristoph Hellwig xfs_vm_writepages(
474c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
475c59d87c4SChristoph Hellwig 	struct writeback_control *wbc)
476c59d87c4SChristoph Hellwig {
477be225fecSChristoph Hellwig 	struct xfs_writepage_ctx wpc = { };
478fbcc0256SDave Chinner 
479c59d87c4SChristoph Hellwig 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
480598ecfbaSChristoph Hellwig 	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
481c59d87c4SChristoph Hellwig }
482c59d87c4SChristoph Hellwig 
4836e2608dfSDan Williams STATIC int
xfs_dax_writepages(struct address_space * mapping,struct writeback_control * wbc)4846e2608dfSDan Williams xfs_dax_writepages(
4856e2608dfSDan Williams 	struct address_space	*mapping,
4866e2608dfSDan Williams 	struct writeback_control *wbc)
4876e2608dfSDan Williams {
48830fa529eSChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
48930fa529eSChristoph Hellwig 
49030fa529eSChristoph Hellwig 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
4916e2608dfSDan Williams 	return dax_writeback_mapping_range(mapping,
4923f666c56SVivek Goyal 			xfs_inode_buftarg(ip)->bt_daxdev, wbc);
4936e2608dfSDan Williams }
4946e2608dfSDan Williams 
495c59d87c4SChristoph Hellwig STATIC sector_t
xfs_vm_bmap(struct address_space * mapping,sector_t block)496c59d87c4SChristoph Hellwig xfs_vm_bmap(
497c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
498c59d87c4SChristoph Hellwig 	sector_t		block)
499c59d87c4SChristoph Hellwig {
500b84e7722SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
501c59d87c4SChristoph Hellwig 
502b84e7722SChristoph Hellwig 	trace_xfs_vm_bmap(ip);
503db1327b1SDarrick J. Wong 
504db1327b1SDarrick J. Wong 	/*
505db1327b1SDarrick J. Wong 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
506793057e1SIngo Molnar 	 * bypasses the file system for actual I/O.  We really can't allow
507db1327b1SDarrick J. Wong 	 * that on reflinks inodes, so we have to skip out here.  And yes,
508eb5e248dSDarrick J. Wong 	 * 0 is the magic code for a bmap error.
509eb5e248dSDarrick J. Wong 	 *
510eb5e248dSDarrick J. Wong 	 * Since we don't pass back blockdev info, we can't return bmap
511eb5e248dSDarrick J. Wong 	 * information for rt files either.
512db1327b1SDarrick J. Wong 	 */
51366ae56a5SChristoph Hellwig 	if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
514db1327b1SDarrick J. Wong 		return 0;
515690c2a38SChristoph Hellwig 	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
516c59d87c4SChristoph Hellwig }
517c59d87c4SChristoph Hellwig 
518c59d87c4SChristoph Hellwig STATIC int
xfs_vm_read_folio(struct file * unused,struct folio * folio)5197479c505SMatthew Wilcox (Oracle) xfs_vm_read_folio(
520c59d87c4SChristoph Hellwig 	struct file		*unused,
5217479c505SMatthew Wilcox (Oracle) 	struct folio		*folio)
522c59d87c4SChristoph Hellwig {
5237479c505SMatthew Wilcox (Oracle) 	return iomap_read_folio(folio, &xfs_read_iomap_ops);
524c59d87c4SChristoph Hellwig }
525c59d87c4SChristoph Hellwig 
5269d24a13aSMatthew Wilcox (Oracle) STATIC void
xfs_vm_readahead(struct readahead_control * rac)5279d24a13aSMatthew Wilcox (Oracle) xfs_vm_readahead(
5289d24a13aSMatthew Wilcox (Oracle) 	struct readahead_control	*rac)
529c59d87c4SChristoph Hellwig {
5309d24a13aSMatthew Wilcox (Oracle) 	iomap_readahead(rac, &xfs_read_iomap_ops);
53122e757a4SDave Chinner }
53222e757a4SDave Chinner 
53367482129SDarrick J. Wong static int
xfs_iomap_swapfile_activate(struct swap_info_struct * sis,struct file * swap_file,sector_t * span)53467482129SDarrick J. Wong xfs_iomap_swapfile_activate(
53567482129SDarrick J. Wong 	struct swap_info_struct		*sis,
53667482129SDarrick J. Wong 	struct file			*swap_file,
53767482129SDarrick J. Wong 	sector_t			*span)
53867482129SDarrick J. Wong {
53930fa529eSChristoph Hellwig 	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
540690c2a38SChristoph Hellwig 	return iomap_swapfile_activate(sis, swap_file, span,
541690c2a38SChristoph Hellwig 			&xfs_read_iomap_ops);
54267482129SDarrick J. Wong }
54367482129SDarrick J. Wong 
544c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = {
5457479c505SMatthew Wilcox (Oracle) 	.read_folio		= xfs_vm_read_folio,
5469d24a13aSMatthew Wilcox (Oracle) 	.readahead		= xfs_vm_readahead,
547c59d87c4SChristoph Hellwig 	.writepages		= xfs_vm_writepages,
5484ce02c67SRitesh Harjani (IBM) 	.dirty_folio		= iomap_dirty_folio,
5498597447dSMatthew Wilcox (Oracle) 	.release_folio		= iomap_release_folio,
550d82354f6SMatthew Wilcox (Oracle) 	.invalidate_folio	= iomap_invalidate_folio,
551c59d87c4SChristoph Hellwig 	.bmap			= xfs_vm_bmap,
5522ec810d5SMatthew Wilcox (Oracle) 	.migrate_folio		= filemap_migrate_folio,
55382cb1417SChristoph Hellwig 	.is_partially_uptodate  = iomap_is_partially_uptodate,
554c59d87c4SChristoph Hellwig 	.error_remove_page	= generic_error_remove_page,
55567482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
556c59d87c4SChristoph Hellwig };
5576e2608dfSDan Williams 
5586e2608dfSDan Williams const struct address_space_operations xfs_dax_aops = {
5596e2608dfSDan Williams 	.writepages		= xfs_dax_writepages,
56046de8b97SMatthew Wilcox (Oracle) 	.dirty_folio		= noop_dirty_folio,
56167482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
5626e2608dfSDan Williams };
563