// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block.  We have to treat it
 * differently based on whether the file is a real time file or not, because
 * the bmap code does.
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
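	/*
	 * Realtime blocks are addressed linearly from the start of the rt
	 * device, so converting fs blocks to basic blocks is enough; data
	 * device fsbs encode an AG number and block and must be decoded.
	 */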
	if (XFS_IS_REALTIME_INODE(ip))
		return XFS_FSB_TO_BB(ip->i_mount, fsb);
	return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}

/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
 */
int
xfs_zero_extent(
	struct xfs_inode	*ip,
	xfs_fsblock_t		start_fsb,
	xfs_off_t		count_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
	xfs_daddr_t		sector = xfs_fsb_to_db(ip, start_fsb);
	sector_t		block = XFS_BB_TO_FSBT(mp, sector);

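	/*
	 * blkdev_issue_zeroout() works in 512-byte sectors, so shifting by
	 * (s_blocksize_bits - 9) converts fs block units to sector units.
	 */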
	return blkdev_issue_zeroout(target->bt_bdev,
		block << (mp->m_super->s_blocksize_bits - 9),
		count_fsb << (mp->m_super->s_blocksize_bits - 9),
		GFP_NOFS, 0);
}

#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
	struct xfs_bmalloca	*ap)
{
	struct xfs_mount	*mp = ap->ip->i_mount;
	xfs_fileoff_t		orig_offset = ap->offset;
	xfs_rtblock_t		rtb;
	xfs_extlen_t		prod = 0;  /* product factor for allocators */
	xfs_extlen_t		mod = 0;   /* offset/length alignment remainder */
	xfs_extlen_t		ralen = 0; /* realtime allocation length */
	xfs_extlen_t		align;     /* minimum allocation alignment */
	xfs_extlen_t		orig_length = ap->length;
	xfs_extlen_t		minlen = mp->m_sb.sb_rextsize;
	xfs_extlen_t		raminlen;
	bool			rtlocked = false;
	bool			ignore_locality = false;
	int			error;

	align = xfs_get_extsz_hint(ap->ip);
retry:
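	/*
	 * prod is the extent size hint measured in rt extents; the rt
	 * allocator tries to return a length that is a multiple of it
	 * where possible.
	 */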
	prod = align / mp->m_sb.sb_rextsize;
	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
					align, 1, ap->eof, 0,
					ap->conv, &ap->offset, &ap->length);
	if (error)
		return error;
	ASSERT(ap->length);
	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

	/*
	 * If we shifted the file offset downward to satisfy an extent size
	 * hint, increase minlen by that amount so that the allocator won't
	 * give us an allocation that's too short to cover at least one of the
	 * blocks that the caller asked for.
	 */
	if (ap->offset != orig_offset)
		minlen += orig_offset - ap->offset;

	/*
	 * If the offset & length are not perfectly aligned
	 * then kill prod, it will just get us in trouble.
	 */
	div_u64_rem(ap->offset, align, &mod);
	if (mod || ap->length % align)
		prod = 1;
	/*
	 * Set ralen to be the actual requested length in rtextents.
	 */
	ralen = ap->length / mp->m_sb.sb_rextsize;
	/*
	 * If the old value was close enough to XFS_MAX_BMBT_EXTLEN that
	 * we rounded up to it, cut it back so it's valid again.
	 * Note that if it's a really large request (bigger than
	 * XFS_MAX_BMBT_EXTLEN), we don't hear about that number, and can't
	 * adjust the starting point to match it.
	 */
	if (ralen * mp->m_sb.sb_rextsize >= XFS_MAX_BMBT_EXTLEN)
		ralen = XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize;

	/*
	 * Lock out modifications to both the RT bitmap and summary inodes
	 */
	if (!rtlocked) {
		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
		xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
		xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
		rtlocked = true;
	}

	/*
	 * If it's an allocation to an empty file at offset 0,
	 * pick an extent that will space things out in the rt area.
	 */
	if (ap->eof && ap->offset == 0) {
		xfs_rtblock_t	rtx;	/* realtime extent no */

		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
		if (error)
			return error;
		ap->blkno = rtx * mp->m_sb.sb_rextsize;
	} else {
		ap->blkno = 0;
	}

	xfs_bmap_adjacent(ap);

	/*
	 * Realtime allocation, done through xfs_rtallocate_extent.
	 */
	if (ignore_locality)
		ap->blkno = 0;
	else
		do_div(ap->blkno, mp->m_sb.sb_rextsize);
	rtb = ap->blkno;
	ap->length = ralen;
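	/* Convert minlen to rt extents; never ask for less than one. */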
	raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
	error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length,
			&ralen, ap->wasdel, prod, &rtb);
	if (error)
		return error;

	if (rtb != NULLRTBLOCK) {
		ap->blkno = rtb * mp->m_sb.sb_rextsize;
		ap->length = ralen * mp->m_sb.sb_rextsize;
		ap->ip->i_nblocks += ap->length;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= ap->length;
		/*
		 * Adjust the disk quota also.  This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
					XFS_TRANS_DQ_RTBCOUNT, ap->length);
		return 0;
	}

	if (align > mp->m_sb.sb_rextsize) {
		/*
		 * We previously enlarged the request length to try to satisfy
		 * an extent size hint.  The allocator didn't return anything,
		 * so reset the parameters to the original values and try again
		 * without alignment criteria.
		 */
		ap->offset = orig_offset;
		ap->length = orig_length;
		minlen = align = mp->m_sb.sb_rextsize;
		goto retry;
	}

	if (!ignore_locality && ap->blkno != 0) {
		/*
		 * If we can't allocate near a specific rt extent, try again
		 * without locality criteria.
		 */
		ignore_locality = true;
		goto retry;
	}

	ap->blkno = NULLFSBLOCK;
	ap->length = 0;
	return 0;
}
#endif /* CONFIG_XFS_RT */

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.  Delayed allocation
 * extents are not counted towards the totals.
 */
xfs_extnum_t
xfs_bmap_count_leaves(
	struct xfs_ifork	*ifp,
	xfs_filblks_t		*count)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		numrecs = 0;

	for_each_xfs_iext(ifp, &icur, &got) {
		if (!isnullstartblock(got.br_startblock)) {
			*count += got.br_blockcount;
			numrecs++;
		}
	}

	return numrecs;
}

/*
 * Count fsblocks of the given fork.  Delayed allocation extents are
 * not counted towards the totals.
 */
int
xfs_bmap_count_blocks(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_extnum_t		*nextents,
	xfs_filblks_t		*count)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur;
	xfs_extlen_t		btblocks = 0;
	int			error;

	*nextents = 0;
	*count = 0;

	if (!ifp)
		return 0;

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_BTREE:
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;

		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		error = xfs_btree_count_blocks(cur, &btblocks);
		xfs_btree_del_cursor(cur, error);
		if (error)
			return error;

		/*
		 * xfs_btree_count_blocks includes the root block contained in
		 * the inode fork in @btblocks, so subtract one because we're
		 * only interested in allocated disk blocks.
		 */
		*count += btblocks - 1;

		fallthrough;
	case XFS_DINODE_FMT_EXTENTS:
		*nextents = xfs_bmap_count_leaves(ifp, count);
		break;
	}

	return 0;
}

static int
xfs_getbmap_report_one(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	struct xfs_bmbt_irec	*got)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;
	bool			shared = false;
	int			error;

	error = xfs_reflink_trim_around_shared(ip, got, &shared);
	if (error)
		return error;

	if (isnullstartblock(got->br_startblock) ||
	    got->br_startblock == DELAYSTARTBLOCK) {
		/*
		 * Take the flush completion as being a point-in-time snapshot
		 * where there are no delalloc extents, and if any new ones
		 * have been created racily, just skip them as being 'after'
		 * the flush and so don't get reported.
		 */
		if (!(bmv->bmv_iflags & BMV_IF_DELALLOC))
			return 0;

		p->bmv_oflags |= BMV_OF_DELALLOC;
		p->bmv_block = -2;
	} else {
		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
	}

	if (got->br_state == XFS_EXT_UNWRITTEN &&
	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
		p->bmv_oflags |= BMV_OF_PREALLOC;

	if (shared)
		p->bmv_oflags |= BMV_OF_SHARED;

	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
	return 0;
}

static void
xfs_getbmap_report_hole(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	xfs_fileoff_t		bno,
	xfs_fileoff_t		end)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;

	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
		return;

	p->bmv_block = -1;
	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
}

static inline bool
xfs_getbmap_full(
	struct getbmapx		*bmv)
{
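	/*
	 * Note that bmv_count counts the header getbmapx record too, so the
	 * out array only has bmv_count - 1 usable slots.
	 */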
	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}

static bool
xfs_getbmap_next_rec(
	struct xfs_bmbt_irec	*rec,
	xfs_fileoff_t		total_end)
{
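	/*
	 * Advance @rec past the piece that was just reported; return false
	 * once the whole original record (ending at @total_end) is consumed.
	 */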
	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;

	if (end == total_end)
		return false;

	rec->br_startoff += rec->br_blockcount;
	if (!isnullstartblock(rec->br_startblock) &&
	    rec->br_startblock != DELAYSTARTBLOCK)
		rec->br_startblock += rec->br_blockcount;
	rec->br_blockcount = total_end - end;
	return true;
}

/*
 * Get inode's extents as described in bmv, and format for output.
 * Fills the out array until all extents are mapped, until the passed-in
 * bmv->bmv_count slots have been filled, or until we run off the end of
 * the requested range.
 */
int						/* error code */
xfs_getbmap(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	struct kgetbmap		*out)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			iflags = bmv->bmv_iflags;
	int			whichfork, lock, error = 0;
	int64_t			bmv_end, max_len;
	xfs_fileoff_t		bno, first_bno;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got, rec;
	xfs_filblks_t		len;
	struct xfs_iext_cursor	icur;

	if (bmv->bmv_iflags & ~BMV_IF_VALID)
		return -EINVAL;
#ifndef DEBUG
	/* Only allow CoW fork queries if we're debugging. */
	if (iflags & BMV_IF_COWFORK)
		return -EINVAL;
#endif
	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
		return -EINVAL;

	if (bmv->bmv_length < -1)
		return -EINVAL;
	bmv->bmv_entries = 0;
	if (bmv->bmv_length == 0)
		return 0;

	if (iflags & BMV_IF_ATTRFORK)
		whichfork = XFS_ATTR_FORK;
	else if (iflags & BMV_IF_COWFORK)
		whichfork = XFS_COW_FORK;
	else
		whichfork = XFS_DATA_FORK;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	switch (whichfork) {
	case XFS_ATTR_FORK:
		lock = xfs_ilock_attr_map_shared(ip);
		if (!xfs_inode_has_attr_fork(ip))
			goto out_unlock_ilock;

		max_len = 1LL << 32;
		break;
	case XFS_COW_FORK:
		lock = XFS_ILOCK_SHARED;
		xfs_ilock(ip, lock);

		/* No CoW fork?  Just return */
		if (!xfs_ifork_ptr(ip, whichfork))
			goto out_unlock_ilock;

		if (xfs_get_cowextsz_hint(ip))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);
		break;
	case XFS_DATA_FORK:
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_disk_size)) {
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		if (xfs_get_extsz_hint(ip) ||
		    (ip->i_diflags &
		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = xfs_ilock_data_map_shared(ip);
		break;
	}

	ifp = xfs_ifork_ptr(ip, whichfork);

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	case XFS_DINODE_FMT_LOCAL:
		/* Local format inode forks report no extents. */
		goto out_unlock_ilock;
	default:
		error = -EINVAL;
		goto out_unlock_ilock;
	}

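	/* A bmv_length of -1 means "map to the end of the file". */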
	if (bmv->bmv_length == -1) {
		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
	}

	bmv_end = bmv->bmv_offset + bmv->bmv_length;

	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);

	error = xfs_iread_extents(NULL, ip, whichfork);
	if (error)
		goto out_unlock_ilock;

	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
		/*
		 * Report a whole-file hole if the delalloc flag is set to
		 * stay compatible with the old implementation.
		 */
		if (iflags & BMV_IF_DELALLOC)
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		goto out_unlock_ilock;
	}

	while (!xfs_getbmap_full(bmv)) {
		xfs_trim_extent(&got, first_bno, len);

		/*
		 * Report an entry for a hole if this extent doesn't directly
		 * follow the previous one.
		 */
		if (got.br_startoff > bno) {
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					got.br_startoff);
			if (xfs_getbmap_full(bmv))
				break;
		}

		/*
		 * In order to report shared extents accurately, we report each
		 * distinct shared / unshared part of a single bmbt record with
		 * an individual getbmapx record.
		 */
		bno = got.br_startoff + got.br_blockcount;
		rec = got;
		do {
			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
					&rec);
			if (error || xfs_getbmap_full(bmv))
				goto out_unlock_ilock;
		} while (xfs_getbmap_next_rec(&rec, bno));

		if (!xfs_iext_next_extent(ifp, &icur, &got)) {
			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

			if (bmv->bmv_entries > 0)
				out[bmv->bmv_entries - 1].bmv_oflags |=
								BMV_OF_LAST;

			if (whichfork != XFS_ATTR_FORK && bno < end &&
			    !xfs_getbmap_full(bmv)) {
				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
						bno, end);
			}
			break;
		}

		if (bno >= first_bno + len)
			break;
	}

out_unlock_ilock:
	xfs_iunlock(ip, lock);
out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return error;
}

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  This will always punch out both the start and end blocks, even
 * if the ranges only partially overlap them, so it is up to the caller to
 * ensure that partial blocks are not passed in.
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_off_t		start_byte,
	xfs_off_t		end_byte)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = &ip->i_df;
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
	struct xfs_bmbt_irec	got, del;
	struct xfs_iext_cursor	icur;
	int			error = 0;

	ASSERT(!xfs_need_iread_extents(ifp));

	xfs_ilock(ip, XFS_ILOCK_EXCL);
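	/*
	 * Walk backwards from the end of the range, starting from the last
	 * extent that begins before end_fsb.
	 */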
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		goto out_unlock;

	while (got.br_startoff + got.br_blockcount > start_fsb) {
		del = got;
		xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb);

		/*
		 * A delete can push the cursor forward.  Step back to the
		 * previous extent on non-delalloc or extents outside the
		 * target range.
		 */
		if (!del.br_blockcount ||
		    !isnullstartblock(del.br_startblock)) {
			if (!xfs_iext_prev_extent(ifp, &icur, &got))
				break;
			continue;
		}

		error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
						  &got, &del);
		if (error || !xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

/*
 * Test whether it is appropriate to check an inode for, and free, post-EOF
 * blocks.
 */
bool
xfs_can_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_bmbt_irec	imap;
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		end_fsb;
	xfs_fileoff_t		last_fsb;
	int			nimaps = 1;
	int			error;

	/*
	 * Caller must either hold the exclusive io lock; or be inactivating
	 * the inode, which guarantees there are no other users of the inode.
	 */
	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL) ||
	       (VFS_I(ip)->i_state & I_FREEING));

	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and delalloc blocks will not
	 * have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (xfs_need_iread_extents(&ip->i_df))
		return false;

	/*
	 * Only free real extents for inodes with persistent preallocations or
	 * the append-only flag.
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (ip->i_delayed_blks == 0)
			return false;

	/*
	 * Do not try to free post-EOF blocks if EOF is beyond the end of the
	 * range supported by the page cache, because the truncation will loop
	 * forever.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
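	/*
	 * rt extents can only be freed whole, so round the EOF block up to
	 * an rt extent boundary.
	 */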
	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
		end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return false;

	/*
	 * Look up the mapping for the first block past EOF.  If we can't find
	 * it, there's nothing to free.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps,
			0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	if (error || nimaps == 0)
		return false;

	/*
	 * If there's a real mapping there or there are delayed allocation
	 * reservations, then we have post-EOF blocks to try to free.
	 */
	return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
}

/*
 * This is called to free any blocks beyond eof.  The caller must hold
 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
 * reference to the inode.
 */
int
xfs_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/* Attach the dquots to the inode up front. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/* Wait on dio to ensure i_size has settled. */
	inode_dio_wait(VFS_I(ip));

	/*
	 * For preallocated files only free delayed allocations.
	 *
	 * Note that this means we also leave speculative preallocations in
	 * place for preallocated files.
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
		if (ip->i_delayed_blks) {
			xfs_bmap_punch_delalloc_range(ip,
				round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
				LLONG_MAX);
		}
		xfs_inode_clear_eofblocks_tag(ip);
		return 0;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
	if (error) {
		ASSERT(xfs_is_shutdown(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Do not update the on-disk file size.  If we update the on-disk file
	 * size and then the system crashes before the contents of the file are
	 * flushed to disk then the files may be full of holes (ie NULL files
	 * bug).
	 */
	error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
				XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
	if (error)
		goto err_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	xfs_inode_clear_eofblocks_tag(ip);
	goto out_unlock;

err_cancel:
	/*
	 * If we get an error at this point we simply don't
	 * bother truncating the file.
	 */
	xfs_trans_cancel(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (xfs_is_shutdown(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
	endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
	allocatesize_fsb = endoffset_fsb - startoffset_fsb;

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;
		unsigned int	dblocks, rblocks, resblks;
		int		nimaps = 1;

		/*
		 * Determine space reservations for data/realtime.
		 */
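		/*
		 * Round the start of the range down and the end up to the
		 * extent size hint so the reservation covers the aligned
		 * range the allocator may actually allocate.
		 */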
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			div_u64_rem(startoffset_fsb, extsz, &temp);
			if (temp)
				e += temp;
			div_u64_rem(e, extsz, &temp);
			if (temp)
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow.  We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to XFS_MAX_BMBT_EXTLEN (21 bits), so use that to enforce the
		 * limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s),
				(XFS_MAX_BMBT_EXTLEN * nimaps));
		if (unlikely(rt)) {
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			rblocks = resblks;
		} else {
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			rblocks = 0;
		}

		error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
				dblocks, rblocks, false, &tp);
		if (error)
			break;

		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
				XFS_IEXT_ADD_NOSPLIT_CNT);
		if (error == -EFBIG)
			error = xfs_iext_count_upgrade(tp, ip,
					XFS_IEXT_ADD_NOSPLIT_CNT);
		if (error)
			goto error;

		/*
		 * If the allocator cannot find a single free extent large
		 * enough to cover the start block of the requested range,
		 * xfs_bmapi_write will return -ENOSR.
		 *
		 * In that case we simply need to keep looping with the same
		 * startoffset_fsb so that one of the following allocations
		 * will eventually reach the requested range.
		 */
		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
				allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
				&nimaps);
		if (error) {
			if (error != -ENOSR)
				goto error;
			error = 0;
		} else {
			startoffset_fsb += imapp->br_blockcount;
			allocatesize_fsb -= imapp->br_blockcount;
		}

		ip->i_diflags |= XFS_DIFLAG_PREALLOC;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	return error;

error:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	int			error;

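	/*
	 * The block reservation covers the bmap btree splits and joins that
	 * punching out the extent may require.
	 */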
	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks, 0,
			false, &tp);
	if (error)
		return error;

	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
			XFS_IEXT_PUNCH_HOLE_CNT);
	if (error == -EFBIG)
		error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
	if (error)
		goto out_trans_cancel;

	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

/* Caller must first wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	/*
	 * Make sure we extend the flush out to extent alignment
	 * boundaries so any extent range overlapping the start/end
	 * of the modification we are about to do is clean and idle.
	 */
	rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE);
	start = rounddown_64(offset, rounding);
	end = roundup_64(offset + len, rounding) - 1;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}

int
xfs_free_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			done = 0, error;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)	/* if nothing being freed */
		return 0;

	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/* We can only free complete realtime extents. */
	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
		startoffset_fsb = roundup_64(startoffset_fsb,
					     mp->m_sb.sb_rextsize);
		endoffset_fsb = rounddown_64(endoffset_fsb,
					     mp->m_sb.sb_rextsize);
	}

	/*
	 * Need to zero the stuff we're not freeing, on disk.
	 */
	if (endoffset_fsb > startoffset_fsb) {
		while (!done) {
			error = xfs_unmap_extent(ip, startoffset_fsb,
					endoffset_fsb - startoffset_fsb, &done);
			if (error)
				return error;
		}
	}

	/*
	 * Now that we've unmapped all full blocks, we'll have to zero out any
	 * partial block at the beginning and/or end.  xfs_zero_range is smart
	 * enough to skip any holes, including those we just created, but we
	 * must take care not to zero beyond EOF and enlarge i_size.
	 */
	if (offset >= XFS_ISIZE(ip))
		return 0;
	if (offset + len > XFS_ISIZE(ip))
		len = XFS_ISIZE(ip) - offset;
	error = xfs_zero_range(ip, offset, len, NULL);
	if (error)
		return error;

	/*
	 * If we zeroed right up to EOF and EOF straddles a page boundary we
	 * must make sure that the post-EOF area is also zeroed because the
	 * page could be mmap'd and xfs_zero_range doesn't do that for us.
	 * Writeback of the eof page will do this, albeit clumsily.
	 */
	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
				round_down(offset + len, PAGE_SIZE), LLONG_MAX);
	}

	return error;
}

105772c1a739Skbuild test robot static int
xfs_prepare_shift(struct xfs_inode * ip,loff_t offset)10584ed36c6bSChristoph Hellwig xfs_prepare_shift(
1059e1d8fb88SNamjae Jeon struct xfs_inode *ip,
10604ed36c6bSChristoph Hellwig loff_t offset)
1061e1d8fb88SNamjae Jeon {
1062*fe962ab3SJohn Garry unsigned int rounding;
1063e1d8fb88SNamjae Jeon int error;
1064f71721d0SBrian Foster
1065f71721d0SBrian Foster /*
1066f71721d0SBrian Foster * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
1067f71721d0SBrian Foster * into the accessible region of the file.
1068f71721d0SBrian Foster */
10692bc2d49cSChristoph Hellwig if (xfs_can_free_eofblocks(ip)) {
1070a36b9261SBrian Foster error = xfs_free_eofblocks(ip);
107141b9d726SBrian Foster if (error)
107241b9d726SBrian Foster return error;
107341b9d726SBrian Foster }
10741669a8caSDave Chinner
1075f71721d0SBrian Foster /*
1076d0c22041SBrian Foster * Shift operations must stabilize the start block offset boundary along
1077d0c22041SBrian Foster * with the full range of the operation. If we don't, a COW writeback
1078d0c22041SBrian Foster * completion could race with an insert, front merge with the start
1079d0c22041SBrian Foster * extent (after split) during the shift and corrupt the file. Start
1080*fe962ab3SJohn Garry * with the allocation unit just prior to the start to stabilize the
1081*fe962ab3SJohn Garry * boundary.
1082d0c22041SBrian Foster */
1083*fe962ab3SJohn Garry rounding = xfs_inode_alloc_unitsize(ip);
1084*fe962ab3SJohn Garry offset = rounddown_64(offset, rounding);
1085d0c22041SBrian Foster if (offset)
1086*fe962ab3SJohn Garry offset -= rounding;
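	/*
	 * Editorial worked example (assuming a 4096-byte allocation
	 * unit): an offset of 10000 rounds down to 8192 and then steps
	 * back to 4096, so the extent covering the allocation unit just
	 * before the shift range is stabilized as well.
	 */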
1087d0c22041SBrian Foster
1088d0c22041SBrian Foster /*
1089f71721d0SBrian Foster * Write back and invalidate the page cache for the remainder of the file,
1090a904b1caSNamjae Jeon * as we're about to shift down every extent from offset to EOF.
1091f71721d0SBrian Foster */
10927f9f71beSDave Chinner error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
10931749d1eaSBrian Foster if (error)
10941749d1eaSBrian Foster return error;
1095e1d8fb88SNamjae Jeon
1096a904b1caSNamjae Jeon /*
10973af423b0SDarrick J. Wong * Clean out anything hanging around in the cow fork now that
10983af423b0SDarrick J. Wong * we've flushed all the dirty data out to disk to avoid having
10993af423b0SDarrick J. Wong * CoW extents at the wrong offsets.
11003af423b0SDarrick J. Wong */
110151d62690SChristoph Hellwig if (xfs_inode_has_cow_data(ip)) {
11023af423b0SDarrick J. Wong error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
11033af423b0SDarrick J. Wong true);
11043af423b0SDarrick J. Wong if (error)
11053af423b0SDarrick J. Wong return error;
11063af423b0SDarrick J. Wong }
11073af423b0SDarrick J. Wong
11084ed36c6bSChristoph Hellwig return 0;
1109e1d8fb88SNamjae Jeon }
1110e1d8fb88SNamjae Jeon
1111e1d8fb88SNamjae Jeon /*
1112a904b1caSNamjae Jeon * xfs_collapse_file_space()
1113a904b1caSNamjae Jeon * This routine frees disk space and shifts extents for the given file.
1114a904b1caSNamjae Jeon * First we free the data blocks in the specified range by calling
1115a904b1caSNamjae Jeon * xfs_free_file_space(), which also syncs dirty data and invalidates
1116a904b1caSNamjae Jeon * the page cache over the region being collapsed. Then we shift the
1117a904b1caSNamjae Jeon * extent records to the left to cover the resulting hole.
1118a904b1caSNamjae Jeon * RETURNS:
1119a904b1caSNamjae Jeon * 0 on success
1120a904b1caSNamjae Jeon * errno on error
1121a904b1caSNamjae Jeon *
1122a904b1caSNamjae Jeon */
1123a904b1caSNamjae Jeon int
1124a904b1caSNamjae Jeon xfs_collapse_file_space(
1125a904b1caSNamjae Jeon struct xfs_inode *ip,
1126a904b1caSNamjae Jeon xfs_off_t offset,
1127a904b1caSNamjae Jeon xfs_off_t len)
1128a904b1caSNamjae Jeon {
11294ed36c6bSChristoph Hellwig struct xfs_mount *mp = ip->i_mount;
11304ed36c6bSChristoph Hellwig struct xfs_trans *tp;
1131a904b1caSNamjae Jeon int error;
11324ed36c6bSChristoph Hellwig xfs_fileoff_t next_fsb = XFS_B_TO_FSB(mp, offset + len);
11334ed36c6bSChristoph Hellwig xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len);
1134ecfea3f0SChristoph Hellwig bool done = false;
1135a904b1caSNamjae Jeon
1136a904b1caSNamjae Jeon ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
11379ad1a23aSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
11389ad1a23aSChristoph Hellwig
1139a904b1caSNamjae Jeon trace_xfs_collapse_file_space(ip);
1140a904b1caSNamjae Jeon
1141a904b1caSNamjae Jeon error = xfs_free_file_space(ip, offset, len);
1142a904b1caSNamjae Jeon if (error)
1143a904b1caSNamjae Jeon return error;
1144a904b1caSNamjae Jeon
11454ed36c6bSChristoph Hellwig error = xfs_prepare_shift(ip, offset);
11464ed36c6bSChristoph Hellwig if (error)
11474ed36c6bSChristoph Hellwig return error;
11484ed36c6bSChristoph Hellwig
1149211683b2SBrian Foster error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
11504ed36c6bSChristoph Hellwig if (error)
1151211683b2SBrian Foster return error;
11524ed36c6bSChristoph Hellwig
11534ed36c6bSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL);
1154211683b2SBrian Foster xfs_trans_ijoin(tp, ip, 0);
11554ed36c6bSChristoph Hellwig
1156211683b2SBrian Foster while (!done) {
1157ecfea3f0SChristoph Hellwig error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
1158333f950cSBrian Foster &done);
11594ed36c6bSChristoph Hellwig if (error)
1160c8eac49eSBrian Foster goto out_trans_cancel;
1161211683b2SBrian Foster if (done)
1162211683b2SBrian Foster break;
11634ed36c6bSChristoph Hellwig
1164211683b2SBrian Foster /* finish any deferred frees and roll the transaction */
1165211683b2SBrian Foster error = xfs_defer_finish(&tp);
1166211683b2SBrian Foster if (error)
1167211683b2SBrian Foster goto out_trans_cancel;
11684ed36c6bSChristoph Hellwig }
11694ed36c6bSChristoph Hellwig
1170211683b2SBrian Foster error = xfs_trans_commit(tp);
1171211683b2SBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL);
11724ed36c6bSChristoph Hellwig return error;
11734ed36c6bSChristoph Hellwig
11744ed36c6bSChristoph Hellwig out_trans_cancel:
11754ed36c6bSChristoph Hellwig xfs_trans_cancel(tp);
1176211683b2SBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL);
11774ed36c6bSChristoph Hellwig return error;
1178a904b1caSNamjae Jeon }
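/*
 * Editorial example (not part of the kernel source): collapse is driven
 * from userspace through fallocate(2) with FALLOC_FL_COLLAPSE_RANGE. A
 * minimal sketch, assuming a 4096-byte filesystem block size; offset and
 * length must both be block multiples or the kernel returns EINVAL:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("testfile", O_RDWR);
 *
 *		if (fd < 0)
 *			return 1;
 *		// Drop bytes [4096, 12288) and shift everything after
 *		// them down by 8192 bytes, shrinking the file.
 *		if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 8192))
 *			perror("FALLOC_FL_COLLAPSE_RANGE");
 *		return 0;
 *	}
 */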
1179a904b1caSNamjae Jeon
1180a904b1caSNamjae Jeon /*
1181a904b1caSNamjae Jeon * xfs_insert_file_space()
1182a904b1caSNamjae Jeon * This routine creates a hole by shifting extents for the given file.
1183a904b1caSNamjae Jeon * First we sync dirty data and invalidate the page cache over the
1184a904b1caSNamjae Jeon * region on which the insert range is working. Then we split the
1185a904b1caSNamjae Jeon * extent at the given offset by calling xfs_bmap_split_extent, and
1186a904b1caSNamjae Jeon * shift all extent records lying between [offset, last allocated
1187a904b1caSNamjae Jeon * extent] to the right to make room for the hole.
1188a904b1caSNamjae Jeon * RETURNS:
1189a904b1caSNamjae Jeon * 0 on success
1190a904b1caSNamjae Jeon * errno on error
1191a904b1caSNamjae Jeon */
1192a904b1caSNamjae Jeon int
1193a904b1caSNamjae Jeon xfs_insert_file_space(
1194a904b1caSNamjae Jeon struct xfs_inode *ip,
1195a904b1caSNamjae Jeon loff_t offset,
1196a904b1caSNamjae Jeon loff_t len)
1197a904b1caSNamjae Jeon {
11984ed36c6bSChristoph Hellwig struct xfs_mount *mp = ip->i_mount;
11994ed36c6bSChristoph Hellwig struct xfs_trans *tp;
12004ed36c6bSChristoph Hellwig int error;
12014ed36c6bSChristoph Hellwig xfs_fileoff_t stop_fsb = XFS_B_TO_FSB(mp, offset);
12024ed36c6bSChristoph Hellwig xfs_fileoff_t next_fsb = NULLFSBLOCK;
12034ed36c6bSChristoph Hellwig xfs_fileoff_t shift_fsb = XFS_B_TO_FSB(mp, len);
1204ecfea3f0SChristoph Hellwig bool done = false;
12054ed36c6bSChristoph Hellwig
1206a904b1caSNamjae Jeon ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
12079ad1a23aSChristoph Hellwig ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
12089ad1a23aSChristoph Hellwig
1209a904b1caSNamjae Jeon trace_xfs_insert_file_space(ip);
1210a904b1caSNamjae Jeon
1211f62cb48eSDarrick J. Wong error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
1212f62cb48eSDarrick J. Wong if (error)
1213f62cb48eSDarrick J. Wong return error;
1214f62cb48eSDarrick J. Wong
12154ed36c6bSChristoph Hellwig error = xfs_prepare_shift(ip, offset);
12164ed36c6bSChristoph Hellwig if (error)
12174ed36c6bSChristoph Hellwig return error;
12184ed36c6bSChristoph Hellwig
1219b73df17eSBrian Foster error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
1220b73df17eSBrian Foster XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
1221b73df17eSBrian Foster if (error)
1222b73df17eSBrian Foster return error;
1223b73df17eSBrian Foster
1224b73df17eSBrian Foster xfs_ilock(ip, XFS_ILOCK_EXCL);
1225dd87f87dSBrian Foster xfs_trans_ijoin(tp, ip, 0);
1226b73df17eSBrian Foster
122785ef08b5SChandan Babu R error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
122885ef08b5SChandan Babu R XFS_IEXT_PUNCH_HOLE_CNT);
12294f86bb4bSChandan Babu R if (error == -EFBIG)
12304f86bb4bSChandan Babu R error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
123185ef08b5SChandan Babu R if (error)
123285ef08b5SChandan Babu R goto out_trans_cancel;
123385ef08b5SChandan Babu R
1234dd87f87dSBrian Foster /*
1235dd87f87dSBrian Foster * The extent shifting code works on extent granularity. So, if stop_fsb
1236dd87f87dSBrian Foster * is not the starting block of an extent, we need to split the extent at
1237dd87f87dSBrian Foster * stop_fsb.
1238dd87f87dSBrian Foster */
1239b73df17eSBrian Foster error = xfs_bmap_split_extent(tp, ip, stop_fsb);
1240b73df17eSBrian Foster if (error)
1241b73df17eSBrian Foster goto out_trans_cancel;
1242b73df17eSBrian Foster
1243dd87f87dSBrian Foster do {
12449c516e0eSBrian Foster error = xfs_defer_finish(&tp);
12454ed36c6bSChristoph Hellwig if (error)
1246dd87f87dSBrian Foster goto out_trans_cancel;
12474ed36c6bSChristoph Hellwig
1248ecfea3f0SChristoph Hellwig error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
1249333f950cSBrian Foster &done, stop_fsb);
12504ed36c6bSChristoph Hellwig if (error)
1251c8eac49eSBrian Foster goto out_trans_cancel;
1252dd87f87dSBrian Foster } while (!done);
12534ed36c6bSChristoph Hellwig
12544ed36c6bSChristoph Hellwig error = xfs_trans_commit(tp);
1255dd87f87dSBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL);
12564ed36c6bSChristoph Hellwig return error;
12574ed36c6bSChristoph Hellwig
1258c8eac49eSBrian Foster out_trans_cancel:
12594ed36c6bSChristoph Hellwig xfs_trans_cancel(tp);
1260dd87f87dSBrian Foster xfs_iunlock(ip, XFS_ILOCK_EXCL);
12614ed36c6bSChristoph Hellwig return error;
1262a904b1caSNamjae Jeon }
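/*
 * Editorial example (not part of the kernel source): insert range is the
 * userspace-visible counterpart of the above, again via fallocate(2). A
 * minimal sketch under the same 4096-byte block size assumption; the new
 * hole must land strictly inside the file:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("testfile", O_RDWR);
 *
 *		if (fd < 0)
 *			return 1;
 *		// Open an 8192-byte hole at offset 4096, shifting the
 *		// existing extents right and growing the file.
 *		if (fallocate(fd, FALLOC_FL_INSERT_RANGE, 4096, 8192))
 *			perror("FALLOC_FL_INSERT_RANGE");
 *		return 0;
 *	}
 */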
1263a904b1caSNamjae Jeon
1264a904b1caSNamjae Jeon /*
1265a133d952SDave Chinner * We need to check that the format of the data fork in the temporary inode is
1266a133d952SDave Chinner * valid for the target inode before doing the swap. This is not a problem with
1267a133d952SDave Chinner * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
1268a133d952SDave Chinner * data fork depending on the space the attribute fork is taking, so we can get
1269a133d952SDave Chinner * invalid formats on the target inode.
1270a133d952SDave Chinner *
1271a133d952SDave Chinner * E.g. target has space for 7 extents in extent format, temp inode only has
1272a133d952SDave Chinner * space for 6. If we defragment down to 7 extents, then the tmp format is a
1273a133d952SDave Chinner * btree, but when swapped it needs to be in extent format. Hence we can't just
1274a133d952SDave Chinner * blindly swap data forks on attr2 filesystems.
1275a133d952SDave Chinner *
1276a133d952SDave Chinner * Note that we check the swap in both directions so that we don't end up with
1277a133d952SDave Chinner * a corrupt temporary inode, either.
1278a133d952SDave Chinner *
1279a133d952SDave Chinner * Note that fixing the way xfs_fsr sets up the attribute fork in the source
1280a133d952SDave Chinner * inode will prevent this situation from occurring, so all we do here is
1281a133d952SDave Chinner * reject and log the attempt. basically we are putting the responsibility on
1282a133d952SDave Chinner * reject and log the attempt. Basically, we are putting the responsibility on
1283a133d952SDave Chinner */
1284a133d952SDave Chinner static int
1285a133d952SDave Chinner xfs_swap_extents_check_format(
1286e06259aaSDarrick J. Wong struct xfs_inode *ip, /* target inode */
1287e06259aaSDarrick J. Wong struct xfs_inode *tip) /* tmp inode */
1288a133d952SDave Chinner {
1289f7e67b20SChristoph Hellwig struct xfs_ifork *ifp = &ip->i_df;
1290f7e67b20SChristoph Hellwig struct xfs_ifork *tifp = &tip->i_df;
1291a133d952SDave Chinner
1292765d3c39SDarrick J. Wong /* User/group/project quota ids must match if quotas are enforced. */
1293765d3c39SDarrick J. Wong if (XFS_IS_QUOTA_ON(ip->i_mount) &&
1294765d3c39SDarrick J. Wong (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) ||
1295765d3c39SDarrick J. Wong !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) ||
1296ceaf603cSChristoph Hellwig ip->i_projid != tip->i_projid))
1297765d3c39SDarrick J. Wong return -EINVAL;
1298765d3c39SDarrick J. Wong
1299a133d952SDave Chinner /* Should never get a local format */
1300f7e67b20SChristoph Hellwig if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
1301f7e67b20SChristoph Hellwig tifp->if_format == XFS_DINODE_FMT_LOCAL)
13022451337dSDave Chinner return -EINVAL;
1303a133d952SDave Chinner
1304a133d952SDave Chinner /*
1305a133d952SDave Chinner * If the target inode has fewer extents than the temporary inode, then
1306a133d952SDave Chinner * why did userspace call us?
1307a133d952SDave Chinner */
1308f7e67b20SChristoph Hellwig if (ifp->if_nextents < tifp->if_nextents)
13092451337dSDave Chinner return -EINVAL;
1310a133d952SDave Chinner
1311a133d952SDave Chinner /*
13121f08af52SDarrick J. Wong * If we have to use the (expensive) rmap swap method, we can
13131f08af52SDarrick J. Wong * handle any number of extents and any format.
13141f08af52SDarrick J. Wong */
131538c26bfdSDave Chinner if (xfs_has_rmapbt(ip->i_mount))
13161f08af52SDarrick J. Wong return 0;
13171f08af52SDarrick J. Wong
13181f08af52SDarrick J. Wong /*
1319a133d952SDave Chinner * If the target inode is in extent form and the temp inode is in btree
1320a133d952SDave Chinner * form, then we will end up with the target inode in the wrong format,
1321a133d952SDave Chinner * as we already know there are fewer extents in the temp inode.
1322a133d952SDave Chinner */
1323f7e67b20SChristoph Hellwig if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1324f7e67b20SChristoph Hellwig tifp->if_format == XFS_DINODE_FMT_BTREE)
13252451337dSDave Chinner return -EINVAL;
1326a133d952SDave Chinner
1327a133d952SDave Chinner /* Check temp in extent form to max in target */
1328f7e67b20SChristoph Hellwig if (tifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1329f7e67b20SChristoph Hellwig tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
13302451337dSDave Chinner return -EINVAL;
1331a133d952SDave Chinner
1332a133d952SDave Chinner /* Check target in extent form to max in temp */
1333f7e67b20SChristoph Hellwig if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1334f7e67b20SChristoph Hellwig ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
13352451337dSDave Chinner return -EINVAL;
1336a133d952SDave Chinner
1337a133d952SDave Chinner /*
1338a133d952SDave Chinner * If we are in a btree format, check that the temp root block will fit
1339a133d952SDave Chinner * in the target and that it has enough extents to be in btree format
1340a133d952SDave Chinner * in the target.
1341a133d952SDave Chinner *
1342a133d952SDave Chinner * Note that we have to be careful to allow btree->extent conversions
1343a133d952SDave Chinner * (a common defrag case) which will occur when the temp inode is in
1344a133d952SDave Chinner * extent format...
1345a133d952SDave Chinner */
1346f7e67b20SChristoph Hellwig if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
1347932b42c6SDarrick J. Wong if (xfs_inode_has_attr_fork(ip) &&
1348c01147d9SDarrick J. Wong XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip))
13492451337dSDave Chinner return -EINVAL;
1350f7e67b20SChristoph Hellwig if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
13512451337dSDave Chinner return -EINVAL;
1352a133d952SDave Chinner }
1353a133d952SDave Chinner
1354a133d952SDave Chinner /* Reciprocal target->temp btree format checks */
1355f7e67b20SChristoph Hellwig if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
1356932b42c6SDarrick J. Wong if (xfs_inode_has_attr_fork(tip) &&
1357c01147d9SDarrick J. Wong XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
13582451337dSDave Chinner return -EINVAL;
1359f7e67b20SChristoph Hellwig if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
13602451337dSDave Chinner return -EINVAL;
1361a133d952SDave Chinner }
1362a133d952SDave Chinner
1363a133d952SDave Chinner return 0;
1364a133d952SDave Chinner }
1365a133d952SDave Chinner
13667abbb8f9SDave Chinner static int
13674ef897a2SDave Chinner xfs_swap_extent_flush(
13684ef897a2SDave Chinner struct xfs_inode *ip)
13694ef897a2SDave Chinner {
13704ef897a2SDave Chinner int error;
13714ef897a2SDave Chinner
13724ef897a2SDave Chinner error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
13734ef897a2SDave Chinner if (error)
13744ef897a2SDave Chinner return error;
13754ef897a2SDave Chinner truncate_pagecache_range(VFS_I(ip), 0, -1);
13764ef897a2SDave Chinner
13774ef897a2SDave Chinner /* Verify O_DIRECT for ftmp */
13784ef897a2SDave Chinner if (VFS_I(ip)->i_mapping->nrpages)
13794ef897a2SDave Chinner return -EINVAL;
13804ef897a2SDave Chinner return 0;
13814ef897a2SDave Chinner }
13824ef897a2SDave Chinner
13831f08af52SDarrick J. Wong /*
13841f08af52SDarrick J. Wong * Move extents from one file to another, when rmap is enabled.
13851f08af52SDarrick J. Wong */
13861f08af52SDarrick J. Wong STATIC int
13871f08af52SDarrick J. Wong xfs_swap_extent_rmap(
13881f08af52SDarrick J. Wong struct xfs_trans **tpp,
13891f08af52SDarrick J. Wong struct xfs_inode *ip,
13901f08af52SDarrick J. Wong struct xfs_inode *tip)
13911f08af52SDarrick J. Wong {
13927a7943c7SBrian Foster struct xfs_trans *tp = *tpp;
13931f08af52SDarrick J. Wong struct xfs_bmbt_irec irec;
13941f08af52SDarrick J. Wong struct xfs_bmbt_irec uirec;
13951f08af52SDarrick J. Wong struct xfs_bmbt_irec tirec;
13961f08af52SDarrick J. Wong xfs_fileoff_t offset_fsb;
13971f08af52SDarrick J. Wong xfs_fileoff_t end_fsb;
13981f08af52SDarrick J. Wong xfs_filblks_t count_fsb;
13991f08af52SDarrick J. Wong int error;
14001f08af52SDarrick J. Wong xfs_filblks_t ilen;
14011f08af52SDarrick J. Wong xfs_filblks_t rlen;
14021f08af52SDarrick J. Wong int nimaps;
1403c8ce540dSDarrick J. Wong uint64_t tip_flags2;
14041f08af52SDarrick J. Wong
14051f08af52SDarrick J. Wong /*
14061f08af52SDarrick J. Wong * If the source file has shared blocks, we must flag the donor
14071f08af52SDarrick J. Wong * file as having shared blocks so that we get the shared-block
14081f08af52SDarrick J. Wong * rmap functions when we go to fix up the rmaps. The flags
14091f08af52SDarrick J. Wong * will be switch for reals later.
14101f08af52SDarrick J. Wong * will be switched for real later.
14113e09ab8fSChristoph Hellwig tip_flags2 = tip->i_diflags2;
14123e09ab8fSChristoph Hellwig if (ip->i_diflags2 & XFS_DIFLAG2_REFLINK)
14133e09ab8fSChristoph Hellwig tip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
14141f08af52SDarrick J. Wong
14151f08af52SDarrick J. Wong offset_fsb = 0;
14161f08af52SDarrick J. Wong end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
14171f08af52SDarrick J. Wong count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
14181f08af52SDarrick J. Wong
14191f08af52SDarrick J. Wong while (count_fsb) {
14201f08af52SDarrick J. Wong /* Read extent from the donor file */
14211f08af52SDarrick J. Wong nimaps = 1;
14221f08af52SDarrick J. Wong error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
14231f08af52SDarrick J. Wong &nimaps, 0);
14241f08af52SDarrick J. Wong if (error)
14251f08af52SDarrick J. Wong goto out;
14261f08af52SDarrick J. Wong ASSERT(nimaps == 1);
14271f08af52SDarrick J. Wong ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);
14281f08af52SDarrick J. Wong
14291f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap(tip, &tirec);
14301f08af52SDarrick J. Wong ilen = tirec.br_blockcount;
14311f08af52SDarrick J. Wong
14321f08af52SDarrick J. Wong /* Unmap the old blocks in the source file. */
14331f08af52SDarrick J. Wong while (tirec.br_blockcount) {
1434692b6cddSDave Chinner ASSERT(tp->t_highest_agno == NULLAGNUMBER);
14351f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
14361f08af52SDarrick J. Wong
14371f08af52SDarrick J. Wong /* Read extent from the source file */
14381f08af52SDarrick J. Wong nimaps = 1;
14391f08af52SDarrick J. Wong error = xfs_bmapi_read(ip, tirec.br_startoff,
14401f08af52SDarrick J. Wong tirec.br_blockcount, &irec,
14411f08af52SDarrick J. Wong &nimaps, 0);
14421f08af52SDarrick J. Wong if (error)
1443d5a2e289SBrian Foster goto out;
14441f08af52SDarrick J. Wong ASSERT(nimaps == 1);
14451f08af52SDarrick J. Wong ASSERT(tirec.br_startoff == irec.br_startoff);
14461f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);
14471f08af52SDarrick J. Wong
14481f08af52SDarrick J. Wong /* Trim the extent. */
14491f08af52SDarrick J. Wong uirec = tirec;
14501f08af52SDarrick J. Wong uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
14511f08af52SDarrick J. Wong tirec.br_blockcount,
14521f08af52SDarrick J. Wong irec.br_blockcount);
14531f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
14541f08af52SDarrick J. Wong
1455bcc561f2SChandan Babu R if (xfs_bmap_is_real_extent(&uirec)) {
1456bcc561f2SChandan Babu R error = xfs_iext_count_may_overflow(ip,
1457bcc561f2SChandan Babu R XFS_DATA_FORK,
1458bcc561f2SChandan Babu R XFS_IEXT_SWAP_RMAP_CNT);
14594f86bb4bSChandan Babu R if (error == -EFBIG)
14604f86bb4bSChandan Babu R error = xfs_iext_count_upgrade(tp, ip,
14614f86bb4bSChandan Babu R XFS_IEXT_SWAP_RMAP_CNT);
1462bcc561f2SChandan Babu R if (error)
1463bcc561f2SChandan Babu R goto out;
1464bcc561f2SChandan Babu R }
1465bcc561f2SChandan Babu R
1466bcc561f2SChandan Babu R if (xfs_bmap_is_real_extent(&irec)) {
1467bcc561f2SChandan Babu R error = xfs_iext_count_may_overflow(tip,
1468bcc561f2SChandan Babu R XFS_DATA_FORK,
1469bcc561f2SChandan Babu R XFS_IEXT_SWAP_RMAP_CNT);
14704f86bb4bSChandan Babu R if (error == -EFBIG)
14714f86bb4bSChandan Babu R error = xfs_iext_count_upgrade(tp, ip,
14724f86bb4bSChandan Babu R XFS_IEXT_SWAP_RMAP_CNT);
1473bcc561f2SChandan Babu R if (error)
1474bcc561f2SChandan Babu R goto out;
1475bcc561f2SChandan Babu R }
1476bcc561f2SChandan Babu R
14771f08af52SDarrick J. Wong /* Remove the mapping from the donor file. */
14783e08f42aSDarrick J. Wong xfs_bmap_unmap_extent(tp, tip, &uirec);
14791f08af52SDarrick J. Wong
14801f08af52SDarrick J. Wong /* Remove the mapping from the source file. */
14813e08f42aSDarrick J. Wong xfs_bmap_unmap_extent(tp, ip, &irec);
14821f08af52SDarrick J. Wong
14831f08af52SDarrick J. Wong /* Map the donor file's blocks into the source file. */
14843e08f42aSDarrick J. Wong xfs_bmap_map_extent(tp, ip, &uirec);
14851f08af52SDarrick J. Wong
14861f08af52SDarrick J. Wong /* Map the source file's blocks into the donor file. */
14873e08f42aSDarrick J. Wong xfs_bmap_map_extent(tp, tip, &irec);
14881f08af52SDarrick J. Wong
14899e28a242SBrian Foster error = xfs_defer_finish(tpp);
14907a7943c7SBrian Foster tp = *tpp;
14911f08af52SDarrick J. Wong if (error)
14929b1f4e98SBrian Foster goto out;
14931f08af52SDarrick J. Wong
14941f08af52SDarrick J. Wong tirec.br_startoff += rlen;
14951f08af52SDarrick J. Wong if (tirec.br_startblock != HOLESTARTBLOCK &&
14961f08af52SDarrick J. Wong tirec.br_startblock != DELAYSTARTBLOCK)
14971f08af52SDarrick J. Wong tirec.br_startblock += rlen;
14981f08af52SDarrick J. Wong tirec.br_blockcount -= rlen;
14991f08af52SDarrick J. Wong }
15001f08af52SDarrick J. Wong
15011f08af52SDarrick J. Wong /* Roll on... */
15021f08af52SDarrick J. Wong count_fsb -= ilen;
15031f08af52SDarrick J. Wong offset_fsb += ilen;
15041f08af52SDarrick J. Wong }
15051f08af52SDarrick J. Wong
15063e09ab8fSChristoph Hellwig tip->i_diflags2 = tip_flags2;
15071f08af52SDarrick J. Wong return 0;
15081f08af52SDarrick J. Wong
15091f08af52SDarrick J. Wong out:
15101f08af52SDarrick J. Wong trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
15113e09ab8fSChristoph Hellwig tip->i_diflags2 = tip_flags2;
15121f08af52SDarrick J. Wong return error;
15131f08af52SDarrick J. Wong }
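/*
 * Editorial worked example for the trimming loop above: if the donor
 * extent tirec covers file blocks [0, 8) but the source file maps that
 * range as two extents, [0, 3) and [3, 8), the inner loop first swaps a
 * 3-block piece (rlen = 3), advances tirec's startoff/startblock by 3,
 * and then swaps the remaining 5 blocks on the next pass.
 */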
15141f08af52SDarrick J. Wong
151539aff5fdSDarrick J. Wong /* Swap the extents of two files by swapping data forks. */
151639aff5fdSDarrick J. Wong STATIC int
151739aff5fdSDarrick J. Wong xfs_swap_extent_forks(
151839aff5fdSDarrick J. Wong struct xfs_trans *tp,
151939aff5fdSDarrick J. Wong struct xfs_inode *ip,
152039aff5fdSDarrick J. Wong struct xfs_inode *tip,
152139aff5fdSDarrick J. Wong int *src_log_flags,
152239aff5fdSDarrick J. Wong int *target_log_flags)
152339aff5fdSDarrick J. Wong {
1524e7f5d5caSDarrick J. Wong xfs_filblks_t aforkblks = 0;
1525e7f5d5caSDarrick J. Wong xfs_filblks_t taforkblks = 0;
1526e7f5d5caSDarrick J. Wong xfs_extnum_t junk;
1527c8ce540dSDarrick J. Wong uint64_t tmp;
152839aff5fdSDarrick J. Wong int error;
152939aff5fdSDarrick J. Wong
153039aff5fdSDarrick J. Wong /*
153139aff5fdSDarrick J. Wong * Count the number of extended attribute blocks
153239aff5fdSDarrick J. Wong */
1533932b42c6SDarrick J. Wong if (xfs_inode_has_attr_fork(ip) && ip->i_af.if_nextents > 0 &&
15342ed5b09bSDarrick J. Wong ip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
1535e7f5d5caSDarrick J. Wong error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
153639aff5fdSDarrick J. Wong &aforkblks);
153739aff5fdSDarrick J. Wong if (error)
153839aff5fdSDarrick J. Wong return error;
153939aff5fdSDarrick J. Wong }
1540932b42c6SDarrick J. Wong if (xfs_inode_has_attr_fork(tip) && tip->i_af.if_nextents > 0 &&
15412ed5b09bSDarrick J. Wong tip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
1542e7f5d5caSDarrick J. Wong error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
154339aff5fdSDarrick J. Wong &taforkblks);
154439aff5fdSDarrick J. Wong if (error)
154539aff5fdSDarrick J. Wong return error;
154639aff5fdSDarrick J. Wong }
154739aff5fdSDarrick J. Wong
154839aff5fdSDarrick J. Wong /*
15496fb10d6dSBrian Foster * Btree format (v3) inodes have the inode number stamped in the bmbt
15506fb10d6dSBrian Foster * block headers. We can't start changing the bmbt blocks until the
15516fb10d6dSBrian Foster * inode owner change is logged so recovery does the right thing in the
15526fb10d6dSBrian Foster * event of a crash. Set the owner change log flags now and leave the
15536fb10d6dSBrian Foster * bmbt scan as the last step.
155439aff5fdSDarrick J. Wong */
155538c26bfdSDave Chinner if (xfs_has_v3inodes(ip->i_mount)) {
1556f7e67b20SChristoph Hellwig if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
155739aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DOWNER;
1558f7e67b20SChristoph Hellwig if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
155939aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DOWNER;
15606471e9c5SChristoph Hellwig }
156139aff5fdSDarrick J. Wong
156239aff5fdSDarrick J. Wong /*
156339aff5fdSDarrick J. Wong * Swap the data forks of the inodes
156439aff5fdSDarrick J. Wong */
1565897992b7SGustavo A. R. Silva swap(ip->i_df, tip->i_df);
156639aff5fdSDarrick J. Wong
156739aff5fdSDarrick J. Wong /*
156839aff5fdSDarrick J. Wong * Fix the on-disk inode values
156939aff5fdSDarrick J. Wong */
15706e73a545SChristoph Hellwig tmp = (uint64_t)ip->i_nblocks;
15716e73a545SChristoph Hellwig ip->i_nblocks = tip->i_nblocks - taforkblks + aforkblks;
15726e73a545SChristoph Hellwig tip->i_nblocks = tmp + taforkblks - aforkblks;
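	/*
	 * Editorial worked example: if ip held 100 blocks (2 of them in
	 * its attr fork) and tip held 80 (5 in its attr fork), swapping
	 * only the data forks leaves ip with 80 - 5 + 2 = 77 blocks and
	 * tip with 100 + 5 - 2 = 103, as each keeps its own attr fork.
	 */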
157339aff5fdSDarrick J. Wong
157439aff5fdSDarrick J. Wong /*
157539aff5fdSDarrick J. Wong * The extents in the source inode could still contain speculative
157639aff5fdSDarrick J. Wong * preallocation beyond EOF (e.g. the file is open but not modified
157739aff5fdSDarrick J. Wong * while defrag is in progress). In that case, we need to copy over the
157839aff5fdSDarrick J. Wong * number of delalloc blocks the data fork in the source inode is
157939aff5fdSDarrick J. Wong * tracking beyond EOF so that when the fork is truncated away when the
158039aff5fdSDarrick J. Wong * temporary inode is unlinked we don't underrun the i_delayed_blks
158139aff5fdSDarrick J. Wong * counter on that inode.
158239aff5fdSDarrick J. Wong */
158339aff5fdSDarrick J. Wong ASSERT(tip->i_delayed_blks == 0);
158439aff5fdSDarrick J. Wong tip->i_delayed_blks = ip->i_delayed_blks;
158539aff5fdSDarrick J. Wong ip->i_delayed_blks = 0;
158639aff5fdSDarrick J. Wong
1587f7e67b20SChristoph Hellwig switch (ip->i_df.if_format) {
158839aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS:
158939aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DEXT;
159039aff5fdSDarrick J. Wong break;
159139aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE:
159238c26bfdSDave Chinner ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
159339aff5fdSDarrick J. Wong (*src_log_flags & XFS_ILOG_DOWNER));
159439aff5fdSDarrick J. Wong (*src_log_flags) |= XFS_ILOG_DBROOT;
159539aff5fdSDarrick J. Wong break;
159639aff5fdSDarrick J. Wong }
159739aff5fdSDarrick J. Wong
1598f7e67b20SChristoph Hellwig switch (tip->i_df.if_format) {
159939aff5fdSDarrick J. Wong case XFS_DINODE_FMT_EXTENTS:
160039aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DEXT;
160139aff5fdSDarrick J. Wong break;
160239aff5fdSDarrick J. Wong case XFS_DINODE_FMT_BTREE:
160339aff5fdSDarrick J. Wong (*target_log_flags) |= XFS_ILOG_DBROOT;
160438c26bfdSDave Chinner ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
160539aff5fdSDarrick J. Wong (*target_log_flags & XFS_ILOG_DOWNER));
160639aff5fdSDarrick J. Wong break;
160739aff5fdSDarrick J. Wong }
160839aff5fdSDarrick J. Wong
160939aff5fdSDarrick J. Wong return 0;
161039aff5fdSDarrick J. Wong }
161139aff5fdSDarrick J. Wong
16122dd3d709SBrian Foster /*
16132dd3d709SBrian Foster * Fix up the owners of the bmbt blocks to refer to the current inode. The
16142dd3d709SBrian Foster * change owner scan attempts to order all modified buffers in the current
16152dd3d709SBrian Foster * transaction. In the event of ordered buffer failure, the offending buffer is
16162dd3d709SBrian Foster * physically logged as a fallback and the scan returns -EAGAIN. We must roll
16172dd3d709SBrian Foster * the transaction in this case to replenish the fallback log reservation and
16182dd3d709SBrian Foster * restart the scan. This process repeats until the scan completes.
16192dd3d709SBrian Foster */
16202dd3d709SBrian Foster static int
16212dd3d709SBrian Foster xfs_swap_change_owner(
16222dd3d709SBrian Foster struct xfs_trans **tpp,
16232dd3d709SBrian Foster struct xfs_inode *ip,
16242dd3d709SBrian Foster struct xfs_inode *tmpip)
16252dd3d709SBrian Foster {
16262dd3d709SBrian Foster int error;
16272dd3d709SBrian Foster struct xfs_trans *tp = *tpp;
16282dd3d709SBrian Foster
16292dd3d709SBrian Foster do {
16302dd3d709SBrian Foster error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
16312dd3d709SBrian Foster NULL);
16322dd3d709SBrian Foster /* success or fatal error */
16332dd3d709SBrian Foster if (error != -EAGAIN)
16342dd3d709SBrian Foster break;
16352dd3d709SBrian Foster
16362dd3d709SBrian Foster error = xfs_trans_roll(tpp);
16372dd3d709SBrian Foster if (error)
16382dd3d709SBrian Foster break;
16392dd3d709SBrian Foster tp = *tpp;
16402dd3d709SBrian Foster
16412dd3d709SBrian Foster /*
16422dd3d709SBrian Foster * Redirty both inodes so they can relog and keep the log tail
16432dd3d709SBrian Foster * moving forward.
16442dd3d709SBrian Foster */
16452dd3d709SBrian Foster xfs_trans_ijoin(tp, ip, 0);
16462dd3d709SBrian Foster xfs_trans_ijoin(tp, tmpip, 0);
16472dd3d709SBrian Foster xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
16482dd3d709SBrian Foster xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
16492dd3d709SBrian Foster } while (true);
16502dd3d709SBrian Foster
16512dd3d709SBrian Foster return error;
16522dd3d709SBrian Foster }
16532dd3d709SBrian Foster
16544ef897a2SDave Chinner int
1655a133d952SDave Chinner xfs_swap_extents(
1656e06259aaSDarrick J. Wong struct xfs_inode *ip, /* target inode */
1657e06259aaSDarrick J. Wong struct xfs_inode *tip, /* tmp inode */
1658e06259aaSDarrick J. Wong struct xfs_swapext *sxp)
1659a133d952SDave Chinner {
1660e06259aaSDarrick J. Wong struct xfs_mount *mp = ip->i_mount;
1661e06259aaSDarrick J. Wong struct xfs_trans *tp;
1662e06259aaSDarrick J. Wong struct xfs_bstat *sbp = &sxp->sx_stat;
1663a133d952SDave Chinner int src_log_flags, target_log_flags;
1664a133d952SDave Chinner int error = 0;
1665c8ce540dSDarrick J. Wong uint64_t f;
16662dd3d709SBrian Foster int resblks = 0;
1667f74681baSBrian Foster unsigned int flags = 0;
1668a0a415e3SJeff Layton struct timespec64 ctime;
1669a133d952SDave Chinner
1670a133d952SDave Chinner /*
1671723cac48SDave Chinner * Lock the inodes against other IO, page faults and truncate to
1672723cac48SDave Chinner * begin with. Then we can ensure the inodes are flushed and have no
1673723cac48SDave Chinner * begin with. Then we can safely ensure the inodes are flushed and have
1674723cac48SDave Chinner * no page cache. Once we have done this we can take the ilocks and
1675a133d952SDave Chinner */
167665523218SChristoph Hellwig lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1677d2c292d8SJan Kara filemap_invalidate_lock_two(VFS_I(ip)->i_mapping,
1678d2c292d8SJan Kara VFS_I(tip)->i_mapping);
1679a133d952SDave Chinner
1680a133d952SDave Chinner /* Verify that both files have the same format */
1681c19b3b05SDave Chinner if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
16822451337dSDave Chinner error = -EINVAL;
1683a133d952SDave Chinner goto out_unlock;
1684a133d952SDave Chinner }
1685a133d952SDave Chinner
1686a133d952SDave Chinner /* Verify both files are either real-time or non-realtime */
1687a133d952SDave Chinner if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
16882451337dSDave Chinner error = -EINVAL;
1689a133d952SDave Chinner goto out_unlock;
1690a133d952SDave Chinner }
1691a133d952SDave Chinner
16922713fefaSDarrick J. Wong error = xfs_qm_dqattach(ip);
16932713fefaSDarrick J. Wong if (error)
16942713fefaSDarrick J. Wong goto out_unlock;
16952713fefaSDarrick J. Wong
16962713fefaSDarrick J. Wong error = xfs_qm_dqattach(tip);
16972713fefaSDarrick J. Wong if (error)
16982713fefaSDarrick J. Wong goto out_unlock;
16992713fefaSDarrick J. Wong
17004ef897a2SDave Chinner error = xfs_swap_extent_flush(ip);
1701a133d952SDave Chinner if (error)
1702a133d952SDave Chinner goto out_unlock;
17034ef897a2SDave Chinner error = xfs_swap_extent_flush(tip);
17044ef897a2SDave Chinner if (error)
17054ef897a2SDave Chinner goto out_unlock;
1706a133d952SDave Chinner
170796987eeaSChristoph Hellwig if (xfs_inode_has_cow_data(tip)) {
170896987eeaSChristoph Hellwig error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
170996987eeaSChristoph Hellwig if (error)
17108bc3b5e4SDarrick J. Wong goto out_unlock;
171196987eeaSChristoph Hellwig }
171296987eeaSChristoph Hellwig
17131f08af52SDarrick J. Wong /*
17141f08af52SDarrick J. Wong * Extent "swapping" with rmap requires a permanent reservation and
17151f08af52SDarrick J. Wong * a block reservation because it's really just a remap operation
17161f08af52SDarrick J. Wong * performed with log redo items!
17171f08af52SDarrick J. Wong */
171838c26bfdSDave Chinner if (xfs_has_rmapbt(mp)) {
1719b3fed434SBrian Foster int w = XFS_DATA_FORK;
1720daf83964SChristoph Hellwig uint32_t ipnext = ip->i_df.if_nextents;
1721daf83964SChristoph Hellwig uint32_t tipnext = tip->i_df.if_nextents;
1722b3fed434SBrian Foster
17231f08af52SDarrick J. Wong /*
1724b3fed434SBrian Foster * Conceptually this shouldn't affect the shape of either bmbt,
1725b3fed434SBrian Foster * but since we atomically move extents one by one, we reserve
1726b3fed434SBrian Foster * enough space to rebuild both trees.
17271f08af52SDarrick J. Wong */
1728b3fed434SBrian Foster resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
1729b3fed434SBrian Foster resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
1730b3fed434SBrian Foster
1731b3fed434SBrian Foster /*
1732f74681baSBrian Foster * If either inode straddles a bmapbt block allocation boundary,
1733f74681baSBrian Foster * the rmapbt algorithm triggers repeated allocs and frees as
1734f74681baSBrian Foster * extents are remapped. This can exhaust the block reservation
1735f74681baSBrian Foster * prematurely and cause shutdown. Return freed blocks to the
1736f74681baSBrian Foster * transaction reservation to counter this behavior.
1737b3fed434SBrian Foster */
1738f74681baSBrian Foster flags |= XFS_TRANS_RES_FDBLKS;
17392dd3d709SBrian Foster }
1740f74681baSBrian Foster error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags,
1741f74681baSBrian Foster &tp);
1742253f4911SChristoph Hellwig if (error)
1743a133d952SDave Chinner goto out_unlock;
1744723cac48SDave Chinner
1745723cac48SDave Chinner /*
1746723cac48SDave Chinner * Lock and join the inodes to the transaction so that transaction commit
1747723cac48SDave Chinner * or cancel will unlock the inodes from this point onwards.
1748723cac48SDave Chinner */
17497c2d238aSDarrick J. Wong xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
175039aff5fdSDarrick J. Wong xfs_trans_ijoin(tp, ip, 0);
175139aff5fdSDarrick J. Wong xfs_trans_ijoin(tp, tip, 0);
1752723cac48SDave Chinner
1754a133d952SDave Chinner /* Verify all data are being swapped */
1755a133d952SDave Chinner if (sxp->sx_offset != 0 ||
175613d2c10bSChristoph Hellwig sxp->sx_length != ip->i_disk_size ||
175713d2c10bSChristoph Hellwig sxp->sx_length != tip->i_disk_size) {
17582451337dSDave Chinner error = -EFAULT;
17594ef897a2SDave Chinner goto out_trans_cancel;
1760a133d952SDave Chinner }
1761a133d952SDave Chinner
1762a133d952SDave Chinner trace_xfs_swap_extent_before(ip, 0);
1763a133d952SDave Chinner trace_xfs_swap_extent_before(tip, 1);
1764a133d952SDave Chinner
1765a133d952SDave Chinner /* check inode formats now that data is flushed */
1766a133d952SDave Chinner error = xfs_swap_extents_check_format(ip, tip);
1767a133d952SDave Chinner if (error) {
1768a133d952SDave Chinner xfs_notice(mp,
1769a133d952SDave Chinner "%s: inode 0x%llx format is incompatible for exchanging.",
1770a133d952SDave Chinner __func__, ip->i_ino);
17714ef897a2SDave Chinner goto out_trans_cancel;
1772a133d952SDave Chinner }
1773a133d952SDave Chinner
1774a133d952SDave Chinner /*
1775a133d952SDave Chinner * Compare the current change & modify times with that
1776a133d952SDave Chinner * passed in. If they differ, we abort this swap.
1777a133d952SDave Chinner * This is the mechanism used to ensure the calling
1778a133d952SDave Chinner * process that the file was not changed out from
1779a133d952SDave Chinner * under it.
1780a133d952SDave Chinner */
1781a0a415e3SJeff Layton ctime = inode_get_ctime(VFS_I(ip));
1782a0a415e3SJeff Layton if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) ||
1783a0a415e3SJeff Layton (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) ||
1784a133d952SDave Chinner (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
1785a133d952SDave Chinner (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
17862451337dSDave Chinner error = -EBUSY;
178781217683SDave Chinner goto out_trans_cancel;
1788a133d952SDave Chinner }
1789a133d952SDave Chinner
179021b5c978SDave Chinner /*
179121b5c978SDave Chinner * Note the trickiness in setting the log flags - we set the owner log
179221b5c978SDave Chinner * flag on the opposite inode (i.e. the inode we are setting the new
179321b5c978SDave Chinner * owner to be) because once we swap the forks and log that, log
179421b5c978SDave Chinner * recovery is going to see the fork as owned by the swapped inode,
179521b5c978SDave Chinner * not the pre-swapped inodes.
179621b5c978SDave Chinner */
179721b5c978SDave Chinner src_log_flags = XFS_ILOG_CORE;
179821b5c978SDave Chinner target_log_flags = XFS_ILOG_CORE;
179939aff5fdSDarrick J. Wong
180038c26bfdSDave Chinner if (xfs_has_rmapbt(mp))
18011f08af52SDarrick J. Wong error = xfs_swap_extent_rmap(&tp, ip, tip);
18021f08af52SDarrick J. Wong else
180339aff5fdSDarrick J. Wong error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
180439aff5fdSDarrick J. Wong &target_log_flags);
180521b5c978SDave Chinner if (error)
180621b5c978SDave Chinner goto out_trans_cancel;
1807a133d952SDave Chinner
1808f0bc4d13SDarrick J. Wong /* Do we have to swap reflink flags? */
18093e09ab8fSChristoph Hellwig if ((ip->i_diflags2 & XFS_DIFLAG2_REFLINK) ^
18103e09ab8fSChristoph Hellwig (tip->i_diflags2 & XFS_DIFLAG2_REFLINK)) {
18113e09ab8fSChristoph Hellwig f = ip->i_diflags2 & XFS_DIFLAG2_REFLINK;
18123e09ab8fSChristoph Hellwig ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
18133e09ab8fSChristoph Hellwig ip->i_diflags2 |= tip->i_diflags2 & XFS_DIFLAG2_REFLINK;
18143e09ab8fSChristoph Hellwig tip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
18153e09ab8fSChristoph Hellwig tip->i_diflags2 |= f & XFS_DIFLAG2_REFLINK;
181652bfcdd7SDarrick J. Wong }
181752bfcdd7SDarrick J. Wong
181852bfcdd7SDarrick J. Wong /* Swap the cow forks. */
181938c26bfdSDave Chinner if (xfs_has_reflink(mp)) {
1820f7e67b20SChristoph Hellwig ASSERT(!ip->i_cowfp ||
1821f7e67b20SChristoph Hellwig ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
1822f7e67b20SChristoph Hellwig ASSERT(!tip->i_cowfp ||
1823f7e67b20SChristoph Hellwig tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
182452bfcdd7SDarrick J. Wong
1825897992b7SGustavo A. R. Silva swap(ip->i_cowfp, tip->i_cowfp);
182652bfcdd7SDarrick J. Wong
18275bcffe30SChristoph Hellwig if (ip->i_cowfp && ip->i_cowfp->if_bytes)
182883104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(ip);
182952bfcdd7SDarrick J. Wong else
183052bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(ip);
18315bcffe30SChristoph Hellwig if (tip->i_cowfp && tip->i_cowfp->if_bytes)
183283104d44SDarrick J. Wong xfs_inode_set_cowblocks_tag(tip);
183352bfcdd7SDarrick J. Wong else
183452bfcdd7SDarrick J. Wong xfs_inode_clear_cowblocks_tag(tip);
1835f0bc4d13SDarrick J. Wong }
1836f0bc4d13SDarrick J. Wong
1837a133d952SDave Chinner xfs_trans_log_inode(tp, ip, src_log_flags);
1838a133d952SDave Chinner xfs_trans_log_inode(tp, tip, target_log_flags);
1839a133d952SDave Chinner
1840a133d952SDave Chinner /*
18416fb10d6dSBrian Foster * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems
18426fb10d6dSBrian Foster * have inode number owner values in the bmbt blocks that still refer to
18436fb10d6dSBrian Foster * the old inode. Scan each bmbt to fix up the owner values with the
18446fb10d6dSBrian Foster * inode number of the current inode.
18456fb10d6dSBrian Foster */
18466fb10d6dSBrian Foster if (src_log_flags & XFS_ILOG_DOWNER) {
18472dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, ip, tip);
18486fb10d6dSBrian Foster if (error)
18496fb10d6dSBrian Foster goto out_trans_cancel;
18506fb10d6dSBrian Foster }
18516fb10d6dSBrian Foster if (target_log_flags & XFS_ILOG_DOWNER) {
18522dd3d709SBrian Foster error = xfs_swap_change_owner(&tp, tip, ip);
18536fb10d6dSBrian Foster if (error)
18546fb10d6dSBrian Foster goto out_trans_cancel;
18556fb10d6dSBrian Foster }
18566fb10d6dSBrian Foster
18576fb10d6dSBrian Foster /*
1858a133d952SDave Chinner * If this is a synchronous mount, make sure that the
1859a133d952SDave Chinner * transaction goes to disk before returning to the user.
1860a133d952SDave Chinner */
18610560f31aSDave Chinner if (xfs_has_wsync(mp))
1862a133d952SDave Chinner xfs_trans_set_sync(tp);
1863a133d952SDave Chinner
186470393313SChristoph Hellwig error = xfs_trans_commit(tp);
1865a133d952SDave Chinner
1866a133d952SDave Chinner trace_xfs_swap_extent_after(ip, 0);
1867a133d952SDave Chinner trace_xfs_swap_extent_after(tip, 1);
186839aff5fdSDarrick J. Wong
1869d2c292d8SJan Kara out_unlock_ilock:
1870d2c292d8SJan Kara xfs_iunlock(ip, XFS_ILOCK_EXCL);
1871d2c292d8SJan Kara xfs_iunlock(tip, XFS_ILOCK_EXCL);
187265523218SChristoph Hellwig out_unlock:
1873d2c292d8SJan Kara filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping,
1874d2c292d8SJan Kara VFS_I(tip)->i_mapping);
187565523218SChristoph Hellwig unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1876a133d952SDave Chinner return error;
1877a133d952SDave Chinner
187839aff5fdSDarrick J. Wong out_trans_cancel:
187939aff5fdSDarrick J. Wong xfs_trans_cancel(tp);
1880d2c292d8SJan Kara goto out_unlock_ilock;
1881a133d952SDave Chinner }
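/*
 * Editorial example (not part of the kernel source): xfs_swap_extents()
 * is reached via the XFS_IOC_SWAPEXT ioctl, which xfs_fsr uses for
 * online defragmentation. A hedged sketch; obtaining the target's
 * struct xfs_bstat via bulkstat is elided, and the ioctl fails with
 * EBUSY if the recorded ctime/mtime no longer match the target:
 *
 *	#include <sys/ioctl.h>
 *	#include <xfs/xfs.h>
 *
 *	static int swap_data_forks(int target_fd, int tmp_fd,
 *				   const struct xfs_bstat *target_bstat)
 *	{
 *		struct xfs_swapext sx = {
 *			.sx_version = XFS_SX_VERSION,
 *			.sx_fdtarget = target_fd,
 *			.sx_fdtmp = tmp_fd,
 *			.sx_offset = 0,
 *			.sx_length = target_bstat->bs_size,
 *			.sx_stat = *target_bstat,
 *		};
 *
 *		return ioctl(target_fd, XFS_IOC_SWAPEXT, &sx);
 *	}
 */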
1882