xref: /openbmc/linux/fs/xfs/xfs_fsmap.c (revision 55e43d6abd078ed6d219902ce8cb4d68e3c993ba)
1  // SPDX-License-Identifier: GPL-2.0+
2  /*
3   * Copyright (C) 2017 Oracle.  All Rights Reserved.
4   * Author: Darrick J. Wong <darrick.wong@oracle.com>
5   */
6  #include "xfs.h"
7  #include "xfs_fs.h"
8  #include "xfs_shared.h"
9  #include "xfs_format.h"
10  #include "xfs_log_format.h"
11  #include "xfs_trans_resv.h"
12  #include "xfs_mount.h"
13  #include "xfs_inode.h"
14  #include "xfs_trans.h"
15  #include "xfs_btree.h"
16  #include "xfs_rmap_btree.h"
17  #include "xfs_trace.h"
18  #include "xfs_rmap.h"
19  #include "xfs_alloc.h"
20  #include "xfs_bit.h"
21  #include <linux/fsmap.h>
22  #include "xfs_fsmap.h"
23  #include "xfs_refcount.h"
24  #include "xfs_refcount_btree.h"
25  #include "xfs_alloc_btree.h"
26  #include "xfs_rtbitmap.h"
27  #include "xfs_ag.h"
28  
29  /* Convert an xfs_fsmap to an fsmap. */
30  static void
xfs_fsmap_from_internal(struct fsmap * dest,struct xfs_fsmap * src)31  xfs_fsmap_from_internal(
32  	struct fsmap		*dest,
33  	struct xfs_fsmap	*src)
34  {
35  	dest->fmr_device = src->fmr_device;
36  	dest->fmr_flags = src->fmr_flags;
37  	dest->fmr_physical = BBTOB(src->fmr_physical);
38  	dest->fmr_owner = src->fmr_owner;
39  	dest->fmr_offset = BBTOB(src->fmr_offset);
40  	dest->fmr_length = BBTOB(src->fmr_length);
41  	dest->fmr_reserved[0] = 0;
42  	dest->fmr_reserved[1] = 0;
43  	dest->fmr_reserved[2] = 0;
44  }
45  
46  /* Convert an fsmap to an xfs_fsmap. */
47  void
xfs_fsmap_to_internal(struct xfs_fsmap * dest,struct fsmap * src)48  xfs_fsmap_to_internal(
49  	struct xfs_fsmap	*dest,
50  	struct fsmap		*src)
51  {
52  	dest->fmr_device = src->fmr_device;
53  	dest->fmr_flags = src->fmr_flags;
54  	dest->fmr_physical = BTOBBT(src->fmr_physical);
55  	dest->fmr_owner = src->fmr_owner;
56  	dest->fmr_offset = BTOBBT(src->fmr_offset);
57  	dest->fmr_length = BTOBBT(src->fmr_length);
58  }
59  
60  /* Convert an fsmap owner into an rmapbt owner. */
61  static int
xfs_fsmap_owner_to_rmap(struct xfs_rmap_irec * dest,const struct xfs_fsmap * src)62  xfs_fsmap_owner_to_rmap(
63  	struct xfs_rmap_irec	*dest,
64  	const struct xfs_fsmap	*src)
65  {
66  	if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
67  		dest->rm_owner = src->fmr_owner;
68  		return 0;
69  	}
70  
71  	switch (src->fmr_owner) {
72  	case 0:			/* "lowest owner id possible" */
73  	case -1ULL:		/* "highest owner id possible" */
74  		dest->rm_owner = src->fmr_owner;
75  		break;
76  	case XFS_FMR_OWN_FREE:
77  		dest->rm_owner = XFS_RMAP_OWN_NULL;
78  		break;
79  	case XFS_FMR_OWN_UNKNOWN:
80  		dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
81  		break;
82  	case XFS_FMR_OWN_FS:
83  		dest->rm_owner = XFS_RMAP_OWN_FS;
84  		break;
85  	case XFS_FMR_OWN_LOG:
86  		dest->rm_owner = XFS_RMAP_OWN_LOG;
87  		break;
88  	case XFS_FMR_OWN_AG:
89  		dest->rm_owner = XFS_RMAP_OWN_AG;
90  		break;
91  	case XFS_FMR_OWN_INOBT:
92  		dest->rm_owner = XFS_RMAP_OWN_INOBT;
93  		break;
94  	case XFS_FMR_OWN_INODES:
95  		dest->rm_owner = XFS_RMAP_OWN_INODES;
96  		break;
97  	case XFS_FMR_OWN_REFC:
98  		dest->rm_owner = XFS_RMAP_OWN_REFC;
99  		break;
100  	case XFS_FMR_OWN_COW:
101  		dest->rm_owner = XFS_RMAP_OWN_COW;
102  		break;
103  	case XFS_FMR_OWN_DEFECTIVE:	/* not implemented */
104  		/* fall through */
105  	default:
106  		return -EINVAL;
107  	}
108  	return 0;
109  }
110  
111  /* Convert an rmapbt owner into an fsmap owner. */
112  static int
xfs_fsmap_owner_from_rmap(struct xfs_fsmap * dest,const struct xfs_rmap_irec * src)113  xfs_fsmap_owner_from_rmap(
114  	struct xfs_fsmap		*dest,
115  	const struct xfs_rmap_irec	*src)
116  {
117  	dest->fmr_flags = 0;
118  	if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
119  		dest->fmr_owner = src->rm_owner;
120  		return 0;
121  	}
122  	dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
123  
124  	switch (src->rm_owner) {
125  	case XFS_RMAP_OWN_FS:
126  		dest->fmr_owner = XFS_FMR_OWN_FS;
127  		break;
128  	case XFS_RMAP_OWN_LOG:
129  		dest->fmr_owner = XFS_FMR_OWN_LOG;
130  		break;
131  	case XFS_RMAP_OWN_AG:
132  		dest->fmr_owner = XFS_FMR_OWN_AG;
133  		break;
134  	case XFS_RMAP_OWN_INOBT:
135  		dest->fmr_owner = XFS_FMR_OWN_INOBT;
136  		break;
137  	case XFS_RMAP_OWN_INODES:
138  		dest->fmr_owner = XFS_FMR_OWN_INODES;
139  		break;
140  	case XFS_RMAP_OWN_REFC:
141  		dest->fmr_owner = XFS_FMR_OWN_REFC;
142  		break;
143  	case XFS_RMAP_OWN_COW:
144  		dest->fmr_owner = XFS_FMR_OWN_COW;
145  		break;
146  	case XFS_RMAP_OWN_NULL:	/* "free" */
147  		dest->fmr_owner = XFS_FMR_OWN_FREE;
148  		break;
149  	default:
150  		ASSERT(0);
151  		return -EFSCORRUPTED;
152  	}
153  	return 0;
154  }
155  
/*
 * getfsmap query state.  A single instance is set up by xfs_getfsmap() and
 * passed to the per-device handlers and the record formatting helpers.
 */
struct xfs_getfsmap_info {
	/* query header; supplies fmh_count and accumulates fmh_entries */
	struct xfs_fsmap_head	*head;
	struct fsmap		*fsmap_recs;	/* mapping records */
	struct xfs_buf		*agf_bp;	/* AGF, for refcount queries */
	struct xfs_perag	*pag;		/* AG info, if applicable */
	xfs_daddr_t		next_daddr;	/* next daddr we expect */
	/* daddr of low fsmap key when we're using the rtbitmap */
	xfs_daddr_t		low_daddr;
	u64			missing_owner;	/* owner of holes */
	u32			dev;		/* device id */
	/*
	 * Low rmap key for the query.  If low.rm_blockcount is nonzero, this
	 * is the second (or later) call to retrieve the recordset in pieces.
	 * xfs_getfsmap_rec_before_start will compare all records retrieved
	 * by the rmapbt query to filter out any records that start before
	 * the last record.
	 */
	struct xfs_rmap_irec	low;
	struct xfs_rmap_irec	high;		/* high rmap key */
	/*
	 * Set for the final helper call on a device: only a trailing gap is
	 * reported (or counted), not the record itself.
	 */
	bool			last;		/* last extent? */
};
178  
/*
 * Associate a device with a getfsmap handler.  xfs_getfsmap() zeroes its
 * handler table before filling it in, so an unused slot has a NULL @fn.
 */
struct xfs_getfsmap_dev {
	u32			dev;	/* device number, from new_encode_dev() */
	/* runs the query for this device using the low/high @keys */
	int			(*fn)(struct xfs_trans *tp,
				      const struct xfs_fsmap *keys,
				      struct xfs_getfsmap_info *info);
};
186  
187  /* Compare two getfsmap device handlers. */
188  static int
xfs_getfsmap_dev_compare(const void * p1,const void * p2)189  xfs_getfsmap_dev_compare(
190  	const void			*p1,
191  	const void			*p2)
192  {
193  	const struct xfs_getfsmap_dev	*d1 = p1;
194  	const struct xfs_getfsmap_dev	*d2 = p2;
195  
196  	return d1->dev - d2->dev;
197  }
198  
199  /* Decide if this mapping is shared. */
200  STATIC int
xfs_getfsmap_is_shared(struct xfs_trans * tp,struct xfs_getfsmap_info * info,const struct xfs_rmap_irec * rec,bool * stat)201  xfs_getfsmap_is_shared(
202  	struct xfs_trans		*tp,
203  	struct xfs_getfsmap_info	*info,
204  	const struct xfs_rmap_irec	*rec,
205  	bool				*stat)
206  {
207  	struct xfs_mount		*mp = tp->t_mountp;
208  	struct xfs_btree_cur		*cur;
209  	xfs_agblock_t			fbno;
210  	xfs_extlen_t			flen;
211  	int				error;
212  
213  	*stat = false;
214  	if (!xfs_has_reflink(mp))
215  		return 0;
216  	/* rt files will have no perag structure */
217  	if (!info->pag)
218  		return 0;
219  
220  	/* Are there any shared blocks here? */
221  	flen = 0;
222  	cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, info->pag);
223  
224  	error = xfs_refcount_find_shared(cur, rec->rm_startblock,
225  			rec->rm_blockcount, &fbno, &flen, false);
226  
227  	xfs_btree_del_cursor(cur, error);
228  	if (error)
229  		return error;
230  
231  	*stat = flen > 0;
232  	return 0;
233  }
234  
235  static inline void
xfs_getfsmap_format(struct xfs_mount * mp,struct xfs_fsmap * xfm,struct xfs_getfsmap_info * info)236  xfs_getfsmap_format(
237  	struct xfs_mount		*mp,
238  	struct xfs_fsmap		*xfm,
239  	struct xfs_getfsmap_info	*info)
240  {
241  	struct fsmap			*rec;
242  
243  	trace_xfs_getfsmap_mapping(mp, xfm);
244  
245  	rec = &info->fsmap_recs[info->head->fmh_entries++];
246  	xfs_fsmap_from_internal(rec, xfm);
247  }
248  
249  static inline bool
xfs_getfsmap_rec_before_start(struct xfs_getfsmap_info * info,const struct xfs_rmap_irec * rec,xfs_daddr_t rec_daddr)250  xfs_getfsmap_rec_before_start(
251  	struct xfs_getfsmap_info	*info,
252  	const struct xfs_rmap_irec	*rec,
253  	xfs_daddr_t			rec_daddr)
254  {
255  	if (info->low_daddr != XFS_BUF_DADDR_NULL)
256  		return rec_daddr < info->low_daddr;
257  	if (info->low.rm_blockcount)
258  		return xfs_rmap_compare(rec, &info->low) < 0;
259  	return false;
260  }
261  
262  /*
263   * Format a reverse mapping for getfsmap, having translated rm_startblock
264   * into the appropriate daddr units.  Pass in a nonzero @len_daddr if the
265   * length could be larger than rm_blockcount in struct xfs_rmap_irec.
266   */
267  STATIC int
xfs_getfsmap_helper(struct xfs_trans * tp,struct xfs_getfsmap_info * info,const struct xfs_rmap_irec * rec,xfs_daddr_t rec_daddr,xfs_daddr_t len_daddr)268  xfs_getfsmap_helper(
269  	struct xfs_trans		*tp,
270  	struct xfs_getfsmap_info	*info,
271  	const struct xfs_rmap_irec	*rec,
272  	xfs_daddr_t			rec_daddr,
273  	xfs_daddr_t			len_daddr)
274  {
275  	struct xfs_fsmap		fmr;
276  	struct xfs_mount		*mp = tp->t_mountp;
277  	bool				shared;
278  	int				error;
279  
280  	if (fatal_signal_pending(current))
281  		return -EINTR;
282  
283  	if (len_daddr == 0)
284  		len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
285  
286  	/*
287  	 * Filter out records that start before our startpoint, if the
288  	 * caller requested that.
289  	 */
290  	if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
291  		rec_daddr += len_daddr;
292  		if (info->next_daddr < rec_daddr)
293  			info->next_daddr = rec_daddr;
294  		return 0;
295  	}
296  
297  	/* Are we just counting mappings? */
298  	if (info->head->fmh_count == 0) {
299  		if (info->head->fmh_entries == UINT_MAX)
300  			return -ECANCELED;
301  
302  		if (rec_daddr > info->next_daddr)
303  			info->head->fmh_entries++;
304  
305  		if (info->last)
306  			return 0;
307  
308  		info->head->fmh_entries++;
309  
310  		rec_daddr += len_daddr;
311  		if (info->next_daddr < rec_daddr)
312  			info->next_daddr = rec_daddr;
313  		return 0;
314  	}
315  
316  	/*
317  	 * If the record starts past the last physical block we saw,
318  	 * then we've found a gap.  Report the gap as being owned by
319  	 * whatever the caller specified is the missing owner.
320  	 */
321  	if (rec_daddr > info->next_daddr) {
322  		if (info->head->fmh_entries >= info->head->fmh_count)
323  			return -ECANCELED;
324  
325  		fmr.fmr_device = info->dev;
326  		fmr.fmr_physical = info->next_daddr;
327  		fmr.fmr_owner = info->missing_owner;
328  		fmr.fmr_offset = 0;
329  		fmr.fmr_length = rec_daddr - info->next_daddr;
330  		fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
331  		xfs_getfsmap_format(mp, &fmr, info);
332  	}
333  
334  	if (info->last)
335  		goto out;
336  
337  	/* Fill out the extent we found */
338  	if (info->head->fmh_entries >= info->head->fmh_count)
339  		return -ECANCELED;
340  
341  	trace_xfs_fsmap_mapping(mp, info->dev,
342  			info->pag ? info->pag->pag_agno : NULLAGNUMBER, rec);
343  
344  	fmr.fmr_device = info->dev;
345  	fmr.fmr_physical = rec_daddr;
346  	error = xfs_fsmap_owner_from_rmap(&fmr, rec);
347  	if (error)
348  		return error;
349  	fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
350  	fmr.fmr_length = len_daddr;
351  	if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
352  		fmr.fmr_flags |= FMR_OF_PREALLOC;
353  	if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
354  		fmr.fmr_flags |= FMR_OF_ATTR_FORK;
355  	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
356  		fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
357  	if (fmr.fmr_flags == 0) {
358  		error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
359  		if (error)
360  			return error;
361  		if (shared)
362  			fmr.fmr_flags |= FMR_OF_SHARED;
363  	}
364  
365  	xfs_getfsmap_format(mp, &fmr, info);
366  out:
367  	rec_daddr += len_daddr;
368  	if (info->next_daddr < rec_daddr)
369  		info->next_daddr = rec_daddr;
370  	return 0;
371  }
372  
373  /* Transform a rmapbt irec into a fsmap */
374  STATIC int
xfs_getfsmap_datadev_helper(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)375  xfs_getfsmap_datadev_helper(
376  	struct xfs_btree_cur		*cur,
377  	const struct xfs_rmap_irec	*rec,
378  	void				*priv)
379  {
380  	struct xfs_mount		*mp = cur->bc_mp;
381  	struct xfs_getfsmap_info	*info = priv;
382  	xfs_fsblock_t			fsb;
383  	xfs_daddr_t			rec_daddr;
384  
385  	fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
386  	rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
387  
388  	return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
389  }
390  
391  /* Transform a bnobt irec into a fsmap */
392  STATIC int
xfs_getfsmap_datadev_bnobt_helper(struct xfs_btree_cur * cur,const struct xfs_alloc_rec_incore * rec,void * priv)393  xfs_getfsmap_datadev_bnobt_helper(
394  	struct xfs_btree_cur		*cur,
395  	const struct xfs_alloc_rec_incore *rec,
396  	void				*priv)
397  {
398  	struct xfs_mount		*mp = cur->bc_mp;
399  	struct xfs_getfsmap_info	*info = priv;
400  	struct xfs_rmap_irec		irec;
401  	xfs_daddr_t			rec_daddr;
402  
403  	rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_ag.pag->pag_agno,
404  			rec->ar_startblock);
405  
406  	irec.rm_startblock = rec->ar_startblock;
407  	irec.rm_blockcount = rec->ar_blockcount;
408  	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
409  	irec.rm_offset = 0;
410  	irec.rm_flags = 0;
411  
412  	return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
413  }
414  
415  /* Set rmap flags based on the getfsmap flags */
416  static void
xfs_getfsmap_set_irec_flags(struct xfs_rmap_irec * irec,const struct xfs_fsmap * fmr)417  xfs_getfsmap_set_irec_flags(
418  	struct xfs_rmap_irec	*irec,
419  	const struct xfs_fsmap	*fmr)
420  {
421  	irec->rm_flags = 0;
422  	if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
423  		irec->rm_flags |= XFS_RMAP_ATTR_FORK;
424  	if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
425  		irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
426  	if (fmr->fmr_flags & FMR_OF_PREALLOC)
427  		irec->rm_flags |= XFS_RMAP_UNWRITTEN;
428  }
429  
430  /* Execute a getfsmap query against the log device. */
431  STATIC int
xfs_getfsmap_logdev(struct xfs_trans * tp,const struct xfs_fsmap * keys,struct xfs_getfsmap_info * info)432  xfs_getfsmap_logdev(
433  	struct xfs_trans		*tp,
434  	const struct xfs_fsmap		*keys,
435  	struct xfs_getfsmap_info	*info)
436  {
437  	struct xfs_mount		*mp = tp->t_mountp;
438  	struct xfs_rmap_irec		rmap;
439  	xfs_daddr_t			rec_daddr, len_daddr;
440  	xfs_fsblock_t			start_fsb, end_fsb;
441  	uint64_t			eofs;
442  
443  	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
444  	if (keys[0].fmr_physical >= eofs)
445  		return 0;
446  	start_fsb = XFS_BB_TO_FSBT(mp,
447  				keys[0].fmr_physical + keys[0].fmr_length);
448  	end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
449  
450  	/* Adjust the low key if we are continuing from where we left off. */
451  	if (keys[0].fmr_length > 0)
452  		info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
453  
454  	trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
455  	trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);
456  
457  	if (start_fsb > 0)
458  		return 0;
459  
460  	/* Fabricate an rmap entry for the external log device. */
461  	rmap.rm_startblock = 0;
462  	rmap.rm_blockcount = mp->m_sb.sb_logblocks;
463  	rmap.rm_owner = XFS_RMAP_OWN_LOG;
464  	rmap.rm_offset = 0;
465  	rmap.rm_flags = 0;
466  
467  	rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
468  	len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
469  	return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
470  }
471  
472  #ifdef CONFIG_XFS_RT
473  /* Transform a rtbitmap "record" into a fsmap */
474  STATIC int
xfs_getfsmap_rtdev_rtbitmap_helper(struct xfs_mount * mp,struct xfs_trans * tp,const struct xfs_rtalloc_rec * rec,void * priv)475  xfs_getfsmap_rtdev_rtbitmap_helper(
476  	struct xfs_mount		*mp,
477  	struct xfs_trans		*tp,
478  	const struct xfs_rtalloc_rec	*rec,
479  	void				*priv)
480  {
481  	struct xfs_getfsmap_info	*info = priv;
482  	struct xfs_rmap_irec		irec;
483  	xfs_rtblock_t			rtbno;
484  	xfs_daddr_t			rec_daddr, len_daddr;
485  
486  	rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
487  	rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
488  	irec.rm_startblock = rtbno;
489  
490  	rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize;
491  	len_daddr = XFS_FSB_TO_BB(mp, rtbno);
492  	irec.rm_blockcount = rtbno;
493  
494  	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
495  	irec.rm_offset = 0;
496  	irec.rm_flags = 0;
497  
498  	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
499  }
500  
501  /* Execute a getfsmap query against the realtime device rtbitmap. */
502  STATIC int
xfs_getfsmap_rtdev_rtbitmap(struct xfs_trans * tp,const struct xfs_fsmap * keys,struct xfs_getfsmap_info * info)503  xfs_getfsmap_rtdev_rtbitmap(
504  	struct xfs_trans		*tp,
505  	const struct xfs_fsmap		*keys,
506  	struct xfs_getfsmap_info	*info)
507  {
508  
509  	struct xfs_rtalloc_rec		alow = { 0 };
510  	struct xfs_rtalloc_rec		ahigh = { 0 };
511  	struct xfs_mount		*mp = tp->t_mountp;
512  	xfs_rtblock_t			start_rtb;
513  	xfs_rtblock_t			end_rtb;
514  	uint64_t			eofs;
515  	int				error;
516  
517  	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize);
518  	if (keys[0].fmr_physical >= eofs)
519  		return 0;
520  	start_rtb = XFS_BB_TO_FSBT(mp,
521  				keys[0].fmr_physical + keys[0].fmr_length);
522  	end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
523  
524  	info->missing_owner = XFS_FMR_OWN_UNKNOWN;
525  
526  	/* Adjust the low key if we are continuing from where we left off. */
527  	if (keys[0].fmr_length > 0) {
528  		info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
529  		if (info->low_daddr >= eofs)
530  			return 0;
531  	}
532  
533  	trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
534  	trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);
535  
536  	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
537  
538  	/*
539  	 * Set up query parameters to return free rtextents covering the range
540  	 * we want.
541  	 */
542  	alow.ar_startext = start_rtb;
543  	ahigh.ar_startext = end_rtb;
544  	do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
545  	if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
546  		ahigh.ar_startext++;
547  	error = xfs_rtalloc_query_range(mp, tp, &alow, &ahigh,
548  			xfs_getfsmap_rtdev_rtbitmap_helper, info);
549  	if (error)
550  		goto err;
551  
552  	/*
553  	 * Report any gaps at the end of the rtbitmap by simulating a null
554  	 * rmap starting at the block after the end of the query range.
555  	 */
556  	info->last = true;
557  	ahigh.ar_startext = min(mp->m_sb.sb_rextents, ahigh.ar_startext);
558  
559  	error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info);
560  	if (error)
561  		goto err;
562  err:
563  	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
564  	return error;
565  }
566  #endif /* CONFIG_XFS_RT */
567  
568  static inline bool
rmap_not_shareable(struct xfs_mount * mp,const struct xfs_rmap_irec * r)569  rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
570  {
571  	if (!xfs_has_reflink(mp))
572  		return true;
573  	if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
574  		return true;
575  	if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
576  			   XFS_RMAP_UNWRITTEN))
577  		return true;
578  	return false;
579  }
580  
581  /* Execute a getfsmap query against the regular data device. */
582  STATIC int
__xfs_getfsmap_datadev(struct xfs_trans * tp,const struct xfs_fsmap * keys,struct xfs_getfsmap_info * info,int (* query_fn)(struct xfs_trans *,struct xfs_getfsmap_info *,struct xfs_btree_cur **,void *),void * priv)583  __xfs_getfsmap_datadev(
584  	struct xfs_trans		*tp,
585  	const struct xfs_fsmap		*keys,
586  	struct xfs_getfsmap_info	*info,
587  	int				(*query_fn)(struct xfs_trans *,
588  						    struct xfs_getfsmap_info *,
589  						    struct xfs_btree_cur **,
590  						    void *),
591  	void				*priv)
592  {
593  	struct xfs_mount		*mp = tp->t_mountp;
594  	struct xfs_perag		*pag;
595  	struct xfs_btree_cur		*bt_cur = NULL;
596  	xfs_fsblock_t			start_fsb;
597  	xfs_fsblock_t			end_fsb;
598  	xfs_agnumber_t			start_ag;
599  	xfs_agnumber_t			end_ag;
600  	uint64_t			eofs;
601  	int				error = 0;
602  
603  	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
604  	if (keys[0].fmr_physical >= eofs)
605  		return 0;
606  	start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
607  	end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
608  
609  	/*
610  	 * Convert the fsmap low/high keys to AG based keys.  Initialize
611  	 * low to the fsmap low key and max out the high key to the end
612  	 * of the AG.
613  	 */
614  	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
615  	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
616  	if (error)
617  		return error;
618  	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
619  	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
620  
621  	/* Adjust the low key if we are continuing from where we left off. */
622  	if (info->low.rm_blockcount == 0) {
623  		/* No previous record from which to continue */
624  	} else if (rmap_not_shareable(mp, &info->low)) {
625  		/* Last record seen was an unshareable extent */
626  		info->low.rm_owner = 0;
627  		info->low.rm_offset = 0;
628  
629  		start_fsb += info->low.rm_blockcount;
630  		if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
631  			return 0;
632  	} else {
633  		/* Last record seen was a shareable file data extent */
634  		info->low.rm_offset += info->low.rm_blockcount;
635  	}
636  	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);
637  
638  	info->high.rm_startblock = -1U;
639  	info->high.rm_owner = ULLONG_MAX;
640  	info->high.rm_offset = ULLONG_MAX;
641  	info->high.rm_blockcount = 0;
642  	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
643  
644  	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
645  	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
646  
647  	for_each_perag_range(mp, start_ag, end_ag, pag) {
648  		/*
649  		 * Set the AG high key from the fsmap high key if this
650  		 * is the last AG that we're querying.
651  		 */
652  		info->pag = pag;
653  		if (pag->pag_agno == end_ag) {
654  			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
655  					end_fsb);
656  			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
657  					keys[1].fmr_offset);
658  			error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
659  			if (error)
660  				break;
661  			xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
662  		}
663  
664  		if (bt_cur) {
665  			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
666  			bt_cur = NULL;
667  			xfs_trans_brelse(tp, info->agf_bp);
668  			info->agf_bp = NULL;
669  		}
670  
671  		error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
672  		if (error)
673  			break;
674  
675  		trace_xfs_fsmap_low_key(mp, info->dev, pag->pag_agno,
676  				&info->low);
677  		trace_xfs_fsmap_high_key(mp, info->dev, pag->pag_agno,
678  				&info->high);
679  
680  		error = query_fn(tp, info, &bt_cur, priv);
681  		if (error)
682  			break;
683  
684  		/*
685  		 * Set the AG low key to the start of the AG prior to
686  		 * moving on to the next AG.
687  		 */
688  		if (pag->pag_agno == start_ag)
689  			memset(&info->low, 0, sizeof(info->low));
690  
691  		/*
692  		 * If this is the last AG, report any gap at the end of it
693  		 * before we drop the reference to the perag when the loop
694  		 * terminates.
695  		 */
696  		if (pag->pag_agno == end_ag) {
697  			info->last = true;
698  			error = query_fn(tp, info, &bt_cur, priv);
699  			if (error)
700  				break;
701  		}
702  		info->pag = NULL;
703  	}
704  
705  	if (bt_cur)
706  		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
707  							 XFS_BTREE_NOERROR);
708  	if (info->agf_bp) {
709  		xfs_trans_brelse(tp, info->agf_bp);
710  		info->agf_bp = NULL;
711  	}
712  	if (info->pag) {
713  		xfs_perag_rele(info->pag);
714  		info->pag = NULL;
715  	} else if (pag) {
716  		/* loop termination case */
717  		xfs_perag_rele(pag);
718  	}
719  
720  	return error;
721  }
722  
723  /* Actually query the rmap btree. */
724  STATIC int
xfs_getfsmap_datadev_rmapbt_query(struct xfs_trans * tp,struct xfs_getfsmap_info * info,struct xfs_btree_cur ** curpp,void * priv)725  xfs_getfsmap_datadev_rmapbt_query(
726  	struct xfs_trans		*tp,
727  	struct xfs_getfsmap_info	*info,
728  	struct xfs_btree_cur		**curpp,
729  	void				*priv)
730  {
731  	/* Report any gap at the end of the last AG. */
732  	if (info->last)
733  		return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);
734  
735  	/* Allocate cursor for this AG and query_range it. */
736  	*curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
737  			info->pag);
738  	return xfs_rmap_query_range(*curpp, &info->low, &info->high,
739  			xfs_getfsmap_datadev_helper, info);
740  }
741  
742  /* Execute a getfsmap query against the regular data device rmapbt. */
743  STATIC int
xfs_getfsmap_datadev_rmapbt(struct xfs_trans * tp,const struct xfs_fsmap * keys,struct xfs_getfsmap_info * info)744  xfs_getfsmap_datadev_rmapbt(
745  	struct xfs_trans		*tp,
746  	const struct xfs_fsmap		*keys,
747  	struct xfs_getfsmap_info	*info)
748  {
749  	info->missing_owner = XFS_FMR_OWN_FREE;
750  	return __xfs_getfsmap_datadev(tp, keys, info,
751  			xfs_getfsmap_datadev_rmapbt_query, NULL);
752  }
753  
754  /* Actually query the bno btree. */
755  STATIC int
xfs_getfsmap_datadev_bnobt_query(struct xfs_trans * tp,struct xfs_getfsmap_info * info,struct xfs_btree_cur ** curpp,void * priv)756  xfs_getfsmap_datadev_bnobt_query(
757  	struct xfs_trans		*tp,
758  	struct xfs_getfsmap_info	*info,
759  	struct xfs_btree_cur		**curpp,
760  	void				*priv)
761  {
762  	struct xfs_alloc_rec_incore	*key = priv;
763  
764  	/* Report any gap at the end of the last AG. */
765  	if (info->last)
766  		return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);
767  
768  	/* Allocate cursor for this AG and query_range it. */
769  	*curpp = xfs_allocbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
770  			info->pag, XFS_BTNUM_BNO);
771  	key->ar_startblock = info->low.rm_startblock;
772  	key[1].ar_startblock = info->high.rm_startblock;
773  	return xfs_alloc_query_range(*curpp, key, &key[1],
774  			xfs_getfsmap_datadev_bnobt_helper, info);
775  }
776  
777  /* Execute a getfsmap query against the regular data device's bnobt. */
778  STATIC int
xfs_getfsmap_datadev_bnobt(struct xfs_trans * tp,const struct xfs_fsmap * keys,struct xfs_getfsmap_info * info)779  xfs_getfsmap_datadev_bnobt(
780  	struct xfs_trans		*tp,
781  	const struct xfs_fsmap		*keys,
782  	struct xfs_getfsmap_info	*info)
783  {
784  	struct xfs_alloc_rec_incore	akeys[2];
785  
786  	memset(akeys, 0, sizeof(akeys));
787  	info->missing_owner = XFS_FMR_OWN_UNKNOWN;
788  	return __xfs_getfsmap_datadev(tp, keys, info,
789  			xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
790  }
791  
792  /* Do we recognize the device? */
793  STATIC bool
xfs_getfsmap_is_valid_device(struct xfs_mount * mp,struct xfs_fsmap * fm)794  xfs_getfsmap_is_valid_device(
795  	struct xfs_mount	*mp,
796  	struct xfs_fsmap	*fm)
797  {
798  	if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
799  	    fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
800  		return true;
801  	if (mp->m_logdev_targp &&
802  	    fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
803  		return true;
804  	if (mp->m_rtdev_targp &&
805  	    fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
806  		return true;
807  	return false;
808  }
809  
810  /* Ensure that the low key is less than the high key. */
811  STATIC bool
xfs_getfsmap_check_keys(struct xfs_fsmap * low_key,struct xfs_fsmap * high_key)812  xfs_getfsmap_check_keys(
813  	struct xfs_fsmap		*low_key,
814  	struct xfs_fsmap		*high_key)
815  {
816  	if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
817  		if (low_key->fmr_offset)
818  			return false;
819  	}
820  	if (high_key->fmr_flags != -1U &&
821  	    (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
822  				    FMR_OF_EXTENT_MAP))) {
823  		if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
824  			return false;
825  	}
826  	if (high_key->fmr_length && high_key->fmr_length != -1ULL)
827  		return false;
828  
829  	if (low_key->fmr_device > high_key->fmr_device)
830  		return false;
831  	if (low_key->fmr_device < high_key->fmr_device)
832  		return true;
833  
834  	if (low_key->fmr_physical > high_key->fmr_physical)
835  		return false;
836  	if (low_key->fmr_physical < high_key->fmr_physical)
837  		return true;
838  
839  	if (low_key->fmr_owner > high_key->fmr_owner)
840  		return false;
841  	if (low_key->fmr_owner < high_key->fmr_owner)
842  		return true;
843  
844  	if (low_key->fmr_offset > high_key->fmr_offset)
845  		return false;
846  	if (low_key->fmr_offset < high_key->fmr_offset)
847  		return true;
848  
849  	return false;
850  }
851  
/*
 * Number of slots in the device handler table used by xfs_getfsmap().
 * There are only two devices if we didn't configure RT devices at build
 * time.
 */
#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	3
#else
#define XFS_GETFSMAP_DEVS	2
#endif /* CONFIG_XFS_RT */
860  
861  /*
862   * Get filesystem's extents as described in head, and format for output. Fills
863   * in the supplied records array until there are no more reverse mappings to
864   * return or head.fmh_entries == head.fmh_count.  In the second case, this
865   * function returns -ECANCELED to indicate that more records would have been
866   * returned.
867   *
868   * Key to Confusion
869   * ----------------
870   * There are multiple levels of keys and counters at work here:
871   * xfs_fsmap_head.fmh_keys	-- low and high fsmap keys passed in;
872   *				   these reflect fs-wide sector addrs.
873   * dkeys			-- fmh_keys used to query each device;
874   *				   these are fmh_keys but w/ the low key
875   *				   bumped up by fmr_length.
876   * xfs_getfsmap_info.next_daddr	-- next disk addr we expect to see; this
877   *				   is how we detect gaps in the fsmap
878  				   records and report them.
879   * xfs_getfsmap_info.low/high	-- per-AG low/high keys computed from
880   *				   dkeys; used to query the metadata.
881   */
882  int
xfs_getfsmap(struct xfs_mount * mp,struct xfs_fsmap_head * head,struct fsmap * fsmap_recs)883  xfs_getfsmap(
884  	struct xfs_mount		*mp,
885  	struct xfs_fsmap_head		*head,
886  	struct fsmap			*fsmap_recs)
887  {
888  	struct xfs_trans		*tp = NULL;
889  	struct xfs_fsmap		dkeys[2];	/* per-dev keys */
890  	struct xfs_getfsmap_dev		handlers[XFS_GETFSMAP_DEVS];
891  	struct xfs_getfsmap_info	info = { NULL };
892  	bool				use_rmap;
893  	int				i;
894  	int				error = 0;
895  
896  	if (head->fmh_iflags & ~FMH_IF_VALID)
897  		return -EINVAL;
898  	if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
899  	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
900  		return -EINVAL;
901  	if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
902  		return -EINVAL;
903  
904  	use_rmap = xfs_has_rmapbt(mp) &&
905  		   has_capability_noaudit(current, CAP_SYS_ADMIN);
906  	head->fmh_entries = 0;
907  
908  	/* Set up our device handlers. */
909  	memset(handlers, 0, sizeof(handlers));
910  	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
911  	if (use_rmap)
912  		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
913  	else
914  		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
915  	if (mp->m_logdev_targp != mp->m_ddev_targp) {
916  		handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
917  		handlers[1].fn = xfs_getfsmap_logdev;
918  	}
919  #ifdef CONFIG_XFS_RT
920  	if (mp->m_rtdev_targp) {
921  		handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
922  		handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
923  	}
924  #endif /* CONFIG_XFS_RT */
925  
926  	xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
927  			xfs_getfsmap_dev_compare);
928  
929  	/*
930  	 * To continue where we left off, we allow userspace to use the
931  	 * last mapping from a previous call as the low key of the next.
932  	 * This is identified by a non-zero length in the low key. We
933  	 * have to increment the low key in this scenario to ensure we
934  	 * don't return the same mapping again, and instead return the
935  	 * very next mapping.
936  	 *
937  	 * If the low key mapping refers to file data, the same physical
938  	 * blocks could be mapped to several other files/offsets.
939  	 * According to rmapbt record ordering, the minimal next
940  	 * possible record for the block range is the next starting
941  	 * offset in the same inode. Therefore, each fsmap backend bumps
942  	 * the file offset to continue the search appropriately.  For
943  	 * all other low key mapping types (attr blocks, metadata), each
944  	 * fsmap backend bumps the physical offset as there can be no
945  	 * other mapping for the same physical block range.
946  	 */
947  	dkeys[0] = head->fmh_keys[0];
948  	memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
949  
950  	info.next_daddr = head->fmh_keys[0].fmr_physical +
951  			  head->fmh_keys[0].fmr_length;
952  	info.fsmap_recs = fsmap_recs;
953  	info.head = head;
954  
955  	/* For each device we support... */
956  	for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
957  		/* Is this device within the range the user asked for? */
958  		if (!handlers[i].fn)
959  			continue;
960  		if (head->fmh_keys[0].fmr_device > handlers[i].dev)
961  			continue;
962  		if (head->fmh_keys[1].fmr_device < handlers[i].dev)
963  			break;
964  
965  		/*
966  		 * If this device number matches the high key, we have
967  		 * to pass the high key to the handler to limit the
968  		 * query results.  If the device number exceeds the
969  		 * low key, zero out the low key so that we get
970  		 * everything from the beginning.
971  		 */
972  		if (handlers[i].dev == head->fmh_keys[1].fmr_device)
973  			dkeys[1] = head->fmh_keys[1];
974  		if (handlers[i].dev > head->fmh_keys[0].fmr_device)
975  			memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));
976  
977  		/*
978  		 * Grab an empty transaction so that we can use its recursive
979  		 * buffer locking abilities to detect cycles in the rmapbt
980  		 * without deadlocking.
981  		 */
982  		error = xfs_trans_alloc_empty(mp, &tp);
983  		if (error)
984  			break;
985  
986  		info.dev = handlers[i].dev;
987  		info.last = false;
988  		info.pag = NULL;
989  		info.low_daddr = XFS_BUF_DADDR_NULL;
990  		info.low.rm_blockcount = 0;
991  		error = handlers[i].fn(tp, dkeys, &info);
992  		if (error)
993  			break;
994  		xfs_trans_cancel(tp);
995  		tp = NULL;
996  		info.next_daddr = 0;
997  	}
998  
999  	if (tp)
1000  		xfs_trans_cancel(tp);
1001  	head->fmh_oflags = FMH_OF_DEV_T;
1002  	return error;
1003  }
1004