xref: /openbmc/linux/fs/xfs/scrub/bmap.c (revision c4c3c32d)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_bit.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_alloc.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_btree.h"
20 #include "xfs_rmap.h"
21 #include "xfs_rmap_btree.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/btree.h"
25 #include "xfs_ag.h"
26 
27 /* Set us up with an inode's bmap. */
28 int
29 xchk_setup_inode_bmap(
30 	struct xfs_scrub	*sc)
31 {
32 	int			error;
33 
34 	if (xchk_need_intent_drain(sc))
35 		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
36 
37 	error = xchk_iget_for_scrubbing(sc);
38 	if (error)
39 		goto out;
40 
41 	sc->ilock_flags = XFS_IOLOCK_EXCL;
42 	xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
43 
44 	/*
45 	 * We don't want any ephemeral data/cow fork updates sitting around
46 	 * while we inspect block mappings, so wait for directio to finish
47 	 * and flush dirty data if we have delalloc reservations.
48 	 */
49 	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
50 	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
51 		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;
52 
53 		sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
54 		xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL);
55 
56 		inode_dio_wait(VFS_I(sc->ip));
57 
58 		/*
59 		 * Try to flush all incore state to disk before we examine the
60 		 * space mappings for the data fork.  Leave accumulated errors
61 		 * in the mapping for the writer threads to consume.
62 		 *
63 		 * On ENOSPC or EIO writeback errors, we continue into the
64 		 * extent mapping checks because write failures do not
65 		 * necessarily imply anything about the correctness of the file
66 		 * metadata.  The metadata and the file data could be on
67 		 * completely separate devices; a media failure might only
68 		 * affect a subset of the disk, etc.  We can handle delalloc
69 		 * extents in the scrubber, so leaving them in memory is fine.
70 		 */
71 		error = filemap_fdatawrite(mapping);
72 		if (!error)
73 			error = filemap_fdatawait_keep_errors(mapping);
74 		if (error && (error != -ENOSPC && error != -EIO))
75 			goto out;
76 	}
77 
78 	/* Got the inode, lock it and we're ready to go. */
79 	error = xchk_trans_alloc(sc, 0);
80 	if (error)
81 		goto out;
82 	sc->ilock_flags |= XFS_ILOCK_EXCL;
83 	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
84 
85 out:
86 	/* scrub teardown will unlock and release the inode */
87 	return error;
88 }
89 
/*
 * Inode fork block mapping (BMBT) scrubber.
 * More complex than the others because we have to scrub
 * all the extents regardless of whether or not the fork
 * is in btree format.
 */

/*
 * Per-fork scrub context shared by the bmap checking helpers in this file.
 * xchk_bmap() zero-initializes one of these and fills in the fork details
 * before walking the mappings.
 */
struct xchk_bmap_info {
	/* Scrub context that owns this check. */
	struct xfs_scrub	*sc;

	/*
	 * Incore extent tree cursor.  Must start out zeroed; it is advanced
	 * by xchk_bmap_iext_iter().
	 */
	struct xfs_iext_cursor	icur;

	/*
	 * Previous fork mapping that we examined; used to detect
	 * out-of-order mappings.
	 */
	struct xfs_bmbt_irec	prev_rec;

	/* Is this a realtime fork? */
	bool			is_rt;

	/* May mappings point to shared space? */
	bool			is_shared;

	/*
	 * Was the incore extent tree loaded before the scrubber asked for it
	 * to be read in?
	 */
	bool			was_loaded;

	/* Which inode fork are we checking? */
	int			whichfork;
};
118 
119 /* Look for a corresponding rmap for this irec. */
120 static inline bool
121 xchk_bmap_get_rmap(
122 	struct xchk_bmap_info	*info,
123 	struct xfs_bmbt_irec	*irec,
124 	xfs_agblock_t		agbno,
125 	uint64_t		owner,
126 	struct xfs_rmap_irec	*rmap)
127 {
128 	xfs_fileoff_t		offset;
129 	unsigned int		rflags = 0;
130 	int			has_rmap;
131 	int			error;
132 
133 	if (info->whichfork == XFS_ATTR_FORK)
134 		rflags |= XFS_RMAP_ATTR_FORK;
135 	if (irec->br_state == XFS_EXT_UNWRITTEN)
136 		rflags |= XFS_RMAP_UNWRITTEN;
137 
138 	/*
139 	 * CoW staging extents are owned (on disk) by the refcountbt, so
140 	 * their rmaps do not have offsets.
141 	 */
142 	if (info->whichfork == XFS_COW_FORK)
143 		offset = 0;
144 	else
145 		offset = irec->br_startoff;
146 
147 	/*
148 	 * If the caller thinks this could be a shared bmbt extent (IOWs,
149 	 * any data fork extent of a reflink inode) then we have to use the
150 	 * range rmap lookup to make sure we get the correct owner/offset.
151 	 */
152 	if (info->is_shared) {
153 		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
154 				owner, offset, rflags, rmap, &has_rmap);
155 	} else {
156 		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
157 				owner, offset, rflags, rmap, &has_rmap);
158 	}
159 	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
160 		return false;
161 
162 	if (!has_rmap)
163 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
164 			irec->br_startoff);
165 	return has_rmap;
166 }
167 
168 /* Make sure that we have rmapbt records for this data/attr fork extent. */
169 STATIC void
170 xchk_bmap_xref_rmap(
171 	struct xchk_bmap_info	*info,
172 	struct xfs_bmbt_irec	*irec,
173 	xfs_agblock_t		agbno)
174 {
175 	struct xfs_rmap_irec	rmap;
176 	unsigned long long	rmap_end;
177 	uint64_t		owner = info->sc->ip->i_ino;
178 
179 	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
180 		return;
181 
182 	/* Find the rmap record for this irec. */
183 	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
184 		return;
185 
186 	/*
187 	 * The rmap must be an exact match for this incore file mapping record,
188 	 * which may have arisen from multiple ondisk records.
189 	 */
190 	if (rmap.rm_startblock != agbno)
191 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
192 				irec->br_startoff);
193 
194 	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
195 	if (rmap_end != agbno + irec->br_blockcount)
196 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
197 				irec->br_startoff);
198 
199 	/* Check the logical offsets. */
200 	if (rmap.rm_offset != irec->br_startoff)
201 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
202 				irec->br_startoff);
203 
204 	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
205 	if (rmap_end != irec->br_startoff + irec->br_blockcount)
206 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
207 				irec->br_startoff);
208 
209 	/* Check the owner */
210 	if (rmap.rm_owner != owner)
211 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
212 				irec->br_startoff);
213 
214 	/*
215 	 * Check for discrepancies between the unwritten flag in the irec and
216 	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
217 	 * unwritten and written extents, but we don't track that in the rmap
218 	 * records because the blocks are owned (on-disk) by the refcountbt,
219 	 * which doesn't track unwritten state.
220 	 */
221 	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
222 	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
223 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
224 				irec->br_startoff);
225 
226 	if (!!(info->whichfork == XFS_ATTR_FORK) !=
227 	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
228 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
229 				irec->br_startoff);
230 	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
231 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
232 				irec->br_startoff);
233 }
234 
235 /* Make sure that we have rmapbt records for this COW fork extent. */
236 STATIC void
237 xchk_bmap_xref_rmap_cow(
238 	struct xchk_bmap_info	*info,
239 	struct xfs_bmbt_irec	*irec,
240 	xfs_agblock_t		agbno)
241 {
242 	struct xfs_rmap_irec	rmap;
243 	unsigned long long	rmap_end;
244 	uint64_t		owner = XFS_RMAP_OWN_COW;
245 
246 	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
247 		return;
248 
249 	/* Find the rmap record for this irec. */
250 	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
251 		return;
252 
253 	/*
254 	 * CoW staging extents are owned by the refcount btree, so the rmap
255 	 * can start before and end after the physical space allocated to this
256 	 * mapping.  There are no offsets to check.
257 	 */
258 	if (rmap.rm_startblock > agbno)
259 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
260 				irec->br_startoff);
261 
262 	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
263 	if (rmap_end < agbno + irec->br_blockcount)
264 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
265 				irec->br_startoff);
266 
267 	/* Check the owner */
268 	if (rmap.rm_owner != owner)
269 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
270 				irec->br_startoff);
271 
272 	/*
273 	 * No flags allowed.  Note that the (in-memory) CoW fork distinguishes
274 	 * between unwritten and written extents, but we don't track that in
275 	 * the rmap records because the blocks are owned (on-disk) by the
276 	 * refcountbt, which doesn't track unwritten state.
277 	 */
278 	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
279 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
280 				irec->br_startoff);
281 	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
282 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
283 				irec->br_startoff);
284 	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
285 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
286 				irec->br_startoff);
287 }
288 
289 /* Cross-reference a single rtdev extent record. */
290 STATIC void
291 xchk_bmap_rt_iextent_xref(
292 	struct xfs_inode	*ip,
293 	struct xchk_bmap_info	*info,
294 	struct xfs_bmbt_irec	*irec)
295 {
296 	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
297 			irec->br_blockcount);
298 }
299 
300 /* Cross-reference a single datadev extent record. */
301 STATIC void
302 xchk_bmap_iextent_xref(
303 	struct xfs_inode	*ip,
304 	struct xchk_bmap_info	*info,
305 	struct xfs_bmbt_irec	*irec)
306 {
307 	struct xfs_owner_info	oinfo;
308 	struct xfs_mount	*mp = info->sc->mp;
309 	xfs_agnumber_t		agno;
310 	xfs_agblock_t		agbno;
311 	xfs_extlen_t		len;
312 	int			error;
313 
314 	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
315 	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
316 	len = irec->br_blockcount;
317 
318 	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
319 	if (!xchk_fblock_process_error(info->sc, info->whichfork,
320 			irec->br_startoff, &error))
321 		goto out_free;
322 
323 	xchk_xref_is_used_space(info->sc, agbno, len);
324 	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
325 	switch (info->whichfork) {
326 	case XFS_DATA_FORK:
327 		xchk_bmap_xref_rmap(info, irec, agbno);
328 		if (!xfs_is_reflink_inode(info->sc->ip)) {
329 			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
330 					info->whichfork, irec->br_startoff);
331 			xchk_xref_is_only_owned_by(info->sc, agbno,
332 					irec->br_blockcount, &oinfo);
333 			xchk_xref_is_not_shared(info->sc, agbno,
334 					irec->br_blockcount);
335 		}
336 		xchk_xref_is_not_cow_staging(info->sc, agbno,
337 				irec->br_blockcount);
338 		break;
339 	case XFS_ATTR_FORK:
340 		xchk_bmap_xref_rmap(info, irec, agbno);
341 		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
342 				info->whichfork, irec->br_startoff);
343 		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
344 				&oinfo);
345 		xchk_xref_is_not_shared(info->sc, agbno,
346 				irec->br_blockcount);
347 		xchk_xref_is_not_cow_staging(info->sc, agbno,
348 				irec->br_blockcount);
349 		break;
350 	case XFS_COW_FORK:
351 		xchk_bmap_xref_rmap_cow(info, irec, agbno);
352 		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
353 				&XFS_RMAP_OINFO_COW);
354 		xchk_xref_is_cow_staging(info->sc, agbno,
355 				irec->br_blockcount);
356 		xchk_xref_is_not_shared(info->sc, agbno,
357 				irec->br_blockcount);
358 		break;
359 	}
360 
361 out_free:
362 	xchk_ag_free(info->sc, &info->sc->sa);
363 }
364 
365 /*
366  * Directories and attr forks should never have blocks that can't be addressed
367  * by a xfs_dablk_t.
368  */
369 STATIC void
370 xchk_bmap_dirattr_extent(
371 	struct xfs_inode	*ip,
372 	struct xchk_bmap_info	*info,
373 	struct xfs_bmbt_irec	*irec)
374 {
375 	struct xfs_mount	*mp = ip->i_mount;
376 	xfs_fileoff_t		off;
377 
378 	if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
379 		return;
380 
381 	if (!xfs_verify_dablk(mp, irec->br_startoff))
382 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
383 				irec->br_startoff);
384 
385 	off = irec->br_startoff + irec->br_blockcount - 1;
386 	if (!xfs_verify_dablk(mp, off))
387 		xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
388 }
389 
390 /* Scrub a single extent record. */
391 STATIC void
392 xchk_bmap_iextent(
393 	struct xfs_inode	*ip,
394 	struct xchk_bmap_info	*info,
395 	struct xfs_bmbt_irec	*irec)
396 {
397 	struct xfs_mount	*mp = info->sc->mp;
398 
399 	/*
400 	 * Check for out-of-order extents.  This record could have come
401 	 * from the incore list, for which there is no ordering check.
402 	 */
403 	if (irec->br_startoff < info->prev_rec.br_startoff +
404 				info->prev_rec.br_blockcount)
405 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
406 				irec->br_startoff);
407 
408 	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
409 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
410 				irec->br_startoff);
411 
412 	xchk_bmap_dirattr_extent(ip, info, irec);
413 
414 	/* Make sure the extent points to a valid place. */
415 	if (info->is_rt &&
416 	    !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
417 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
418 				irec->br_startoff);
419 	if (!info->is_rt &&
420 	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
421 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
422 				irec->br_startoff);
423 
424 	/* We don't allow unwritten extents on attr forks. */
425 	if (irec->br_state == XFS_EXT_UNWRITTEN &&
426 	    info->whichfork == XFS_ATTR_FORK)
427 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
428 				irec->br_startoff);
429 
430 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
431 		return;
432 
433 	if (info->is_rt)
434 		xchk_bmap_rt_iextent_xref(ip, info, irec);
435 	else
436 		xchk_bmap_iextent_xref(ip, info, irec);
437 }
438 
439 /* Scrub a bmbt record. */
440 STATIC int
441 xchk_bmapbt_rec(
442 	struct xchk_btree	*bs,
443 	const union xfs_btree_rec *rec)
444 {
445 	struct xfs_bmbt_irec	irec;
446 	struct xfs_bmbt_irec	iext_irec;
447 	struct xfs_iext_cursor	icur;
448 	struct xchk_bmap_info	*info = bs->private;
449 	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
450 	struct xfs_buf		*bp = NULL;
451 	struct xfs_btree_block	*block;
452 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
453 	uint64_t		owner;
454 	int			i;
455 
456 	/*
457 	 * Check the owners of the btree blocks up to the level below
458 	 * the root since the verifiers don't do that.
459 	 */
460 	if (xfs_has_crc(bs->cur->bc_mp) &&
461 	    bs->cur->bc_levels[0].ptr == 1) {
462 		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
463 			block = xfs_btree_get_block(bs->cur, i, &bp);
464 			owner = be64_to_cpu(block->bb_u.l.bb_owner);
465 			if (owner != ip->i_ino)
466 				xchk_fblock_set_corrupt(bs->sc,
467 						info->whichfork, 0);
468 		}
469 	}
470 
471 	/*
472 	 * Check that the incore extent tree contains an extent that matches
473 	 * this one exactly.  We validate those cached bmaps later, so we don't
474 	 * need to check them here.  If the incore extent tree was just loaded
475 	 * from disk by the scrubber, we assume that its contents match what's
476 	 * on disk (we still hold the ILOCK) and skip the equivalence check.
477 	 */
478 	if (!info->was_loaded)
479 		return 0;
480 
481 	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
482 	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
483 		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
484 				irec.br_startoff);
485 		return 0;
486 	}
487 
488 	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
489 				&iext_irec) ||
490 	    irec.br_startoff != iext_irec.br_startoff ||
491 	    irec.br_startblock != iext_irec.br_startblock ||
492 	    irec.br_blockcount != iext_irec.br_blockcount ||
493 	    irec.br_state != iext_irec.br_state)
494 		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
495 				irec.br_startoff);
496 	return 0;
497 }
498 
499 /* Scan the btree records. */
500 STATIC int
501 xchk_bmap_btree(
502 	struct xfs_scrub	*sc,
503 	int			whichfork,
504 	struct xchk_bmap_info	*info)
505 {
506 	struct xfs_owner_info	oinfo;
507 	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
508 	struct xfs_mount	*mp = sc->mp;
509 	struct xfs_inode	*ip = sc->ip;
510 	struct xfs_btree_cur	*cur;
511 	int			error;
512 
513 	/* Load the incore bmap cache if it's not loaded. */
514 	info->was_loaded = !xfs_need_iread_extents(ifp);
515 
516 	error = xfs_iread_extents(sc->tp, ip, whichfork);
517 	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
518 		goto out;
519 
520 	/* Check the btree structure. */
521 	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
522 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
523 	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
524 	xfs_btree_del_cursor(cur, error);
525 out:
526 	return error;
527 }
528 
/*
 * Context passed to xchk_bmap_check_rmap() while walking the reverse
 * mappings of one AG.
 */
struct xchk_bmap_check_rmap_info {
	/* Scrub context; identifies the inode being checked. */
	struct xfs_scrub	*sc;
	/* Which fork of the inode the rmaps are compared against. */
	int			whichfork;
	/* Incore extent tree cursor used to look up and step through mappings. */
	struct xfs_iext_cursor	icur;
};
534 
535 /* Can we find bmaps that fit this rmap? */
536 STATIC int
537 xchk_bmap_check_rmap(
538 	struct xfs_btree_cur		*cur,
539 	const struct xfs_rmap_irec	*rec,
540 	void				*priv)
541 {
542 	struct xfs_bmbt_irec		irec;
543 	struct xfs_rmap_irec		check_rec;
544 	struct xchk_bmap_check_rmap_info	*sbcri = priv;
545 	struct xfs_ifork		*ifp;
546 	struct xfs_scrub		*sc = sbcri->sc;
547 	bool				have_map;
548 
549 	/* Is this even the right fork? */
550 	if (rec->rm_owner != sc->ip->i_ino)
551 		return 0;
552 	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
553 	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
554 		return 0;
555 	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
556 		return 0;
557 
558 	/* Now look up the bmbt record. */
559 	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
560 	if (!ifp) {
561 		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
562 				rec->rm_offset);
563 		goto out;
564 	}
565 	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
566 			&sbcri->icur, &irec);
567 	if (!have_map)
568 		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
569 				rec->rm_offset);
570 	/*
571 	 * bmap extent record lengths are constrained to 2^21 blocks in length
572 	 * because of space constraints in the on-disk metadata structure.
573 	 * However, rmap extent record lengths are constrained only by AG
574 	 * length, so we have to loop through the bmbt to make sure that the
575 	 * entire rmap is covered by bmbt records.
576 	 */
577 	check_rec = *rec;
578 	while (have_map) {
579 		if (irec.br_startoff != check_rec.rm_offset)
580 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
581 					check_rec.rm_offset);
582 		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
583 				cur->bc_ag.pag->pag_agno,
584 				check_rec.rm_startblock))
585 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
586 					check_rec.rm_offset);
587 		if (irec.br_blockcount > check_rec.rm_blockcount)
588 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
589 					check_rec.rm_offset);
590 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
591 			break;
592 		check_rec.rm_startblock += irec.br_blockcount;
593 		check_rec.rm_offset += irec.br_blockcount;
594 		check_rec.rm_blockcount -= irec.br_blockcount;
595 		if (check_rec.rm_blockcount == 0)
596 			break;
597 		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
598 		if (!have_map)
599 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
600 					check_rec.rm_offset);
601 	}
602 
603 out:
604 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
605 		return -ECANCELED;
606 	return 0;
607 }
608 
609 /* Make sure each rmap has a corresponding bmbt entry. */
610 STATIC int
611 xchk_bmap_check_ag_rmaps(
612 	struct xfs_scrub		*sc,
613 	int				whichfork,
614 	struct xfs_perag		*pag)
615 {
616 	struct xchk_bmap_check_rmap_info	sbcri;
617 	struct xfs_btree_cur		*cur;
618 	struct xfs_buf			*agf;
619 	int				error;
620 
621 	error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
622 	if (error)
623 		return error;
624 
625 	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
626 
627 	sbcri.sc = sc;
628 	sbcri.whichfork = whichfork;
629 	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
630 	if (error == -ECANCELED)
631 		error = 0;
632 
633 	xfs_btree_del_cursor(cur, error);
634 	xfs_trans_brelse(sc->tp, agf);
635 	return error;
636 }
637 
638 /*
639  * Decide if we want to walk every rmap btree in the fs to make sure that each
640  * rmap for this file fork has corresponding bmbt entries.
641  */
642 static bool
643 xchk_bmap_want_check_rmaps(
644 	struct xchk_bmap_info	*info)
645 {
646 	struct xfs_scrub	*sc = info->sc;
647 	struct xfs_ifork	*ifp;
648 
649 	if (!xfs_has_rmapbt(sc->mp))
650 		return false;
651 	if (info->whichfork == XFS_COW_FORK)
652 		return false;
653 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
654 		return false;
655 
656 	/* Don't support realtime rmap checks yet. */
657 	if (info->is_rt)
658 		return false;
659 
660 	/*
661 	 * The inode repair code zaps broken inode forks by resetting them back
662 	 * to EXTENTS format and zero extent records.  If we encounter a fork
663 	 * in this state along with evidence that the fork isn't supposed to be
664 	 * empty, we need to scan the reverse mappings to decide if we're going
665 	 * to rebuild the fork.  Data forks with nonzero file size are scanned.
666 	 * xattr forks are never empty of content, so they are always scanned.
667 	 */
668 	ifp = xfs_ifork_ptr(sc->ip, info->whichfork);
669 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) {
670 		if (info->whichfork == XFS_DATA_FORK &&
671 		    i_size_read(VFS_I(sc->ip)) == 0)
672 			return false;
673 
674 		return true;
675 	}
676 
677 	return false;
678 }
679 
680 /* Make sure each rmap has a corresponding bmbt entry. */
681 STATIC int
682 xchk_bmap_check_rmaps(
683 	struct xfs_scrub	*sc,
684 	int			whichfork)
685 {
686 	struct xfs_perag	*pag;
687 	xfs_agnumber_t		agno;
688 	int			error;
689 
690 	for_each_perag(sc->mp, agno, pag) {
691 		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
692 		if (error ||
693 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
694 			xfs_perag_rele(pag);
695 			return error;
696 		}
697 	}
698 
699 	return 0;
700 }
701 
702 /* Scrub a delalloc reservation from the incore extent map tree. */
703 STATIC void
704 xchk_bmap_iextent_delalloc(
705 	struct xfs_inode	*ip,
706 	struct xchk_bmap_info	*info,
707 	struct xfs_bmbt_irec	*irec)
708 {
709 	struct xfs_mount	*mp = info->sc->mp;
710 
711 	/*
712 	 * Check for out-of-order extents.  This record could have come
713 	 * from the incore list, for which there is no ordering check.
714 	 */
715 	if (irec->br_startoff < info->prev_rec.br_startoff +
716 				info->prev_rec.br_blockcount)
717 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
718 				irec->br_startoff);
719 
720 	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
721 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
722 				irec->br_startoff);
723 
724 	/* Make sure the extent points to a valid place. */
725 	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
726 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
727 				irec->br_startoff);
728 }
729 
730 /* Decide if this individual fork mapping is ok. */
731 static bool
732 xchk_bmap_iext_mapping(
733 	struct xchk_bmap_info		*info,
734 	const struct xfs_bmbt_irec	*irec)
735 {
736 	/* There should never be a "hole" extent in either extent list. */
737 	if (irec->br_startblock == HOLESTARTBLOCK)
738 		return false;
739 	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
740 		return false;
741 	return true;
742 }
743 
744 /* Are these two mappings contiguous with each other? */
745 static inline bool
746 xchk_are_bmaps_contiguous(
747 	const struct xfs_bmbt_irec	*b1,
748 	const struct xfs_bmbt_irec	*b2)
749 {
750 	/* Don't try to combine unallocated mappings. */
751 	if (!xfs_bmap_is_real_extent(b1))
752 		return false;
753 	if (!xfs_bmap_is_real_extent(b2))
754 		return false;
755 
756 	/* Does b2 come right after b1 in the logical and physical range? */
757 	if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
758 		return false;
759 	if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
760 		return false;
761 	if (b1->br_state != b2->br_state)
762 		return false;
763 	return true;
764 }
765 
766 /*
767  * Walk the incore extent records, accumulating consecutive contiguous records
768  * into a single incore mapping.  Returns true if @irec has been set to a
769  * mapping or false if there are no more mappings.  Caller must ensure that
770  * @info.icur is zeroed before the first call.
771  */
772 static bool
773 xchk_bmap_iext_iter(
774 	struct xchk_bmap_info	*info,
775 	struct xfs_bmbt_irec	*irec)
776 {
777 	struct xfs_bmbt_irec	got;
778 	struct xfs_ifork	*ifp;
779 	unsigned int		nr = 0;
780 
781 	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
782 
783 	/* Advance to the next iextent record and check the mapping. */
784 	xfs_iext_next(ifp, &info->icur);
785 	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
786 		return false;
787 
788 	if (!xchk_bmap_iext_mapping(info, irec)) {
789 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
790 				irec->br_startoff);
791 		return false;
792 	}
793 	nr++;
794 
795 	/*
796 	 * Iterate subsequent iextent records and merge them with the one
797 	 * that we just read, if possible.
798 	 */
799 	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
800 		if (!xchk_are_bmaps_contiguous(irec, &got))
801 			break;
802 
803 		if (!xchk_bmap_iext_mapping(info, &got)) {
804 			xchk_fblock_set_corrupt(info->sc, info->whichfork,
805 					got.br_startoff);
806 			return false;
807 		}
808 		nr++;
809 
810 		irec->br_blockcount += got.br_blockcount;
811 		xfs_iext_next(ifp, &info->icur);
812 	}
813 
814 	/*
815 	 * If the merged mapping could be expressed with fewer bmbt records
816 	 * than we actually found, notify the user that this fork could be
817 	 * optimized.  CoW forks only exist in memory so we ignore them.
818 	 */
819 	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
820 	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
821 		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
822 
823 	return true;
824 }
825 
826 /*
827  * Scrub an inode fork's block mappings.
828  *
829  * First we scan every record in every btree block, if applicable.
830  * Then we unconditionally scan the incore extent cache.
831  */
832 STATIC int
833 xchk_bmap(
834 	struct xfs_scrub	*sc,
835 	int			whichfork)
836 {
837 	struct xfs_bmbt_irec	irec;
838 	struct xchk_bmap_info	info = { NULL };
839 	struct xfs_mount	*mp = sc->mp;
840 	struct xfs_inode	*ip = sc->ip;
841 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
842 	xfs_fileoff_t		endoff;
843 	int			error = 0;
844 
845 	/* Non-existent forks can be ignored. */
846 	if (!ifp)
847 		goto out;
848 
849 	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
850 	info.whichfork = whichfork;
851 	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
852 	info.sc = sc;
853 
854 	switch (whichfork) {
855 	case XFS_COW_FORK:
856 		/* No CoW forks on non-reflink inodes/filesystems. */
857 		if (!xfs_is_reflink_inode(ip)) {
858 			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
859 			goto out;
860 		}
861 		break;
862 	case XFS_ATTR_FORK:
863 		if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
864 			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
865 		break;
866 	default:
867 		ASSERT(whichfork == XFS_DATA_FORK);
868 		break;
869 	}
870 
871 	/* Check the fork values */
872 	switch (ifp->if_format) {
873 	case XFS_DINODE_FMT_UUID:
874 	case XFS_DINODE_FMT_DEV:
875 	case XFS_DINODE_FMT_LOCAL:
876 		/* No mappings to check. */
877 		if (whichfork == XFS_COW_FORK)
878 			xchk_fblock_set_corrupt(sc, whichfork, 0);
879 		goto out;
880 	case XFS_DINODE_FMT_EXTENTS:
881 		break;
882 	case XFS_DINODE_FMT_BTREE:
883 		if (whichfork == XFS_COW_FORK) {
884 			xchk_fblock_set_corrupt(sc, whichfork, 0);
885 			goto out;
886 		}
887 
888 		error = xchk_bmap_btree(sc, whichfork, &info);
889 		if (error)
890 			goto out;
891 		break;
892 	default:
893 		xchk_fblock_set_corrupt(sc, whichfork, 0);
894 		goto out;
895 	}
896 
897 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
898 		goto out;
899 
900 	/* Find the offset of the last extent in the mapping. */
901 	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
902 	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
903 		goto out;
904 
905 	/*
906 	 * Scrub extent records.  We use a special iterator function here that
907 	 * combines adjacent mappings if they are logically and physically
908 	 * contiguous.   For large allocations that require multiple bmbt
909 	 * records, this reduces the number of cross-referencing calls, which
910 	 * reduces runtime.  Cross referencing with the rmap is simpler because
911 	 * the rmap must match the combined mapping exactly.
912 	 */
913 	while (xchk_bmap_iext_iter(&info, &irec)) {
914 		if (xchk_should_terminate(sc, &error) ||
915 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
916 			goto out;
917 
918 		if (irec.br_startoff >= endoff) {
919 			xchk_fblock_set_corrupt(sc, whichfork,
920 					irec.br_startoff);
921 			goto out;
922 		}
923 
924 		if (isnullstartblock(irec.br_startblock))
925 			xchk_bmap_iextent_delalloc(ip, &info, &irec);
926 		else
927 			xchk_bmap_iextent(ip, &info, &irec);
928 		memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
929 	}
930 
931 	if (xchk_bmap_want_check_rmaps(&info)) {
932 		error = xchk_bmap_check_rmaps(sc, whichfork);
933 		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
934 			goto out;
935 	}
936 out:
937 	return error;
938 }
939 
/*
 * Scrub an inode's data fork.  Thin wrapper that runs xchk_bmap() on
 * XFS_DATA_FORK and returns its result.
 */
int
xchk_bmap_data(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_DATA_FORK);
}
947 
/*
 * Scrub an inode's attr fork.  Thin wrapper that runs xchk_bmap() on
 * XFS_ATTR_FORK and returns its result.
 */
int
xchk_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_ATTR_FORK);
}
955 
956 /* Scrub an inode's CoW fork. */
957 int
958 xchk_bmap_cow(
959 	struct xfs_scrub	*sc)
960 {
961 	if (!xfs_is_reflink_inode(sc->ip))
962 		return -ENOENT;
963 
964 	return xchk_bmap(sc, XFS_COW_FORK);
965 }
966