xref: /openbmc/linux/fs/xfs/scrub/bmap.c (revision a5d2bb06)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_bit.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_alloc.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_btree.h"
20 #include "xfs_rmap.h"
21 #include "xfs_rmap_btree.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/btree.h"
25 #include "xfs_ag.h"
26 
27 /* Set us up with an inode's bmap. */
28 int
29 xchk_setup_inode_bmap(
30 	struct xfs_scrub	*sc)
31 {
32 	int			error;
33 
34 	if (xchk_need_intent_drain(sc))
35 		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
36 
37 	error = xchk_iget_for_scrubbing(sc);
38 	if (error)
39 		goto out;
40 
41 	xchk_ilock(sc, XFS_IOLOCK_EXCL);
42 
43 	/*
44 	 * We don't want any ephemeral data/cow fork updates sitting around
45 	 * while we inspect block mappings, so wait for directio to finish
46 	 * and flush dirty data if we have delalloc reservations.
47 	 */
48 	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
49 	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
50 		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;
51 
52 		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);
53 
54 		inode_dio_wait(VFS_I(sc->ip));
55 
56 		/*
57 		 * Try to flush all incore state to disk before we examine the
58 		 * space mappings for the data fork.  Leave accumulated errors
59 		 * in the mapping for the writer threads to consume.
60 		 *
61 		 * On ENOSPC or EIO writeback errors, we continue into the
62 		 * extent mapping checks because write failures do not
63 		 * necessarily imply anything about the correctness of the file
64 		 * metadata.  The metadata and the file data could be on
65 		 * completely separate devices; a media failure might only
66 		 * affect a subset of the disk, etc.  We can handle delalloc
67 		 * extents in the scrubber, so leaving them in memory is fine.
68 		 */
69 		error = filemap_fdatawrite(mapping);
70 		if (!error)
71 			error = filemap_fdatawait_keep_errors(mapping);
72 		if (error && (error != -ENOSPC && error != -EIO))
73 			goto out;
74 	}
75 
76 	/* Got the inode, lock it and we're ready to go. */
77 	error = xchk_trans_alloc(sc, 0);
78 	if (error)
79 		goto out;
80 
81 	xchk_ilock(sc, XFS_ILOCK_EXCL);
82 out:
83 	/* scrub teardown will unlock and release the inode */
84 	return error;
85 }
86 
87 /*
88  * Inode fork block mapping (BMBT) scrubber.
89  * More complex than the others because we have to scrub
90  * all the extents regardless of whether or not the fork
91  * is in btree format.
92  */
93 
94 struct xchk_bmap_info {
95 	struct xfs_scrub	*sc;
96 
97 	/* Incore extent tree cursor */
98 	struct xfs_iext_cursor	icur;
99 
100 	/* Previous fork mapping that we examined */
101 	struct xfs_bmbt_irec	prev_rec;
102 
103 	/* Is this a realtime fork? */
104 	bool			is_rt;
105 
106 	/* May mappings point to shared space? */
107 	bool			is_shared;
108 
109 	/* Was the incore extent tree loaded? */
110 	bool			was_loaded;
111 
112 	/* Which inode fork are we checking? */
113 	int			whichfork;
114 };
115 
116 /* Look for a corresponding rmap for this irec. */
117 static inline bool
118 xchk_bmap_get_rmap(
119 	struct xchk_bmap_info	*info,
120 	struct xfs_bmbt_irec	*irec,
121 	xfs_agblock_t		agbno,
122 	uint64_t		owner,
123 	struct xfs_rmap_irec	*rmap)
124 {
125 	xfs_fileoff_t		offset;
126 	unsigned int		rflags = 0;
127 	int			has_rmap;
128 	int			error;
129 
130 	if (info->whichfork == XFS_ATTR_FORK)
131 		rflags |= XFS_RMAP_ATTR_FORK;
132 	if (irec->br_state == XFS_EXT_UNWRITTEN)
133 		rflags |= XFS_RMAP_UNWRITTEN;
134 
135 	/*
136 	 * CoW staging extents are owned (on disk) by the refcountbt, so
137 	 * their rmaps do not have offsets.
138 	 */
139 	if (info->whichfork == XFS_COW_FORK)
140 		offset = 0;
141 	else
142 		offset = irec->br_startoff;
143 
144 	/*
145 	 * If the caller thinks this could be a shared bmbt extent (IOWs,
146 	 * any data fork extent of a reflink inode) then we have to use the
147 	 * range rmap lookup to make sure we get the correct owner/offset.
148 	 */
149 	if (info->is_shared) {
150 		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
151 				owner, offset, rflags, rmap, &has_rmap);
152 	} else {
153 		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
154 				owner, offset, rflags, rmap, &has_rmap);
155 	}
156 	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
157 		return false;
158 
159 	if (!has_rmap)
160 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
161 			irec->br_startoff);
162 	return has_rmap;
163 }
164 
165 /* Make sure that we have rmapbt records for this data/attr fork extent. */
166 STATIC void
167 xchk_bmap_xref_rmap(
168 	struct xchk_bmap_info	*info,
169 	struct xfs_bmbt_irec	*irec,
170 	xfs_agblock_t		agbno)
171 {
172 	struct xfs_rmap_irec	rmap;
173 	unsigned long long	rmap_end;
174 	uint64_t		owner = info->sc->ip->i_ino;
175 
176 	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
177 		return;
178 
179 	/* Find the rmap record for this irec. */
180 	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
181 		return;
182 
183 	/*
184 	 * The rmap must be an exact match for this incore file mapping record,
185 	 * which may have arisen from multiple ondisk records.
186 	 */
187 	if (rmap.rm_startblock != agbno)
188 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
189 				irec->br_startoff);
190 
191 	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
192 	if (rmap_end != agbno + irec->br_blockcount)
193 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
194 				irec->br_startoff);
195 
196 	/* Check the logical offsets. */
197 	if (rmap.rm_offset != irec->br_startoff)
198 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
199 				irec->br_startoff);
200 
201 	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
202 	if (rmap_end != irec->br_startoff + irec->br_blockcount)
203 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
204 				irec->br_startoff);
205 
206 	/* Check the owner */
207 	if (rmap.rm_owner != owner)
208 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
209 				irec->br_startoff);
210 
211 	/*
212 	 * Check for discrepancies between the unwritten flag in the irec and
213 	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
214 	 * unwritten and written extents, but we don't track that in the rmap
215 	 * records because the blocks are owned (on-disk) by the refcountbt,
216 	 * which doesn't track unwritten state.
217 	 */
218 	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
219 	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
220 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
221 				irec->br_startoff);
222 
223 	if (!!(info->whichfork == XFS_ATTR_FORK) !=
224 	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
225 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
226 				irec->br_startoff);
227 	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
228 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
229 				irec->br_startoff);
230 }
231 
232 /* Make sure that we have rmapbt records for this COW fork extent. */
233 STATIC void
234 xchk_bmap_xref_rmap_cow(
235 	struct xchk_bmap_info	*info,
236 	struct xfs_bmbt_irec	*irec,
237 	xfs_agblock_t		agbno)
238 {
239 	struct xfs_rmap_irec	rmap;
240 	unsigned long long	rmap_end;
241 	uint64_t		owner = XFS_RMAP_OWN_COW;
242 
243 	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
244 		return;
245 
246 	/* Find the rmap record for this irec. */
247 	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
248 		return;
249 
250 	/*
251 	 * CoW staging extents are owned by the refcount btree, so the rmap
252 	 * can start before and end after the physical space allocated to this
253 	 * mapping.  There are no offsets to check.
254 	 */
255 	if (rmap.rm_startblock > agbno)
256 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
257 				irec->br_startoff);
258 
259 	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
260 	if (rmap_end < agbno + irec->br_blockcount)
261 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
262 				irec->br_startoff);
263 
264 	/* Check the owner */
265 	if (rmap.rm_owner != owner)
266 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
267 				irec->br_startoff);
268 
269 	/*
270 	 * No flags allowed.  Note that the (in-memory) CoW fork distinguishes
271 	 * between unwritten and written extents, but we don't track that in
272 	 * the rmap records because the blocks are owned (on-disk) by the
273 	 * refcountbt, which doesn't track unwritten state.
274 	 */
275 	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
276 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
277 				irec->br_startoff);
278 	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
279 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
280 				irec->br_startoff);
281 	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
282 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
283 				irec->br_startoff);
284 }
285 
286 /* Cross-reference a single rtdev extent record. */
287 STATIC void
288 xchk_bmap_rt_iextent_xref(
289 	struct xfs_inode	*ip,
290 	struct xchk_bmap_info	*info,
291 	struct xfs_bmbt_irec	*irec)
292 {
293 	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
294 			irec->br_blockcount);
295 }
296 
297 /* Cross-reference a single datadev extent record. */
298 STATIC void
299 xchk_bmap_iextent_xref(
300 	struct xfs_inode	*ip,
301 	struct xchk_bmap_info	*info,
302 	struct xfs_bmbt_irec	*irec)
303 {
304 	struct xfs_owner_info	oinfo;
305 	struct xfs_mount	*mp = info->sc->mp;
306 	xfs_agnumber_t		agno;
307 	xfs_agblock_t		agbno;
308 	xfs_extlen_t		len;
309 	int			error;
310 
311 	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
312 	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
313 	len = irec->br_blockcount;
314 
315 	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
316 	if (!xchk_fblock_process_error(info->sc, info->whichfork,
317 			irec->br_startoff, &error))
318 		goto out_free;
319 
320 	xchk_xref_is_used_space(info->sc, agbno, len);
321 	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
322 	switch (info->whichfork) {
323 	case XFS_DATA_FORK:
324 		xchk_bmap_xref_rmap(info, irec, agbno);
325 		if (!xfs_is_reflink_inode(info->sc->ip)) {
326 			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
327 					info->whichfork, irec->br_startoff);
328 			xchk_xref_is_only_owned_by(info->sc, agbno,
329 					irec->br_blockcount, &oinfo);
330 			xchk_xref_is_not_shared(info->sc, agbno,
331 					irec->br_blockcount);
332 		}
333 		xchk_xref_is_not_cow_staging(info->sc, agbno,
334 				irec->br_blockcount);
335 		break;
336 	case XFS_ATTR_FORK:
337 		xchk_bmap_xref_rmap(info, irec, agbno);
338 		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
339 				info->whichfork, irec->br_startoff);
340 		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
341 				&oinfo);
342 		xchk_xref_is_not_shared(info->sc, agbno,
343 				irec->br_blockcount);
344 		xchk_xref_is_not_cow_staging(info->sc, agbno,
345 				irec->br_blockcount);
346 		break;
347 	case XFS_COW_FORK:
348 		xchk_bmap_xref_rmap_cow(info, irec, agbno);
349 		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
350 				&XFS_RMAP_OINFO_COW);
351 		xchk_xref_is_cow_staging(info->sc, agbno,
352 				irec->br_blockcount);
353 		xchk_xref_is_not_shared(info->sc, agbno,
354 				irec->br_blockcount);
355 		break;
356 	}
357 
358 out_free:
359 	xchk_ag_free(info->sc, &info->sc->sa);
360 }
361 
362 /*
363  * Directories and attr forks should never have blocks that can't be addressed
364  * by a xfs_dablk_t.
365  */
366 STATIC void
367 xchk_bmap_dirattr_extent(
368 	struct xfs_inode	*ip,
369 	struct xchk_bmap_info	*info,
370 	struct xfs_bmbt_irec	*irec)
371 {
372 	struct xfs_mount	*mp = ip->i_mount;
373 	xfs_fileoff_t		off;
374 
375 	if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
376 		return;
377 
378 	if (!xfs_verify_dablk(mp, irec->br_startoff))
379 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
380 				irec->br_startoff);
381 
382 	off = irec->br_startoff + irec->br_blockcount - 1;
383 	if (!xfs_verify_dablk(mp, off))
384 		xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
385 }
386 
387 /* Scrub a single extent record. */
388 STATIC void
389 xchk_bmap_iextent(
390 	struct xfs_inode	*ip,
391 	struct xchk_bmap_info	*info,
392 	struct xfs_bmbt_irec	*irec)
393 {
394 	struct xfs_mount	*mp = info->sc->mp;
395 
396 	/*
397 	 * Check for out-of-order extents.  This record could have come
398 	 * from the incore list, for which there is no ordering check.
399 	 */
400 	if (irec->br_startoff < info->prev_rec.br_startoff +
401 				info->prev_rec.br_blockcount)
402 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
403 				irec->br_startoff);
404 
405 	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
406 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
407 				irec->br_startoff);
408 
409 	xchk_bmap_dirattr_extent(ip, info, irec);
410 
411 	/* Make sure the extent points to a valid place. */
412 	if (info->is_rt &&
413 	    !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
414 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
415 				irec->br_startoff);
416 	if (!info->is_rt &&
417 	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
418 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
419 				irec->br_startoff);
420 
421 	/* We don't allow unwritten extents on attr forks. */
422 	if (irec->br_state == XFS_EXT_UNWRITTEN &&
423 	    info->whichfork == XFS_ATTR_FORK)
424 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
425 				irec->br_startoff);
426 
427 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
428 		return;
429 
430 	if (info->is_rt)
431 		xchk_bmap_rt_iextent_xref(ip, info, irec);
432 	else
433 		xchk_bmap_iextent_xref(ip, info, irec);
434 }
435 
436 /* Scrub a bmbt record. */
437 STATIC int
438 xchk_bmapbt_rec(
439 	struct xchk_btree	*bs,
440 	const union xfs_btree_rec *rec)
441 {
442 	struct xfs_bmbt_irec	irec;
443 	struct xfs_bmbt_irec	iext_irec;
444 	struct xfs_iext_cursor	icur;
445 	struct xchk_bmap_info	*info = bs->private;
446 	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
447 	struct xfs_buf		*bp = NULL;
448 	struct xfs_btree_block	*block;
449 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
450 	uint64_t		owner;
451 	int			i;
452 
453 	/*
454 	 * Check the owners of the btree blocks up to the level below
455 	 * the root since the verifiers don't do that.
456 	 */
457 	if (xfs_has_crc(bs->cur->bc_mp) &&
458 	    bs->cur->bc_levels[0].ptr == 1) {
459 		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
460 			block = xfs_btree_get_block(bs->cur, i, &bp);
461 			owner = be64_to_cpu(block->bb_u.l.bb_owner);
462 			if (owner != ip->i_ino)
463 				xchk_fblock_set_corrupt(bs->sc,
464 						info->whichfork, 0);
465 		}
466 	}
467 
468 	/*
469 	 * Check that the incore extent tree contains an extent that matches
470 	 * this one exactly.  We validate those cached bmaps later, so we don't
471 	 * need to check them here.  If the incore extent tree was just loaded
472 	 * from disk by the scrubber, we assume that its contents match what's
473 	 * on disk (we still hold the ILOCK) and skip the equivalence check.
474 	 */
475 	if (!info->was_loaded)
476 		return 0;
477 
478 	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
479 	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
480 		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
481 				irec.br_startoff);
482 		return 0;
483 	}
484 
485 	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
486 				&iext_irec) ||
487 	    irec.br_startoff != iext_irec.br_startoff ||
488 	    irec.br_startblock != iext_irec.br_startblock ||
489 	    irec.br_blockcount != iext_irec.br_blockcount ||
490 	    irec.br_state != iext_irec.br_state)
491 		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
492 				irec.br_startoff);
493 	return 0;
494 }
495 
496 /* Scan the btree records. */
497 STATIC int
498 xchk_bmap_btree(
499 	struct xfs_scrub	*sc,
500 	int			whichfork,
501 	struct xchk_bmap_info	*info)
502 {
503 	struct xfs_owner_info	oinfo;
504 	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
505 	struct xfs_mount	*mp = sc->mp;
506 	struct xfs_inode	*ip = sc->ip;
507 	struct xfs_btree_cur	*cur;
508 	int			error;
509 
510 	/* Load the incore bmap cache if it's not loaded. */
511 	info->was_loaded = !xfs_need_iread_extents(ifp);
512 
513 	error = xfs_iread_extents(sc->tp, ip, whichfork);
514 	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
515 		goto out;
516 
517 	/* Check the btree structure. */
518 	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
519 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
520 	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
521 	xfs_btree_del_cursor(cur, error);
522 out:
523 	return error;
524 }
525 
526 struct xchk_bmap_check_rmap_info {
527 	struct xfs_scrub	*sc;
528 	int			whichfork;
529 	struct xfs_iext_cursor	icur;
530 };
531 
532 /* Can we find bmaps that fit this rmap? */
533 STATIC int
534 xchk_bmap_check_rmap(
535 	struct xfs_btree_cur		*cur,
536 	const struct xfs_rmap_irec	*rec,
537 	void				*priv)
538 {
539 	struct xfs_bmbt_irec		irec;
540 	struct xfs_rmap_irec		check_rec;
541 	struct xchk_bmap_check_rmap_info	*sbcri = priv;
542 	struct xfs_ifork		*ifp;
543 	struct xfs_scrub		*sc = sbcri->sc;
544 	bool				have_map;
545 
546 	/* Is this even the right fork? */
547 	if (rec->rm_owner != sc->ip->i_ino)
548 		return 0;
549 	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
550 	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
551 		return 0;
552 	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
553 		return 0;
554 
555 	/* Now look up the bmbt record. */
556 	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
557 	if (!ifp) {
558 		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
559 				rec->rm_offset);
560 		goto out;
561 	}
562 	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
563 			&sbcri->icur, &irec);
564 	if (!have_map)
565 		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
566 				rec->rm_offset);
567 	/*
568 	 * bmap extent record lengths are constrained to 2^21 blocks in length
569 	 * because of space constraints in the on-disk metadata structure.
570 	 * However, rmap extent record lengths are constrained only by AG
571 	 * length, so we have to loop through the bmbt to make sure that the
572 	 * entire rmap is covered by bmbt records.
573 	 */
574 	check_rec = *rec;
575 	while (have_map) {
576 		if (irec.br_startoff != check_rec.rm_offset)
577 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
578 					check_rec.rm_offset);
579 		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
580 				cur->bc_ag.pag->pag_agno,
581 				check_rec.rm_startblock))
582 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
583 					check_rec.rm_offset);
584 		if (irec.br_blockcount > check_rec.rm_blockcount)
585 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
586 					check_rec.rm_offset);
587 		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
588 			break;
589 		check_rec.rm_startblock += irec.br_blockcount;
590 		check_rec.rm_offset += irec.br_blockcount;
591 		check_rec.rm_blockcount -= irec.br_blockcount;
592 		if (check_rec.rm_blockcount == 0)
593 			break;
594 		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
595 		if (!have_map)
596 			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
597 					check_rec.rm_offset);
598 	}
599 
600 out:
601 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
602 		return -ECANCELED;
603 	return 0;
604 }
605 
606 /* Make sure each rmap has a corresponding bmbt entry. */
607 STATIC int
608 xchk_bmap_check_ag_rmaps(
609 	struct xfs_scrub		*sc,
610 	int				whichfork,
611 	struct xfs_perag		*pag)
612 {
613 	struct xchk_bmap_check_rmap_info	sbcri;
614 	struct xfs_btree_cur		*cur;
615 	struct xfs_buf			*agf;
616 	int				error;
617 
618 	error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
619 	if (error)
620 		return error;
621 
622 	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
623 
624 	sbcri.sc = sc;
625 	sbcri.whichfork = whichfork;
626 	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
627 	if (error == -ECANCELED)
628 		error = 0;
629 
630 	xfs_btree_del_cursor(cur, error);
631 	xfs_trans_brelse(sc->tp, agf);
632 	return error;
633 }
634 
635 /*
636  * Decide if we want to walk every rmap btree in the fs to make sure that each
637  * rmap for this file fork has corresponding bmbt entries.
638  */
639 static bool
640 xchk_bmap_want_check_rmaps(
641 	struct xchk_bmap_info	*info)
642 {
643 	struct xfs_scrub	*sc = info->sc;
644 	struct xfs_ifork	*ifp;
645 
646 	if (!xfs_has_rmapbt(sc->mp))
647 		return false;
648 	if (info->whichfork == XFS_COW_FORK)
649 		return false;
650 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
651 		return false;
652 
653 	/* Don't support realtime rmap checks yet. */
654 	if (info->is_rt)
655 		return false;
656 
657 	/*
658 	 * The inode repair code zaps broken inode forks by resetting them back
659 	 * to EXTENTS format and zero extent records.  If we encounter a fork
660 	 * in this state along with evidence that the fork isn't supposed to be
661 	 * empty, we need to scan the reverse mappings to decide if we're going
662 	 * to rebuild the fork.  Data forks with nonzero file size are scanned.
663 	 * xattr forks are never empty of content, so they are always scanned.
664 	 */
665 	ifp = xfs_ifork_ptr(sc->ip, info->whichfork);
666 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) {
667 		if (info->whichfork == XFS_DATA_FORK &&
668 		    i_size_read(VFS_I(sc->ip)) == 0)
669 			return false;
670 
671 		return true;
672 	}
673 
674 	return false;
675 }
676 
677 /* Make sure each rmap has a corresponding bmbt entry. */
678 STATIC int
679 xchk_bmap_check_rmaps(
680 	struct xfs_scrub	*sc,
681 	int			whichfork)
682 {
683 	struct xfs_perag	*pag;
684 	xfs_agnumber_t		agno;
685 	int			error;
686 
687 	for_each_perag(sc->mp, agno, pag) {
688 		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
689 		if (error ||
690 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
691 			xfs_perag_rele(pag);
692 			return error;
693 		}
694 	}
695 
696 	return 0;
697 }
698 
699 /* Scrub a delalloc reservation from the incore extent map tree. */
700 STATIC void
701 xchk_bmap_iextent_delalloc(
702 	struct xfs_inode	*ip,
703 	struct xchk_bmap_info	*info,
704 	struct xfs_bmbt_irec	*irec)
705 {
706 	struct xfs_mount	*mp = info->sc->mp;
707 
708 	/*
709 	 * Check for out-of-order extents.  This record could have come
710 	 * from the incore list, for which there is no ordering check.
711 	 */
712 	if (irec->br_startoff < info->prev_rec.br_startoff +
713 				info->prev_rec.br_blockcount)
714 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
715 				irec->br_startoff);
716 
717 	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
718 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
719 				irec->br_startoff);
720 
721 	/* Make sure the extent points to a valid place. */
722 	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
723 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
724 				irec->br_startoff);
725 }
726 
727 /* Decide if this individual fork mapping is ok. */
728 static bool
729 xchk_bmap_iext_mapping(
730 	struct xchk_bmap_info		*info,
731 	const struct xfs_bmbt_irec	*irec)
732 {
733 	/* There should never be a "hole" extent in either extent list. */
734 	if (irec->br_startblock == HOLESTARTBLOCK)
735 		return false;
736 	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
737 		return false;
738 	return true;
739 }
740 
741 /* Are these two mappings contiguous with each other? */
742 static inline bool
743 xchk_are_bmaps_contiguous(
744 	const struct xfs_bmbt_irec	*b1,
745 	const struct xfs_bmbt_irec	*b2)
746 {
747 	/* Don't try to combine unallocated mappings. */
748 	if (!xfs_bmap_is_real_extent(b1))
749 		return false;
750 	if (!xfs_bmap_is_real_extent(b2))
751 		return false;
752 
753 	/* Does b2 come right after b1 in the logical and physical range? */
754 	if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
755 		return false;
756 	if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
757 		return false;
758 	if (b1->br_state != b2->br_state)
759 		return false;
760 	return true;
761 }
762 
763 /*
764  * Walk the incore extent records, accumulating consecutive contiguous records
765  * into a single incore mapping.  Returns true if @irec has been set to a
766  * mapping or false if there are no more mappings.  Caller must ensure that
767  * @info.icur is zeroed before the first call.
768  */
769 static bool
770 xchk_bmap_iext_iter(
771 	struct xchk_bmap_info	*info,
772 	struct xfs_bmbt_irec	*irec)
773 {
774 	struct xfs_bmbt_irec	got;
775 	struct xfs_ifork	*ifp;
776 	unsigned int		nr = 0;
777 
778 	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
779 
780 	/* Advance to the next iextent record and check the mapping. */
781 	xfs_iext_next(ifp, &info->icur);
782 	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
783 		return false;
784 
785 	if (!xchk_bmap_iext_mapping(info, irec)) {
786 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
787 				irec->br_startoff);
788 		return false;
789 	}
790 	nr++;
791 
792 	/*
793 	 * Iterate subsequent iextent records and merge them with the one
794 	 * that we just read, if possible.
795 	 */
796 	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
797 		if (!xchk_are_bmaps_contiguous(irec, &got))
798 			break;
799 
800 		if (!xchk_bmap_iext_mapping(info, &got)) {
801 			xchk_fblock_set_corrupt(info->sc, info->whichfork,
802 					got.br_startoff);
803 			return false;
804 		}
805 		nr++;
806 
807 		irec->br_blockcount += got.br_blockcount;
808 		xfs_iext_next(ifp, &info->icur);
809 	}
810 
811 	/*
812 	 * If the merged mapping could be expressed with fewer bmbt records
813 	 * than we actually found, notify the user that this fork could be
814 	 * optimized.  CoW forks only exist in memory so we ignore them.
815 	 */
816 	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
817 	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
818 		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
819 
820 	return true;
821 }
822 
823 /*
824  * Scrub an inode fork's block mappings.
825  *
826  * First we scan every record in every btree block, if applicable.
827  * Then we unconditionally scan the incore extent cache.
828  */
829 STATIC int
830 xchk_bmap(
831 	struct xfs_scrub	*sc,
832 	int			whichfork)
833 {
834 	struct xfs_bmbt_irec	irec;
835 	struct xchk_bmap_info	info = { NULL };
836 	struct xfs_mount	*mp = sc->mp;
837 	struct xfs_inode	*ip = sc->ip;
838 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
839 	xfs_fileoff_t		endoff;
840 	int			error = 0;
841 
842 	/* Non-existent forks can be ignored. */
843 	if (!ifp)
844 		return -ENOENT;
845 
846 	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
847 	info.whichfork = whichfork;
848 	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
849 	info.sc = sc;
850 
851 	switch (whichfork) {
852 	case XFS_COW_FORK:
853 		/* No CoW forks on non-reflink filesystems. */
854 		if (!xfs_has_reflink(mp)) {
855 			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
856 			return 0;
857 		}
858 		break;
859 	case XFS_ATTR_FORK:
860 		if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
861 			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
862 		break;
863 	default:
864 		ASSERT(whichfork == XFS_DATA_FORK);
865 		break;
866 	}
867 
868 	/* Check the fork values */
869 	switch (ifp->if_format) {
870 	case XFS_DINODE_FMT_UUID:
871 	case XFS_DINODE_FMT_DEV:
872 	case XFS_DINODE_FMT_LOCAL:
873 		/* No mappings to check. */
874 		if (whichfork == XFS_COW_FORK)
875 			xchk_fblock_set_corrupt(sc, whichfork, 0);
876 		return 0;
877 	case XFS_DINODE_FMT_EXTENTS:
878 		break;
879 	case XFS_DINODE_FMT_BTREE:
880 		if (whichfork == XFS_COW_FORK) {
881 			xchk_fblock_set_corrupt(sc, whichfork, 0);
882 			return 0;
883 		}
884 
885 		error = xchk_bmap_btree(sc, whichfork, &info);
886 		if (error)
887 			return error;
888 		break;
889 	default:
890 		xchk_fblock_set_corrupt(sc, whichfork, 0);
891 		return 0;
892 	}
893 
894 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
895 		return 0;
896 
897 	/* Find the offset of the last extent in the mapping. */
898 	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
899 	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
900 		return error;
901 
902 	/*
903 	 * Scrub extent records.  We use a special iterator function here that
904 	 * combines adjacent mappings if they are logically and physically
905 	 * contiguous.   For large allocations that require multiple bmbt
906 	 * records, this reduces the number of cross-referencing calls, which
907 	 * reduces runtime.  Cross referencing with the rmap is simpler because
908 	 * the rmap must match the combined mapping exactly.
909 	 */
910 	while (xchk_bmap_iext_iter(&info, &irec)) {
911 		if (xchk_should_terminate(sc, &error) ||
912 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
913 			return 0;
914 
915 		if (irec.br_startoff >= endoff) {
916 			xchk_fblock_set_corrupt(sc, whichfork,
917 					irec.br_startoff);
918 			return 0;
919 		}
920 
921 		if (isnullstartblock(irec.br_startblock))
922 			xchk_bmap_iextent_delalloc(ip, &info, &irec);
923 		else
924 			xchk_bmap_iextent(ip, &info, &irec);
925 		memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
926 	}
927 
928 	if (xchk_bmap_want_check_rmaps(&info)) {
929 		error = xchk_bmap_check_rmaps(sc, whichfork);
930 		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
931 			return error;
932 	}
933 
934 	return 0;
935 }
936 
937 /* Scrub an inode's data fork. */
938 int
939 xchk_bmap_data(
940 	struct xfs_scrub	*sc)
941 {
942 	return xchk_bmap(sc, XFS_DATA_FORK);
943 }
944 
945 /* Scrub an inode's attr fork. */
946 int
947 xchk_bmap_attr(
948 	struct xfs_scrub	*sc)
949 {
950 	return xchk_bmap(sc, XFS_ATTR_FORK);
951 }
952 
953 /* Scrub an inode's CoW fork. */
954 int
955 xchk_bmap_cow(
956 	struct xfs_scrub	*sc)
957 {
958 	return xchk_bmap(sc, XFS_COW_FORK);
959 }
960