1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_bit.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_alloc.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_btree.h"
20 #include "xfs_rmap.h"
21 #include "xfs_rmap_btree.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/btree.h"
25 #include "xfs_ag.h"
26
/*
 * Set us up with an inode's bmap.
 *
 * Grabs and locks the inode named in the scrub request, flushes dirty
 * pagecache state for regular files (except for bmbta scrubs), and
 * allocates an empty scrub transaction.  Returns 0 or a negative errno;
 * scrub teardown unlocks and releases the inode on all paths.
 */
int
xchk_setup_inode_bmap(
	struct xfs_scrub	*sc)
{
	int			error;

	/* Enable the intent-drain fsgate if this scrub requires it. */
	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	error = xchk_iget_for_scrubbing(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * We don't want any ephemeral data/cow fork updates sitting around
	 * while we inspect block mappings, so wait for directio to finish
	 * and flush dirty data if we have delalloc reservations.
	 */
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;

		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

		inode_dio_wait(VFS_I(sc->ip));

		/*
		 * Try to flush all incore state to disk before we examine the
		 * space mappings for the data fork.  Leave accumulated errors
		 * in the mapping for the writer threads to consume.
		 *
		 * On ENOSPC or EIO writeback errors, we continue into the
		 * extent mapping checks because write failures do not
		 * necessarily imply anything about the correctness of the file
		 * metadata.  The metadata and the file data could be on
		 * completely separate devices; a media failure might only
		 * affect a subset of the disk, etc.  We can handle delalloc
		 * extents in the scrubber, so leaving them in memory is fine.
		 */
		error = filemap_fdatawrite(mapping);
		if (!error)
			error = filemap_fdatawait_keep_errors(mapping);
		if (error && (error != -ENOSPC && error != -EIO))
			goto out;
	}

	/* Got the inode, lock it and we're ready to go. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode */
	return error;
}
86
87 /*
88 * Inode fork block mapping (BMBT) scrubber.
89 * More complex than the others because we have to scrub
90 * all the extents regardless of whether or not the fork
91 * is in btree format.
92 */
93
/* State tracked while scrubbing one fork of one inode. */
struct xchk_bmap_info {
	/* Scrub context for this operation */
	struct xfs_scrub	*sc;

	/* Incore extent tree cursor */
	struct xfs_iext_cursor	icur;

	/* Previous fork mapping that we examined */
	struct xfs_bmbt_irec	prev_rec;

	/* Is this a realtime fork? */
	bool			is_rt;

	/* May mappings point to shared space? */
	bool			is_shared;

	/* Was the incore extent tree loaded? */
	bool			was_loaded;

	/* Which inode fork are we checking? */
	int			whichfork;
};
115
/*
 * Look for a corresponding rmap for this irec.
 *
 * Returns true and fills out @rmap if the lookup produced a record.  If no
 * record was found, the fork offset is flagged as cross-referencing corrupt
 * and this returns false.  Also returns false if the xref was aborted
 * (cursor torn down by xchk_should_check_xref on error).
 */
static inline bool
xchk_bmap_get_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno,
	uint64_t		owner,
	struct xfs_rmap_irec	*rmap)
{
	xfs_fileoff_t		offset;
	unsigned int		rflags = 0;
	int			has_rmap;
	int			error;

	/* Mirror the fork and unwritten state in the rmap lookup flags. */
	if (info->whichfork == XFS_ATTR_FORK)
		rflags |= XFS_RMAP_ATTR_FORK;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rflags |= XFS_RMAP_UNWRITTEN;

	/*
	 * CoW staging extents are owned (on disk) by the refcountbt, so
	 * their rmaps do not have offsets.
	 */
	if (info->whichfork == XFS_COW_FORK)
		offset = 0;
	else
		offset = irec->br_startoff;

	/*
	 * If the caller thinks this could be a shared bmbt extent (IOWs,
	 * any data fork extent of a reflink inode) then we have to use the
	 * range rmap lookup to make sure we get the correct owner/offset.
	 */
	if (info->is_shared) {
		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	} else {
		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	}
	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
		return false;

	if (!has_rmap)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
			irec->br_startoff);
	return has_rmap;
}
164
/*
 * Make sure that we have rmapbt records for this data/attr fork extent.
 * Any mismatch between the rmap record and this (possibly merged) incore
 * mapping marks the fork offset as cross-referencing corrupt.
 */
STATIC void
xchk_bmap_xref_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = info->sc->ip->i_ino;

	/* Nothing to do without an rmap cursor, or if xrefs are skipped. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * The rmap must be an exact match for this incore file mapping record,
	 * which may have arisen from multiple ondisk records.
	 */
	if (rmap.rm_startblock != agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Widen to 64 bits so the end computation cannot wrap. */
	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end != agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the logical offsets. */
	if (rmap.rm_offset != irec->br_startoff)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
	if (rmap_end != irec->br_startoff + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * Check for discrepancies between the unwritten flag in the irec and
	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
	 * unwritten and written extents, but we don't track that in the rmap
	 * records because the blocks are owned (on-disk) by the refcountbt,
	 * which doesn't track unwritten state.
	 */
	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* The fork flag in the rmap must match the fork being scrubbed. */
	if (!!(info->whichfork == XFS_ATTR_FORK) !=
	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* File fork data should never map to bmbt blocks. */
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
231
/*
 * Make sure that we have rmapbt records for this COW fork extent.  CoW
 * staging rmaps are keyed by XFS_RMAP_OWN_COW rather than the inode number,
 * and may span more physical space than this single mapping.
 */
STATIC void
xchk_bmap_xref_rmap_cow(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = XFS_RMAP_OWN_COW;

	/* Nothing to do without an rmap cursor, or if xrefs are skipped. */
	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * CoW staging extents are owned by the refcount btree, so the rmap
	 * can start before and end after the physical space allocated to this
	 * mapping.  There are no offsets to check.
	 */
	if (rmap.rm_startblock > agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Widen to 64 bits so the end computation cannot wrap. */
	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end < agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * No flags allowed.  Note that the (in-memory) CoW fork distinguishes
	 * between unwritten and written extents, but we don't track that in
	 * the rmap records because the blocks are owned (on-disk) by the
	 * refcountbt, which doesn't track unwritten state.
	 */
	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}
285
286 /* Cross-reference a single rtdev extent record. */
287 STATIC void
xchk_bmap_rt_iextent_xref(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)288 xchk_bmap_rt_iextent_xref(
289 struct xfs_inode *ip,
290 struct xchk_bmap_info *info,
291 struct xfs_bmbt_irec *irec)
292 {
293 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
294 irec->br_blockcount);
295 }
296
/*
 * Cross-reference a single datadev extent record against the AG btrees.
 * Attaches the relevant AG headers for the extent's AG, runs per-fork
 * cross-reference checks, and releases the AG state before returning.
 */
STATIC void
xchk_bmap_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_owner_info	oinfo;
	struct xfs_mount	*mp = info->sc->mp;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		len;
	int			error;

	/* Convert the fsblock mapping into AG coordinates. */
	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	len = irec->br_blockcount;

	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
	if (!xchk_fblock_process_error(info->sc, info->whichfork,
			irec->br_startoff, &error))
		goto out_free;

	/* Mapped space must be allocated and must not be inode chunks. */
	xchk_xref_is_used_space(info->sc, agbno, len);
	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
	switch (info->whichfork) {
	case XFS_DATA_FORK:
		xchk_bmap_xref_rmap(info, irec, agbno);
		if (!xfs_is_reflink_inode(info->sc->ip)) {
			/* Non-reflink data blocks are exclusively owned. */
			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
					info->whichfork, irec->br_startoff);
			xchk_xref_is_only_owned_by(info->sc, agbno,
					irec->br_blockcount, &oinfo);
			xchk_xref_is_not_shared(info->sc, agbno,
					irec->br_blockcount);
		}
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_ATTR_FORK:
		/* Attr blocks are never shared or CoW staging. */
		xchk_bmap_xref_rmap(info, irec, agbno);
		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
				info->whichfork, irec->br_startoff);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&oinfo);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_COW_FORK:
		/* CoW blocks must be staging extents owned by OWN_COW. */
		xchk_bmap_xref_rmap_cow(info, irec, agbno);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&XFS_RMAP_OINFO_COW);
		xchk_xref_is_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		break;
	}

out_free:
	xchk_ag_free(info->sc, &info->sc->sa);
}
361
362 /*
363 * Directories and attr forks should never have blocks that can't be addressed
364 * by a xfs_dablk_t.
365 */
366 STATIC void
xchk_bmap_dirattr_extent(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)367 xchk_bmap_dirattr_extent(
368 struct xfs_inode *ip,
369 struct xchk_bmap_info *info,
370 struct xfs_bmbt_irec *irec)
371 {
372 struct xfs_mount *mp = ip->i_mount;
373 xfs_fileoff_t off;
374
375 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
376 return;
377
378 if (!xfs_verify_dablk(mp, irec->br_startoff))
379 xchk_fblock_set_corrupt(info->sc, info->whichfork,
380 irec->br_startoff);
381
382 off = irec->br_startoff + irec->br_blockcount - 1;
383 if (!xfs_verify_dablk(mp, off))
384 xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
385 }
386
/*
 * Scrub a single (allocated) extent record.  Ordering, file-range, and
 * block-range problems set the fork corrupt; cross-referencing only runs
 * if nothing has been flagged corrupt yet.
 */
STATIC void
xchk_bmap_iextent(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = info->sc->mp;

	/*
	 * Check for out-of-order extents.  This record could have come
	 * from the incore list, for which there is no ordering check.
	 */
	if (irec->br_startoff < info->prev_rec.br_startoff +
				info->prev_rec.br_blockcount)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* The mapped file range must be addressable. */
	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	xchk_bmap_dirattr_extent(ip, info, irec);

	/* Make sure the extent points to a valid place. */
	if (info->is_rt &&
	    !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (!info->is_rt &&
	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* We don't allow unwritten extents on attr forks. */
	if (irec->br_state == XFS_EXT_UNWRITTEN &&
	    info->whichfork == XFS_ATTR_FORK)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Skip cross-referencing once corruption has been found. */
	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	if (info->is_rt)
		xchk_bmap_rt_iextent_xref(ip, info, irec);
	else
		xchk_bmap_iextent_xref(ip, info, irec);
}
435
/*
 * Scrub a bmbt record.  Called by xchk_btree for each ondisk record;
 * checks block ownership on CRC filesystems and, if the incore extent
 * tree was already loaded, checks that it contains a matching mapping.
 * Returns 0 even when corruption is flagged; only internal errors
 * propagate through the btree walk.
 */
STATIC int
xchk_bmapbt_rec(
	struct xchk_btree	*bs,
	const union xfs_btree_rec *rec)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	iext_irec;
	struct xfs_iext_cursor	icur;
	struct xchk_bmap_info	*info = bs->private;
	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
	struct xfs_buf		*bp = NULL;
	struct xfs_btree_block	*block;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
	uint64_t		owner;
	int			i;

	/*
	 * Check the owners of the btree blocks up to the level below
	 * the root since the verifiers don't do that.  Only done once,
	 * when we're looking at the first record of the leftmost leaf.
	 */
	if (xfs_has_crc(bs->cur->bc_mp) &&
	    bs->cur->bc_levels[0].ptr == 1) {
		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
			block = xfs_btree_get_block(bs->cur, i, &bp);
			owner = be64_to_cpu(block->bb_u.l.bb_owner);
			if (owner != ip->i_ino)
				xchk_fblock_set_corrupt(bs->sc,
						info->whichfork, 0);
		}
	}

	/*
	 * Check that the incore extent tree contains an extent that matches
	 * this one exactly.  We validate those cached bmaps later, so we don't
	 * need to check them here.  If the incore extent tree was just loaded
	 * from disk by the scrubber, we assume that its contents match what's
	 * on disk (we still hold the ILOCK) and skip the equivalence check.
	 */
	if (!info->was_loaded)
		return 0;

	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	/* Reject records that fail the generic mapping validator. */
	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
		return 0;
	}

	/* The incore mapping must match the ondisk record field for field. */
	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
				&iext_irec) ||
	    irec.br_startoff != iext_irec.br_startoff ||
	    irec.br_startblock != iext_irec.br_startblock ||
	    irec.br_blockcount != iext_irec.br_blockcount ||
	    irec.br_state != iext_irec.br_state)
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
	return 0;
}
495
496 /* Scan the btree records. */
497 STATIC int
xchk_bmap_btree(struct xfs_scrub * sc,int whichfork,struct xchk_bmap_info * info)498 xchk_bmap_btree(
499 struct xfs_scrub *sc,
500 int whichfork,
501 struct xchk_bmap_info *info)
502 {
503 struct xfs_owner_info oinfo;
504 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
505 struct xfs_mount *mp = sc->mp;
506 struct xfs_inode *ip = sc->ip;
507 struct xfs_btree_cur *cur;
508 int error;
509
510 /* Load the incore bmap cache if it's not loaded. */
511 info->was_loaded = !xfs_need_iread_extents(ifp);
512
513 error = xfs_iread_extents(sc->tp, ip, whichfork);
514 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
515 goto out;
516
517 /* Check the btree structure. */
518 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
519 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
520 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
521 xfs_btree_del_cursor(cur, error);
522 out:
523 return error;
524 }
525
/* Context passed to xchk_bmap_check_rmap while walking an AG's rmaps. */
struct xchk_bmap_check_rmap_info {
	/* Scrub context for this operation */
	struct xfs_scrub	*sc;

	/* Which inode fork are we checking? */
	int			whichfork;

	/* Incore extent tree cursor for walking the fork's mappings */
	struct xfs_iext_cursor	icur;
};
531
/*
 * Can we find bmaps that fit this rmap?
 *
 * Callback for xfs_rmap_query_all: for each rmap record owned by the
 * scrubbed inode's fork, walk the incore extent list to verify that bmbt
 * mappings fully cover the rmap.  Returns -ECANCELED to stop the query
 * once corruption has been flagged; otherwise 0.
 */
STATIC int
xchk_bmap_check_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_rmap_irec	check_rec;
	struct xchk_bmap_check_rmap_info	*sbcri = priv;
	struct xfs_ifork	*ifp;
	struct xfs_scrub	*sc = sbcri->sc;
	bool			have_map;

	/* Is this even the right fork? */
	if (rec->rm_owner != sc->ip->i_ino)
		return 0;
	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return 0;

	/* Now look up the bmbt record. */
	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
	if (!ifp) {
		/* The rmap says this fork exists but the inode has none. */
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
		goto out;
	}
	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
			&sbcri->icur, &irec);
	if (!have_map)
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
	/*
	 * bmap extent record lengths are constrained to 2^21 blocks in length
	 * because of space constraints in the on-disk metadata structure.
	 * However, rmap extent record lengths are constrained only by AG
	 * length, so we have to loop through the bmbt to make sure that the
	 * entire rmap is covered by bmbt records.
	 */
	check_rec = *rec;
	while (have_map) {
		/* Each bmbt mapping must line up with the uncovered rmap. */
		if (irec.br_startoff != check_rec.rm_offset)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
				cur->bc_ag.pag->pag_agno,
				check_rec.rm_startblock))
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_blockcount > check_rec.rm_blockcount)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			break;
		/* Trim the covered part off the front of the rmap. */
		check_rec.rm_startblock += irec.br_blockcount;
		check_rec.rm_offset += irec.br_blockcount;
		check_rec.rm_blockcount -= irec.br_blockcount;
		if (check_rec.rm_blockcount == 0)
			break;
		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
		if (!have_map)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
	}

out:
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return -ECANCELED;
	return 0;
}
605
606 /* Make sure each rmap has a corresponding bmbt entry. */
607 STATIC int
xchk_bmap_check_ag_rmaps(struct xfs_scrub * sc,int whichfork,struct xfs_perag * pag)608 xchk_bmap_check_ag_rmaps(
609 struct xfs_scrub *sc,
610 int whichfork,
611 struct xfs_perag *pag)
612 {
613 struct xchk_bmap_check_rmap_info sbcri;
614 struct xfs_btree_cur *cur;
615 struct xfs_buf *agf;
616 int error;
617
618 error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
619 if (error)
620 return error;
621
622 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
623
624 sbcri.sc = sc;
625 sbcri.whichfork = whichfork;
626 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
627 if (error == -ECANCELED)
628 error = 0;
629
630 xfs_btree_del_cursor(cur, error);
631 xfs_trans_brelse(sc->tp, agf);
632 return error;
633 }
634
635 /*
636 * Decide if we want to walk every rmap btree in the fs to make sure that each
637 * rmap for this file fork has corresponding bmbt entries.
638 */
639 static bool
xchk_bmap_want_check_rmaps(struct xchk_bmap_info * info)640 xchk_bmap_want_check_rmaps(
641 struct xchk_bmap_info *info)
642 {
643 struct xfs_scrub *sc = info->sc;
644 struct xfs_ifork *ifp;
645
646 if (!xfs_has_rmapbt(sc->mp))
647 return false;
648 if (info->whichfork == XFS_COW_FORK)
649 return false;
650 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
651 return false;
652
653 /* Don't support realtime rmap checks yet. */
654 if (info->is_rt)
655 return false;
656
657 /*
658 * The inode repair code zaps broken inode forks by resetting them back
659 * to EXTENTS format and zero extent records. If we encounter a fork
660 * in this state along with evidence that the fork isn't supposed to be
661 * empty, we need to scan the reverse mappings to decide if we're going
662 * to rebuild the fork. Data forks with nonzero file size are scanned.
663 * xattr forks are never empty of content, so they are always scanned.
664 */
665 ifp = xfs_ifork_ptr(sc->ip, info->whichfork);
666 if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) {
667 if (info->whichfork == XFS_DATA_FORK &&
668 i_size_read(VFS_I(sc->ip)) == 0)
669 return false;
670
671 return true;
672 }
673
674 return false;
675 }
676
677 /* Make sure each rmap has a corresponding bmbt entry. */
678 STATIC int
xchk_bmap_check_rmaps(struct xfs_scrub * sc,int whichfork)679 xchk_bmap_check_rmaps(
680 struct xfs_scrub *sc,
681 int whichfork)
682 {
683 struct xfs_perag *pag;
684 xfs_agnumber_t agno;
685 int error;
686
687 for_each_perag(sc->mp, agno, pag) {
688 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
689 if (error ||
690 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
691 xfs_perag_rele(pag);
692 return error;
693 }
694 }
695
696 return 0;
697 }
698
699 /* Scrub a delalloc reservation from the incore extent map tree. */
700 STATIC void
xchk_bmap_iextent_delalloc(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)701 xchk_bmap_iextent_delalloc(
702 struct xfs_inode *ip,
703 struct xchk_bmap_info *info,
704 struct xfs_bmbt_irec *irec)
705 {
706 struct xfs_mount *mp = info->sc->mp;
707
708 /*
709 * Check for out-of-order extents. This record could have come
710 * from the incore list, for which there is no ordering check.
711 */
712 if (irec->br_startoff < info->prev_rec.br_startoff +
713 info->prev_rec.br_blockcount)
714 xchk_fblock_set_corrupt(info->sc, info->whichfork,
715 irec->br_startoff);
716
717 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
718 xchk_fblock_set_corrupt(info->sc, info->whichfork,
719 irec->br_startoff);
720
721 /* Make sure the extent points to a valid place. */
722 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
723 xchk_fblock_set_corrupt(info->sc, info->whichfork,
724 irec->br_startoff);
725 }
726
727 /* Decide if this individual fork mapping is ok. */
728 static bool
xchk_bmap_iext_mapping(struct xchk_bmap_info * info,const struct xfs_bmbt_irec * irec)729 xchk_bmap_iext_mapping(
730 struct xchk_bmap_info *info,
731 const struct xfs_bmbt_irec *irec)
732 {
733 /* There should never be a "hole" extent in either extent list. */
734 if (irec->br_startblock == HOLESTARTBLOCK)
735 return false;
736 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
737 return false;
738 return true;
739 }
740
741 /* Are these two mappings contiguous with each other? */
742 static inline bool
xchk_are_bmaps_contiguous(const struct xfs_bmbt_irec * b1,const struct xfs_bmbt_irec * b2)743 xchk_are_bmaps_contiguous(
744 const struct xfs_bmbt_irec *b1,
745 const struct xfs_bmbt_irec *b2)
746 {
747 /* Don't try to combine unallocated mappings. */
748 if (!xfs_bmap_is_real_extent(b1))
749 return false;
750 if (!xfs_bmap_is_real_extent(b2))
751 return false;
752
753 /* Does b2 come right after b1 in the logical and physical range? */
754 if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
755 return false;
756 if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
757 return false;
758 if (b1->br_state != b2->br_state)
759 return false;
760 return true;
761 }
762
/*
 * Walk the incore extent records, accumulating consecutive contiguous records
 * into a single incore mapping.  Returns true if @irec has been set to a
 * mapping or false if there are no more mappings.  Caller must ensure that
 * @info.icur is zeroed before the first call.
 */
static bool
xchk_bmap_iext_iter(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_bmbt_irec	got;
	struct xfs_ifork	*ifp;
	unsigned int		nr = 0;	/* incore records merged into @irec */

	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);

	/* Advance to the next iextent record and check the mapping. */
	xfs_iext_next(ifp, &info->icur);
	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
		return false;

	if (!xchk_bmap_iext_mapping(info, irec)) {
		/* Bad mapping: flag it and end the iteration early. */
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
		return false;
	}
	nr++;

	/*
	 * Iterate subsequent iextent records and merge them with the one
	 * that we just read, if possible.
	 */
	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
		if (!xchk_are_bmaps_contiguous(irec, &got))
			break;

		if (!xchk_bmap_iext_mapping(info, &got)) {
			xchk_fblock_set_corrupt(info->sc, info->whichfork,
					got.br_startoff);
			return false;
		}
		nr++;

		/* Fold the contiguous record into @irec and consume it. */
		irec->br_blockcount += got.br_blockcount;
		xfs_iext_next(ifp, &info->icur);
	}

	/*
	 * If the merged mapping could be expressed with fewer bmbt records
	 * than we actually found, notify the user that this fork could be
	 * optimized.  CoW forks only exist in memory so we ignore them.
	 */
	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);

	return true;
}
822
/*
 * Scrub an inode fork's block mappings.
 *
 * First we scan every record in every btree block, if applicable.
 * Then we unconditionally scan the incore extent cache.
 *
 * Returns 0 (with corruption reported via sm_flags), -ENOENT if the fork
 * does not exist, or a negative errno for runtime failures.
 */
STATIC int
xchk_bmap(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_bmbt_irec	irec;
	struct xchk_bmap_info	info = { NULL };
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		endoff;
	int			error = 0;

	/* Non-existent forks can be ignored. */
	if (!ifp)
		return -ENOENT;

	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
	info.whichfork = whichfork;
	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
	info.sc = sc;

	/* Sanity-check the fork against the filesystem feature set. */
	switch (whichfork) {
	case XFS_COW_FORK:
		/* No CoW forks on non-reflink filesystems. */
		if (!xfs_has_reflink(mp)) {
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
			return 0;
		}
		break;
	case XFS_ATTR_FORK:
		/* Attr forks require one of the attr features. */
		if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		break;
	default:
		ASSERT(whichfork == XFS_DATA_FORK);
		break;
	}

	/* Check the fork values */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
		/* No mappings to check. */
		if (whichfork == XFS_COW_FORK)
			xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	case XFS_DINODE_FMT_EXTENTS:
		break;
	case XFS_DINODE_FMT_BTREE:
		/* CoW forks are incore-only and can never be btrees. */
		if (whichfork == XFS_COW_FORK) {
			xchk_fblock_set_corrupt(sc, whichfork, 0);
			return 0;
		}

		error = xchk_bmap_btree(sc, whichfork, &info);
		if (error)
			return error;
		break;
	default:
		xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Find the offset of the last extent in the mapping. */
	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		return error;

	/*
	 * Scrub extent records.  We use a special iterator function here that
	 * combines adjacent mappings if they are logically and physically
	 * contiguous.   For large allocations that require multiple bmbt
	 * records, this reduces the number of cross-referencing calls, which
	 * reduces runtime.  Cross referencing with the rmap is simpler because
	 * the rmap must match the combined mapping exactly.
	 */
	while (xchk_bmap_iext_iter(&info, &irec)) {
		if (xchk_should_terminate(sc, &error) ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
			return 0;

		/* No mapping may start at or beyond the last offset. */
		if (irec.br_startoff >= endoff) {
			xchk_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
			return 0;
		}

		if (isnullstartblock(irec.br_startblock))
			xchk_bmap_iextent_delalloc(ip, &info, &irec);
		else
			xchk_bmap_iextent(ip, &info, &irec);
		/* Remember this mapping for the ordering check. */
		memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
	}

	if (xchk_bmap_want_check_rmaps(&info)) {
		error = xchk_bmap_check_rmaps(sc, whichfork);
		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
			return error;
	}

	return 0;
}
936
/* Scrub an inode's data fork.  Thin wrapper around xchk_bmap(). */
int
xchk_bmap_data(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_DATA_FORK);
}
944
/* Scrub an inode's attr fork.  Thin wrapper around xchk_bmap(). */
int
xchk_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_ATTR_FORK);
}
952
/* Scrub an inode's CoW fork.  Thin wrapper around xchk_bmap(). */
int
xchk_bmap_cow(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_COW_FORK);
}
960