xref: /openbmc/linux/fs/xfs/scrub/bmap.c (revision e2c75e76)
1 /*
2  * Copyright (C) 2017 Oracle.  All Rights Reserved.
3  *
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it would be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write the Free Software Foundation,
18  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 #include "xfs.h"
21 #include "xfs_fs.h"
22 #include "xfs_shared.h"
23 #include "xfs_format.h"
24 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_btree.h"
28 #include "xfs_bit.h"
29 #include "xfs_log_format.h"
30 #include "xfs_trans.h"
31 #include "xfs_sb.h"
32 #include "xfs_inode.h"
33 #include "xfs_inode_fork.h"
34 #include "xfs_alloc.h"
35 #include "xfs_rtalloc.h"
36 #include "xfs_bmap.h"
37 #include "xfs_bmap_util.h"
38 #include "xfs_bmap_btree.h"
39 #include "xfs_rmap.h"
40 #include "xfs_refcount.h"
41 #include "scrub/xfs_scrub.h"
42 #include "scrub/scrub.h"
43 #include "scrub/common.h"
44 #include "scrub/btree.h"
45 #include "scrub/trace.h"
46 
47 /* Set us up with an inode's bmap. */
48 int
49 xfs_scrub_setup_inode_bmap(
50 	struct xfs_scrub_context	*sc,
51 	struct xfs_inode		*ip)
52 {
53 	struct xfs_mount		*mp = sc->mp;
54 	int				error;
55 
56 	error = xfs_scrub_get_inode(sc, ip);
57 	if (error)
58 		goto out;
59 
60 	sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
61 	xfs_ilock(sc->ip, sc->ilock_flags);
62 
63 	/*
64 	 * We don't want any ephemeral data fork updates sitting around
65 	 * while we inspect block mappings, so wait for directio to finish
66 	 * and flush dirty data if we have delalloc reservations.
67 	 */
68 	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
69 	    sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
70 		inode_dio_wait(VFS_I(sc->ip));
71 		error = filemap_write_and_wait(VFS_I(sc->ip)->i_mapping);
72 		if (error)
73 			goto out;
74 	}
75 
76 	/* Got the inode, lock it and we're ready to go. */
77 	error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp);
78 	if (error)
79 		goto out;
80 	sc->ilock_flags |= XFS_ILOCK_EXCL;
81 	xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
82 
83 out:
84 	/* scrub teardown will unlock and release the inode */
85 	return error;
86 }
87 
88 /*
89  * Inode fork block mapping (BMBT) scrubber.
90  * More complex than the others because we have to scrub
91  * all the extents regardless of whether or not the fork
92  * is in btree format.
93  */
94 
95 struct xfs_scrub_bmap_info {
96 	struct xfs_scrub_context	*sc;
97 	xfs_fileoff_t			lastoff;
98 	bool				is_rt;
99 	bool				is_shared;
100 	int				whichfork;
101 };
102 
103 /* Look for a corresponding rmap for this irec. */
104 static inline bool
105 xfs_scrub_bmap_get_rmap(
106 	struct xfs_scrub_bmap_info	*info,
107 	struct xfs_bmbt_irec		*irec,
108 	xfs_agblock_t			agbno,
109 	uint64_t			owner,
110 	struct xfs_rmap_irec		*rmap)
111 {
112 	xfs_fileoff_t			offset;
113 	unsigned int			rflags = 0;
114 	int				has_rmap;
115 	int				error;
116 
117 	if (info->whichfork == XFS_ATTR_FORK)
118 		rflags |= XFS_RMAP_ATTR_FORK;
119 
120 	/*
121 	 * CoW staging extents are owned (on disk) by the refcountbt, so
122 	 * their rmaps do not have offsets.
123 	 */
124 	if (info->whichfork == XFS_COW_FORK)
125 		offset = 0;
126 	else
127 		offset = irec->br_startoff;
128 
129 	/*
130 	 * If the caller thinks this could be a shared bmbt extent (IOWs,
131 	 * any data fork extent of a reflink inode) then we have to use the
132 	 * range rmap lookup to make sure we get the correct owner/offset.
133 	 */
134 	if (info->is_shared) {
135 		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
136 				owner, offset, rflags, rmap, &has_rmap);
137 		if (!xfs_scrub_should_check_xref(info->sc, &error,
138 				&info->sc->sa.rmap_cur))
139 			return false;
140 		goto out;
141 	}
142 
143 	/*
144 	 * Otherwise, use the (faster) regular lookup.
145 	 */
146 	error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 0, owner,
147 			offset, rflags, &has_rmap);
148 	if (!xfs_scrub_should_check_xref(info->sc, &error,
149 			&info->sc->sa.rmap_cur))
150 		return false;
151 	if (!has_rmap)
152 		goto out;
153 
154 	error = xfs_rmap_get_rec(info->sc->sa.rmap_cur, rmap, &has_rmap);
155 	if (!xfs_scrub_should_check_xref(info->sc, &error,
156 			&info->sc->sa.rmap_cur))
157 		return false;
158 
159 out:
160 	if (!has_rmap)
161 		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
162 			irec->br_startoff);
163 	return has_rmap;
164 }
165 
166 /* Make sure that we have rmapbt records for this extent. */
167 STATIC void
168 xfs_scrub_bmap_xref_rmap(
169 	struct xfs_scrub_bmap_info	*info,
170 	struct xfs_bmbt_irec		*irec,
171 	xfs_agblock_t			agbno)
172 {
173 	struct xfs_rmap_irec		rmap;
174 	unsigned long long		rmap_end;
175 	uint64_t			owner;
176 
177 	if (!info->sc->sa.rmap_cur)
178 		return;
179 
180 	if (info->whichfork == XFS_COW_FORK)
181 		owner = XFS_RMAP_OWN_COW;
182 	else
183 		owner = info->sc->ip->i_ino;
184 
185 	/* Find the rmap record for this irec. */
186 	if (!xfs_scrub_bmap_get_rmap(info, irec, agbno, owner, &rmap))
187 		return;
188 
189 	/* Check the rmap. */
190 	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
191 	if (rmap.rm_startblock > agbno ||
192 	    agbno + irec->br_blockcount > rmap_end)
193 		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
194 				irec->br_startoff);
195 
196 	/*
197 	 * Check the logical offsets if applicable.  CoW staging extents
198 	 * don't track logical offsets since the mappings only exist in
199 	 * memory.
200 	 */
201 	if (info->whichfork != XFS_COW_FORK) {
202 		rmap_end = (unsigned long long)rmap.rm_offset +
203 				rmap.rm_blockcount;
204 		if (rmap.rm_offset > irec->br_startoff ||
205 		    irec->br_startoff + irec->br_blockcount > rmap_end)
206 			xfs_scrub_fblock_xref_set_corrupt(info->sc,
207 					info->whichfork, irec->br_startoff);
208 	}
209 
210 	if (rmap.rm_owner != owner)
211 		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
212 				irec->br_startoff);
213 
214 	/*
215 	 * Check for discrepancies between the unwritten flag in the irec and
216 	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
217 	 * unwritten and written extents, but we don't track that in the rmap
218 	 * records because the blocks are owned (on-disk) by the refcountbt,
219 	 * which doesn't track unwritten state.
220 	 */
221 	if (owner != XFS_RMAP_OWN_COW &&
222 	    irec->br_state == XFS_EXT_UNWRITTEN &&
223 	    !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
224 		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
225 				irec->br_startoff);
226 
227 	if (info->whichfork == XFS_ATTR_FORK &&
228 	    !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
229 		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
230 				irec->br_startoff);
231 	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
232 		xfs_scrub_fblock_xref_set_corrupt(info->sc, info->whichfork,
233 				irec->br_startoff);
234 }
235 
236 /* Cross-reference a single rtdev extent record. */
237 STATIC void
238 xfs_scrub_bmap_rt_extent_xref(
239 	struct xfs_scrub_bmap_info	*info,
240 	struct xfs_inode		*ip,
241 	struct xfs_btree_cur		*cur,
242 	struct xfs_bmbt_irec		*irec)
243 {
244 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
245 		return;
246 
247 	xfs_scrub_xref_is_used_rt_space(info->sc, irec->br_startblock,
248 			irec->br_blockcount);
249 }
250 
251 /* Cross-reference a single datadev extent record. */
252 STATIC void
253 xfs_scrub_bmap_extent_xref(
254 	struct xfs_scrub_bmap_info	*info,
255 	struct xfs_inode		*ip,
256 	struct xfs_btree_cur		*cur,
257 	struct xfs_bmbt_irec		*irec)
258 {
259 	struct xfs_mount		*mp = info->sc->mp;
260 	xfs_agnumber_t			agno;
261 	xfs_agblock_t			agbno;
262 	xfs_extlen_t			len;
263 	int				error;
264 
265 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
266 		return;
267 
268 	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
269 	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
270 	len = irec->br_blockcount;
271 
272 	error = xfs_scrub_ag_init(info->sc, agno, &info->sc->sa);
273 	if (!xfs_scrub_fblock_process_error(info->sc, info->whichfork,
274 			irec->br_startoff, &error))
275 		return;
276 
277 	xfs_scrub_xref_is_used_space(info->sc, agbno, len);
278 	xfs_scrub_xref_is_not_inode_chunk(info->sc, agbno, len);
279 	xfs_scrub_bmap_xref_rmap(info, irec, agbno);
280 	switch (info->whichfork) {
281 	case XFS_DATA_FORK:
282 		if (xfs_is_reflink_inode(info->sc->ip))
283 			break;
284 		/* fall through */
285 	case XFS_ATTR_FORK:
286 		xfs_scrub_xref_is_not_shared(info->sc, agbno,
287 				irec->br_blockcount);
288 		break;
289 	case XFS_COW_FORK:
290 		xfs_scrub_xref_is_cow_staging(info->sc, agbno,
291 				irec->br_blockcount);
292 		break;
293 	}
294 
295 	xfs_scrub_ag_free(info->sc, &info->sc->sa);
296 }
297 
298 /* Scrub a single extent record. */
299 STATIC int
300 xfs_scrub_bmap_extent(
301 	struct xfs_inode		*ip,
302 	struct xfs_btree_cur		*cur,
303 	struct xfs_scrub_bmap_info	*info,
304 	struct xfs_bmbt_irec		*irec)
305 {
306 	struct xfs_mount		*mp = info->sc->mp;
307 	struct xfs_buf			*bp = NULL;
308 	xfs_filblks_t			end;
309 	int				error = 0;
310 
311 	if (cur)
312 		xfs_btree_get_block(cur, 0, &bp);
313 
314 	/*
315 	 * Check for out-of-order extents.  This record could have come
316 	 * from the incore list, for which there is no ordering check.
317 	 */
318 	if (irec->br_startoff < info->lastoff)
319 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
320 				irec->br_startoff);
321 
322 	/* There should never be a "hole" extent in either extent list. */
323 	if (irec->br_startblock == HOLESTARTBLOCK)
324 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
325 				irec->br_startoff);
326 
327 	/*
328 	 * Check for delalloc extents.  We never iterate the ones in the
329 	 * in-core extent scan, and we should never see these in the bmbt.
330 	 */
331 	if (isnullstartblock(irec->br_startblock))
332 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
333 				irec->br_startoff);
334 
335 	/* Make sure the extent points to a valid place. */
336 	if (irec->br_blockcount > MAXEXTLEN)
337 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
338 				irec->br_startoff);
339 	if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
340 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
341 				irec->br_startoff);
342 	end = irec->br_startblock + irec->br_blockcount - 1;
343 	if (info->is_rt &&
344 	    (!xfs_verify_rtbno(mp, irec->br_startblock) ||
345 	     !xfs_verify_rtbno(mp, end)))
346 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
347 				irec->br_startoff);
348 	if (!info->is_rt &&
349 	    (!xfs_verify_fsbno(mp, irec->br_startblock) ||
350 	     !xfs_verify_fsbno(mp, end) ||
351 	     XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
352 				XFS_FSB_TO_AGNO(mp, end)))
353 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
354 				irec->br_startoff);
355 
356 	/* We don't allow unwritten extents on attr forks. */
357 	if (irec->br_state == XFS_EXT_UNWRITTEN &&
358 	    info->whichfork == XFS_ATTR_FORK)
359 		xfs_scrub_fblock_set_corrupt(info->sc, info->whichfork,
360 				irec->br_startoff);
361 
362 	if (info->is_rt)
363 		xfs_scrub_bmap_rt_extent_xref(info, ip, cur, irec);
364 	else
365 		xfs_scrub_bmap_extent_xref(info, ip, cur, irec);
366 
367 	info->lastoff = irec->br_startoff + irec->br_blockcount;
368 	return error;
369 }
370 
371 /* Scrub a bmbt record. */
372 STATIC int
373 xfs_scrub_bmapbt_rec(
374 	struct xfs_scrub_btree		*bs,
375 	union xfs_btree_rec		*rec)
376 {
377 	struct xfs_bmbt_irec		irec;
378 	struct xfs_scrub_bmap_info	*info = bs->private;
379 	struct xfs_inode		*ip = bs->cur->bc_private.b.ip;
380 	struct xfs_buf			*bp = NULL;
381 	struct xfs_btree_block		*block;
382 	uint64_t			owner;
383 	int				i;
384 
385 	/*
386 	 * Check the owners of the btree blocks up to the level below
387 	 * the root since the verifiers don't do that.
388 	 */
389 	if (xfs_sb_version_hascrc(&bs->cur->bc_mp->m_sb) &&
390 	    bs->cur->bc_ptrs[0] == 1) {
391 		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
392 			block = xfs_btree_get_block(bs->cur, i, &bp);
393 			owner = be64_to_cpu(block->bb_u.l.bb_owner);
394 			if (owner != ip->i_ino)
395 				xfs_scrub_fblock_set_corrupt(bs->sc,
396 						info->whichfork, 0);
397 		}
398 	}
399 
400 	/* Set up the in-core record and scrub it. */
401 	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
402 	return xfs_scrub_bmap_extent(ip, bs->cur, info, &irec);
403 }
404 
405 /* Scan the btree records. */
406 STATIC int
407 xfs_scrub_bmap_btree(
408 	struct xfs_scrub_context	*sc,
409 	int				whichfork,
410 	struct xfs_scrub_bmap_info	*info)
411 {
412 	struct xfs_owner_info		oinfo;
413 	struct xfs_mount		*mp = sc->mp;
414 	struct xfs_inode		*ip = sc->ip;
415 	struct xfs_btree_cur		*cur;
416 	int				error;
417 
418 	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
419 	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
420 	error = xfs_scrub_btree(sc, cur, xfs_scrub_bmapbt_rec, &oinfo, info);
421 	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR :
422 					  XFS_BTREE_NOERROR);
423 	return error;
424 }
425 
426 /*
427  * Scrub an inode fork's block mappings.
428  *
429  * First we scan every record in every btree block, if applicable.
430  * Then we unconditionally scan the incore extent cache.
431  */
432 STATIC int
433 xfs_scrub_bmap(
434 	struct xfs_scrub_context	*sc,
435 	int				whichfork)
436 {
437 	struct xfs_bmbt_irec		irec;
438 	struct xfs_scrub_bmap_info	info = { NULL };
439 	struct xfs_mount		*mp = sc->mp;
440 	struct xfs_inode		*ip = sc->ip;
441 	struct xfs_ifork		*ifp;
442 	xfs_fileoff_t			endoff;
443 	struct xfs_iext_cursor		icur;
444 	int				error = 0;
445 
446 	ifp = XFS_IFORK_PTR(ip, whichfork);
447 
448 	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
449 	info.whichfork = whichfork;
450 	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
451 	info.sc = sc;
452 
453 	switch (whichfork) {
454 	case XFS_COW_FORK:
455 		/* Non-existent CoW forks are ignorable. */
456 		if (!ifp)
457 			goto out;
458 		/* No CoW forks on non-reflink inodes/filesystems. */
459 		if (!xfs_is_reflink_inode(ip)) {
460 			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
461 			goto out;
462 		}
463 		break;
464 	case XFS_ATTR_FORK:
465 		if (!ifp)
466 			goto out;
467 		if (!xfs_sb_version_hasattr(&mp->m_sb) &&
468 		    !xfs_sb_version_hasattr2(&mp->m_sb))
469 			xfs_scrub_ino_set_corrupt(sc, sc->ip->i_ino, NULL);
470 		break;
471 	default:
472 		ASSERT(whichfork == XFS_DATA_FORK);
473 		break;
474 	}
475 
476 	/* Check the fork values */
477 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
478 	case XFS_DINODE_FMT_UUID:
479 	case XFS_DINODE_FMT_DEV:
480 	case XFS_DINODE_FMT_LOCAL:
481 		/* No mappings to check. */
482 		goto out;
483 	case XFS_DINODE_FMT_EXTENTS:
484 		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
485 			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
486 			goto out;
487 		}
488 		break;
489 	case XFS_DINODE_FMT_BTREE:
490 		if (whichfork == XFS_COW_FORK) {
491 			xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
492 			goto out;
493 		}
494 
495 		error = xfs_scrub_bmap_btree(sc, whichfork, &info);
496 		if (error)
497 			goto out;
498 		break;
499 	default:
500 		xfs_scrub_fblock_set_corrupt(sc, whichfork, 0);
501 		goto out;
502 	}
503 
504 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
505 		goto out;
506 
507 	/* Now try to scrub the in-memory extent list. */
508         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
509 		error = xfs_iread_extents(sc->tp, ip, whichfork);
510 		if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
511 			goto out;
512 	}
513 
514 	/* Find the offset of the last extent in the mapping. */
515 	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
516 	if (!xfs_scrub_fblock_process_error(sc, whichfork, 0, &error))
517 		goto out;
518 
519 	/* Scrub extent records. */
520 	info.lastoff = 0;
521 	ifp = XFS_IFORK_PTR(ip, whichfork);
522 	for_each_xfs_iext(ifp, &icur, &irec) {
523 		if (xfs_scrub_should_terminate(sc, &error))
524 			break;
525 		if (isnullstartblock(irec.br_startblock))
526 			continue;
527 		if (irec.br_startoff >= endoff) {
528 			xfs_scrub_fblock_set_corrupt(sc, whichfork,
529 					irec.br_startoff);
530 			goto out;
531 		}
532 		error = xfs_scrub_bmap_extent(ip, NULL, &info, &irec);
533 		if (error)
534 			goto out;
535 	}
536 
537 out:
538 	return error;
539 }
540 
541 /* Scrub an inode's data fork. */
542 int
543 xfs_scrub_bmap_data(
544 	struct xfs_scrub_context	*sc)
545 {
546 	return xfs_scrub_bmap(sc, XFS_DATA_FORK);
547 }
548 
549 /* Scrub an inode's attr fork. */
550 int
551 xfs_scrub_bmap_attr(
552 	struct xfs_scrub_context	*sc)
553 {
554 	return xfs_scrub_bmap(sc, XFS_ATTR_FORK);
555 }
556 
557 /* Scrub an inode's CoW fork. */
558 int
559 xfs_scrub_bmap_cow(
560 	struct xfs_scrub_context	*sc)
561 {
562 	if (!xfs_is_reflink_inode(sc->ip))
563 		return -ENOENT;
564 
565 	return xfs_scrub_bmap(sc, XFS_COW_FORK);
566 }
567