xref: /openbmc/linux/fs/xfs/scrub/agheader_repair.c (revision 8631f940b81bf0da3d375fce166d381fa8c47bb2)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2018 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_alloc.h"
20 #include "xfs_alloc_btree.h"
21 #include "xfs_ialloc.h"
22 #include "xfs_ialloc_btree.h"
23 #include "xfs_rmap.h"
24 #include "xfs_rmap_btree.h"
25 #include "xfs_refcount.h"
26 #include "xfs_refcount_btree.h"
27 #include "scrub/xfs_scrub.h"
28 #include "scrub/scrub.h"
29 #include "scrub/common.h"
30 #include "scrub/trace.h"
31 #include "scrub/repair.h"
32 #include "scrub/bitmap.h"
33 
34 /* Superblock */
35 
36 /* Repair the superblock. */
37 int
38 xrep_superblock(
39 	struct xfs_scrub	*sc)
40 {
41 	struct xfs_mount	*mp = sc->mp;
42 	struct xfs_buf		*bp;
43 	xfs_agnumber_t		agno;
44 	int			error;
45 
46 	/* Don't try to repair AG 0's sb; let xfs_repair deal with it. */
47 	agno = sc->sm->sm_agno;
48 	if (agno == 0)
49 		return -EOPNOTSUPP;
50 
51 	error = xfs_sb_get_secondary(mp, sc->tp, agno, &bp);
52 	if (error)
53 		return error;
54 
55 	/* Copy AG 0's superblock to this one. */
56 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
57 	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
58 
59 	/* Write this to disk. */
60 	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
61 	xfs_trans_log_buf(sc->tp, bp, 0, BBTOB(bp->b_length) - 1);
62 	return error;
63 }
64 
65 /* AGF */
66 
67 struct xrep_agf_allocbt {
68 	struct xfs_scrub	*sc;
69 	xfs_agblock_t		freeblks;
70 	xfs_agblock_t		longest;
71 };
72 
73 /* Record free space shape information. */
74 STATIC int
75 xrep_agf_walk_allocbt(
76 	struct xfs_btree_cur		*cur,
77 	struct xfs_alloc_rec_incore	*rec,
78 	void				*priv)
79 {
80 	struct xrep_agf_allocbt		*raa = priv;
81 	int				error = 0;
82 
83 	if (xchk_should_terminate(raa->sc, &error))
84 		return error;
85 
86 	raa->freeblks += rec->ar_blockcount;
87 	if (rec->ar_blockcount > raa->longest)
88 		raa->longest = rec->ar_blockcount;
89 	return error;
90 }
91 
92 /* Does this AGFL block look sane? */
93 STATIC int
94 xrep_agf_check_agfl_block(
95 	struct xfs_mount	*mp,
96 	xfs_agblock_t		agbno,
97 	void			*priv)
98 {
99 	struct xfs_scrub	*sc = priv;
100 
101 	if (!xfs_verify_agbno(mp, sc->sa.agno, agbno))
102 		return -EFSCORRUPTED;
103 	return 0;
104 }
105 
/*
 * Slots in the xrep_find_ag_btree array used for AGF repair, one per btree
 * type.  These are deliberately dense indices rather than the XFS_BTNUM_
 * values, which would leave holes in the array.
 */
enum {
	XREP_AGF_BNOBT		= 0,
	XREP_AGF_CNTBT		= 1,
	XREP_AGF_RMAPBT		= 2,
	XREP_AGF_REFCOUNTBT	= 3,
	XREP_AGF_END		= 4,	/* terminating entry */
	XREP_AGF_MAX		= 5	/* array size */
};
118 
119 /* Check a btree root candidate. */
120 static inline bool
121 xrep_check_btree_root(
122 	struct xfs_scrub		*sc,
123 	struct xrep_find_ag_btree	*fab)
124 {
125 	struct xfs_mount		*mp = sc->mp;
126 	xfs_agnumber_t			agno = sc->sm->sm_agno;
127 
128 	return xfs_verify_agbno(mp, agno, fab->root) &&
129 	       fab->height <= XFS_BTREE_MAXLEVELS;
130 }
131 
132 /*
133  * Given the btree roots described by *fab, find the roots, check them for
134  * sanity, and pass the root data back out via *fab.
135  *
136  * This is /also/ a chicken and egg problem because we have to use the rmapbt
137  * (rooted in the AGF) to find the btrees rooted in the AGF.  We also have no
138  * idea if the btrees make any sense.  If we hit obvious corruptions in those
139  * btrees we'll bail out.
140  */
141 STATIC int
142 xrep_agf_find_btrees(
143 	struct xfs_scrub		*sc,
144 	struct xfs_buf			*agf_bp,
145 	struct xrep_find_ag_btree	*fab,
146 	struct xfs_buf			*agfl_bp)
147 {
148 	struct xfs_agf			*old_agf = XFS_BUF_TO_AGF(agf_bp);
149 	int				error;
150 
151 	/* Go find the root data. */
152 	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp);
153 	if (error)
154 		return error;
155 
156 	/* We must find the bnobt, cntbt, and rmapbt roots. */
157 	if (!xrep_check_btree_root(sc, &fab[XREP_AGF_BNOBT]) ||
158 	    !xrep_check_btree_root(sc, &fab[XREP_AGF_CNTBT]) ||
159 	    !xrep_check_btree_root(sc, &fab[XREP_AGF_RMAPBT]))
160 		return -EFSCORRUPTED;
161 
162 	/*
163 	 * We relied on the rmapbt to reconstruct the AGF.  If we get a
164 	 * different root then something's seriously wrong.
165 	 */
166 	if (fab[XREP_AGF_RMAPBT].root !=
167 	    be32_to_cpu(old_agf->agf_roots[XFS_BTNUM_RMAPi]))
168 		return -EFSCORRUPTED;
169 
170 	/* We must find the refcountbt root if that feature is enabled. */
171 	if (xfs_sb_version_hasreflink(&sc->mp->m_sb) &&
172 	    !xrep_check_btree_root(sc, &fab[XREP_AGF_REFCOUNTBT]))
173 		return -EFSCORRUPTED;
174 
175 	return 0;
176 }
177 
178 /*
179  * Reinitialize the AGF header, making an in-core copy of the old contents so
180  * that we know which in-core state needs to be reinitialized.
181  */
182 STATIC void
183 xrep_agf_init_header(
184 	struct xfs_scrub	*sc,
185 	struct xfs_buf		*agf_bp,
186 	struct xfs_agf		*old_agf)
187 {
188 	struct xfs_mount	*mp = sc->mp;
189 	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
190 
191 	memcpy(old_agf, agf, sizeof(*old_agf));
192 	memset(agf, 0, BBTOB(agf_bp->b_length));
193 	agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
194 	agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
195 	agf->agf_seqno = cpu_to_be32(sc->sa.agno);
196 	agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno));
197 	agf->agf_flfirst = old_agf->agf_flfirst;
198 	agf->agf_fllast = old_agf->agf_fllast;
199 	agf->agf_flcount = old_agf->agf_flcount;
200 	if (xfs_sb_version_hascrc(&mp->m_sb))
201 		uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
202 
203 	/* Mark the incore AGF data stale until we're done fixing things. */
204 	ASSERT(sc->sa.pag->pagf_init);
205 	sc->sa.pag->pagf_init = 0;
206 }
207 
208 /* Set btree root information in an AGF. */
209 STATIC void
210 xrep_agf_set_roots(
211 	struct xfs_scrub		*sc,
212 	struct xfs_agf			*agf,
213 	struct xrep_find_ag_btree	*fab)
214 {
215 	agf->agf_roots[XFS_BTNUM_BNOi] =
216 			cpu_to_be32(fab[XREP_AGF_BNOBT].root);
217 	agf->agf_levels[XFS_BTNUM_BNOi] =
218 			cpu_to_be32(fab[XREP_AGF_BNOBT].height);
219 
220 	agf->agf_roots[XFS_BTNUM_CNTi] =
221 			cpu_to_be32(fab[XREP_AGF_CNTBT].root);
222 	agf->agf_levels[XFS_BTNUM_CNTi] =
223 			cpu_to_be32(fab[XREP_AGF_CNTBT].height);
224 
225 	agf->agf_roots[XFS_BTNUM_RMAPi] =
226 			cpu_to_be32(fab[XREP_AGF_RMAPBT].root);
227 	agf->agf_levels[XFS_BTNUM_RMAPi] =
228 			cpu_to_be32(fab[XREP_AGF_RMAPBT].height);
229 
230 	if (xfs_sb_version_hasreflink(&sc->mp->m_sb)) {
231 		agf->agf_refcount_root =
232 				cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].root);
233 		agf->agf_refcount_level =
234 				cpu_to_be32(fab[XREP_AGF_REFCOUNTBT].height);
235 	}
236 }
237 
238 /* Update all AGF fields which derive from btree contents. */
239 STATIC int
240 xrep_agf_calc_from_btrees(
241 	struct xfs_scrub	*sc,
242 	struct xfs_buf		*agf_bp)
243 {
244 	struct xrep_agf_allocbt	raa = { .sc = sc };
245 	struct xfs_btree_cur	*cur = NULL;
246 	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
247 	struct xfs_mount	*mp = sc->mp;
248 	xfs_agblock_t		btreeblks;
249 	xfs_agblock_t		blocks;
250 	int			error;
251 
252 	/* Update the AGF counters from the bnobt. */
253 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
254 			XFS_BTNUM_BNO);
255 	error = xfs_alloc_query_all(cur, xrep_agf_walk_allocbt, &raa);
256 	if (error)
257 		goto err;
258 	error = xfs_btree_count_blocks(cur, &blocks);
259 	if (error)
260 		goto err;
261 	xfs_btree_del_cursor(cur, error);
262 	btreeblks = blocks - 1;
263 	agf->agf_freeblks = cpu_to_be32(raa.freeblks);
264 	agf->agf_longest = cpu_to_be32(raa.longest);
265 
266 	/* Update the AGF counters from the cntbt. */
267 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
268 			XFS_BTNUM_CNT);
269 	error = xfs_btree_count_blocks(cur, &blocks);
270 	if (error)
271 		goto err;
272 	xfs_btree_del_cursor(cur, error);
273 	btreeblks += blocks - 1;
274 
275 	/* Update the AGF counters from the rmapbt. */
276 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
277 	error = xfs_btree_count_blocks(cur, &blocks);
278 	if (error)
279 		goto err;
280 	xfs_btree_del_cursor(cur, error);
281 	agf->agf_rmap_blocks = cpu_to_be32(blocks);
282 	btreeblks += blocks - 1;
283 
284 	agf->agf_btreeblks = cpu_to_be32(btreeblks);
285 
286 	/* Update the AGF counters from the refcountbt. */
287 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
288 		cur = xfs_refcountbt_init_cursor(mp, sc->tp, agf_bp,
289 				sc->sa.agno);
290 		error = xfs_btree_count_blocks(cur, &blocks);
291 		if (error)
292 			goto err;
293 		xfs_btree_del_cursor(cur, error);
294 		agf->agf_refcount_blocks = cpu_to_be32(blocks);
295 	}
296 
297 	return 0;
298 err:
299 	xfs_btree_del_cursor(cur, error);
300 	return error;
301 }
302 
303 /* Commit the new AGF and reinitialize the incore state. */
304 STATIC int
305 xrep_agf_commit_new(
306 	struct xfs_scrub	*sc,
307 	struct xfs_buf		*agf_bp)
308 {
309 	struct xfs_perag	*pag;
310 	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
311 
312 	/* Trigger fdblocks recalculation */
313 	xfs_force_summary_recalc(sc->mp);
314 
315 	/* Write this to disk. */
316 	xfs_trans_buf_set_type(sc->tp, agf_bp, XFS_BLFT_AGF_BUF);
317 	xfs_trans_log_buf(sc->tp, agf_bp, 0, BBTOB(agf_bp->b_length) - 1);
318 
319 	/* Now reinitialize the in-core counters we changed. */
320 	pag = sc->sa.pag;
321 	pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
322 	pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
323 	pag->pagf_longest = be32_to_cpu(agf->agf_longest);
324 	pag->pagf_levels[XFS_BTNUM_BNOi] =
325 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
326 	pag->pagf_levels[XFS_BTNUM_CNTi] =
327 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
328 	pag->pagf_levels[XFS_BTNUM_RMAPi] =
329 			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
330 	pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
331 	pag->pagf_init = 1;
332 
333 	return 0;
334 }
335 
336 /* Repair the AGF. v5 filesystems only. */
337 int
338 xrep_agf(
339 	struct xfs_scrub		*sc)
340 {
341 	struct xrep_find_ag_btree	fab[XREP_AGF_MAX] = {
342 		[XREP_AGF_BNOBT] = {
343 			.rmap_owner = XFS_RMAP_OWN_AG,
344 			.buf_ops = &xfs_allocbt_buf_ops,
345 			.magic = XFS_ABTB_CRC_MAGIC,
346 		},
347 		[XREP_AGF_CNTBT] = {
348 			.rmap_owner = XFS_RMAP_OWN_AG,
349 			.buf_ops = &xfs_allocbt_buf_ops,
350 			.magic = XFS_ABTC_CRC_MAGIC,
351 		},
352 		[XREP_AGF_RMAPBT] = {
353 			.rmap_owner = XFS_RMAP_OWN_AG,
354 			.buf_ops = &xfs_rmapbt_buf_ops,
355 			.magic = XFS_RMAP_CRC_MAGIC,
356 		},
357 		[XREP_AGF_REFCOUNTBT] = {
358 			.rmap_owner = XFS_RMAP_OWN_REFC,
359 			.buf_ops = &xfs_refcountbt_buf_ops,
360 			.magic = XFS_REFC_CRC_MAGIC,
361 		},
362 		[XREP_AGF_END] = {
363 			.buf_ops = NULL,
364 		},
365 	};
366 	struct xfs_agf			old_agf;
367 	struct xfs_mount		*mp = sc->mp;
368 	struct xfs_buf			*agf_bp;
369 	struct xfs_buf			*agfl_bp;
370 	struct xfs_agf			*agf;
371 	int				error;
372 
373 	/* We require the rmapbt to rebuild anything. */
374 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
375 		return -EOPNOTSUPP;
376 
377 	xchk_perag_get(sc->mp, &sc->sa);
378 	/*
379 	 * Make sure we have the AGF buffer, as scrub might have decided it
380 	 * was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED.
381 	 */
382 	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
383 			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGF_DADDR(mp)),
384 			XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL);
385 	if (error)
386 		return error;
387 	agf_bp->b_ops = &xfs_agf_buf_ops;
388 	agf = XFS_BUF_TO_AGF(agf_bp);
389 
390 	/*
391 	 * Load the AGFL so that we can screen out OWN_AG blocks that are on
392 	 * the AGFL now; these blocks might have once been part of the
393 	 * bno/cnt/rmap btrees but are not now.  This is a chicken and egg
394 	 * problem: the AGF is corrupt, so we have to trust the AGFL contents
395 	 * because we can't do any serious cross-referencing with any of the
396 	 * btrees rooted in the AGF.  If the AGFL contents are obviously bad
397 	 * then we'll bail out.
398 	 */
399 	error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.agno, &agfl_bp);
400 	if (error)
401 		return error;
402 
403 	/*
404 	 * Spot-check the AGFL blocks; if they're obviously corrupt then
405 	 * there's nothing we can do but bail out.
406 	 */
407 	error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(agf_bp), agfl_bp,
408 			xrep_agf_check_agfl_block, sc);
409 	if (error)
410 		return error;
411 
412 	/*
413 	 * Find the AGF btree roots.  This is also a chicken-and-egg situation;
414 	 * see the function for more details.
415 	 */
416 	error = xrep_agf_find_btrees(sc, agf_bp, fab, agfl_bp);
417 	if (error)
418 		return error;
419 
420 	/* Start rewriting the header and implant the btrees we found. */
421 	xrep_agf_init_header(sc, agf_bp, &old_agf);
422 	xrep_agf_set_roots(sc, agf, fab);
423 	error = xrep_agf_calc_from_btrees(sc, agf_bp);
424 	if (error)
425 		goto out_revert;
426 
427 	/* Commit the changes and reinitialize incore state. */
428 	return xrep_agf_commit_new(sc, agf_bp);
429 
430 out_revert:
431 	/* Mark the incore AGF state stale and revert the AGF. */
432 	sc->sa.pag->pagf_init = 0;
433 	memcpy(agf, &old_agf, sizeof(old_agf));
434 	return error;
435 }
436 
437 /* AGFL */
438 
439 struct xrep_agfl {
440 	/* Bitmap of other OWN_AG metadata blocks. */
441 	struct xfs_bitmap	agmetablocks;
442 
443 	/* Bitmap of free space. */
444 	struct xfs_bitmap	*freesp;
445 
446 	struct xfs_scrub	*sc;
447 };
448 
449 /* Record all OWN_AG (free space btree) information from the rmap data. */
450 STATIC int
451 xrep_agfl_walk_rmap(
452 	struct xfs_btree_cur	*cur,
453 	struct xfs_rmap_irec	*rec,
454 	void			*priv)
455 {
456 	struct xrep_agfl	*ra = priv;
457 	xfs_fsblock_t		fsb;
458 	int			error = 0;
459 
460 	if (xchk_should_terminate(ra->sc, &error))
461 		return error;
462 
463 	/* Record all the OWN_AG blocks. */
464 	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
465 		fsb = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_private.a.agno,
466 				rec->rm_startblock);
467 		error = xfs_bitmap_set(ra->freesp, fsb, rec->rm_blockcount);
468 		if (error)
469 			return error;
470 	}
471 
472 	return xfs_bitmap_set_btcur_path(&ra->agmetablocks, cur);
473 }
474 
475 /*
476  * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce
477  * which blocks belong to the AGFL.
478  *
479  * Compute the set of old AGFL blocks by subtracting from the list of OWN_AG
480  * blocks the list of blocks owned by all other OWN_AG metadata (bnobt, cntbt,
481  * rmapbt).  These are the old AGFL blocks, so return that list and the number
482  * of blocks we're actually going to put back on the AGFL.
483  */
484 STATIC int
485 xrep_agfl_collect_blocks(
486 	struct xfs_scrub	*sc,
487 	struct xfs_buf		*agf_bp,
488 	struct xfs_bitmap	*agfl_extents,
489 	xfs_agblock_t		*flcount)
490 {
491 	struct xrep_agfl	ra;
492 	struct xfs_mount	*mp = sc->mp;
493 	struct xfs_btree_cur	*cur;
494 	struct xfs_bitmap_range	*br;
495 	struct xfs_bitmap_range	*n;
496 	int			error;
497 
498 	ra.sc = sc;
499 	ra.freesp = agfl_extents;
500 	xfs_bitmap_init(&ra.agmetablocks);
501 
502 	/* Find all space used by the free space btrees & rmapbt. */
503 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno);
504 	error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra);
505 	if (error)
506 		goto err;
507 	xfs_btree_del_cursor(cur, error);
508 
509 	/* Find all blocks currently being used by the bnobt. */
510 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
511 			XFS_BTNUM_BNO);
512 	error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur);
513 	if (error)
514 		goto err;
515 	xfs_btree_del_cursor(cur, error);
516 
517 	/* Find all blocks currently being used by the cntbt. */
518 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.agno,
519 			XFS_BTNUM_CNT);
520 	error = xfs_bitmap_set_btblocks(&ra.agmetablocks, cur);
521 	if (error)
522 		goto err;
523 
524 	xfs_btree_del_cursor(cur, error);
525 
526 	/*
527 	 * Drop the freesp meta blocks that are in use by btrees.
528 	 * The remaining blocks /should/ be AGFL blocks.
529 	 */
530 	error = xfs_bitmap_disunion(agfl_extents, &ra.agmetablocks);
531 	xfs_bitmap_destroy(&ra.agmetablocks);
532 	if (error)
533 		return error;
534 
535 	/*
536 	 * Calculate the new AGFL size.  If we found more blocks than fit in
537 	 * the AGFL we'll free them later.
538 	 */
539 	*flcount = 0;
540 	for_each_xfs_bitmap_extent(br, n, agfl_extents) {
541 		*flcount += br->len;
542 		if (*flcount > xfs_agfl_size(mp))
543 			break;
544 	}
545 	if (*flcount > xfs_agfl_size(mp))
546 		*flcount = xfs_agfl_size(mp);
547 	return 0;
548 
549 err:
550 	xfs_bitmap_destroy(&ra.agmetablocks);
551 	xfs_btree_del_cursor(cur, error);
552 	return error;
553 }
554 
555 /* Update the AGF and reset the in-core state. */
556 STATIC void
557 xrep_agfl_update_agf(
558 	struct xfs_scrub	*sc,
559 	struct xfs_buf		*agf_bp,
560 	xfs_agblock_t		flcount)
561 {
562 	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agf_bp);
563 
564 	ASSERT(flcount <= xfs_agfl_size(sc->mp));
565 
566 	/* Trigger fdblocks recalculation */
567 	xfs_force_summary_recalc(sc->mp);
568 
569 	/* Update the AGF counters. */
570 	if (sc->sa.pag->pagf_init)
571 		sc->sa.pag->pagf_flcount = flcount;
572 	agf->agf_flfirst = cpu_to_be32(0);
573 	agf->agf_flcount = cpu_to_be32(flcount);
574 	agf->agf_fllast = cpu_to_be32(flcount - 1);
575 
576 	xfs_alloc_log_agf(sc->tp, agf_bp,
577 			XFS_AGF_FLFIRST | XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
578 }
579 
580 /* Write out a totally new AGFL. */
581 STATIC void
582 xrep_agfl_init_header(
583 	struct xfs_scrub	*sc,
584 	struct xfs_buf		*agfl_bp,
585 	struct xfs_bitmap	*agfl_extents,
586 	xfs_agblock_t		flcount)
587 {
588 	struct xfs_mount	*mp = sc->mp;
589 	__be32			*agfl_bno;
590 	struct xfs_bitmap_range	*br;
591 	struct xfs_bitmap_range	*n;
592 	struct xfs_agfl		*agfl;
593 	xfs_agblock_t		agbno;
594 	unsigned int		fl_off;
595 
596 	ASSERT(flcount <= xfs_agfl_size(mp));
597 
598 	/*
599 	 * Start rewriting the header by setting the bno[] array to
600 	 * NULLAGBLOCK, then setting AGFL header fields.
601 	 */
602 	agfl = XFS_BUF_TO_AGFL(agfl_bp);
603 	memset(agfl, 0xFF, BBTOB(agfl_bp->b_length));
604 	agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
605 	agfl->agfl_seqno = cpu_to_be32(sc->sa.agno);
606 	uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
607 
608 	/*
609 	 * Fill the AGFL with the remaining blocks.  If agfl_extents has more
610 	 * blocks than fit in the AGFL, they will be freed in a subsequent
611 	 * step.
612 	 */
613 	fl_off = 0;
614 	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agfl_bp);
615 	for_each_xfs_bitmap_extent(br, n, agfl_extents) {
616 		agbno = XFS_FSB_TO_AGBNO(mp, br->start);
617 
618 		trace_xrep_agfl_insert(mp, sc->sa.agno, agbno, br->len);
619 
620 		while (br->len > 0 && fl_off < flcount) {
621 			agfl_bno[fl_off] = cpu_to_be32(agbno);
622 			fl_off++;
623 			agbno++;
624 
625 			/*
626 			 * We've now used br->start by putting it in the AGFL,
627 			 * so bump br so that we don't reap the block later.
628 			 */
629 			br->start++;
630 			br->len--;
631 		}
632 
633 		if (br->len)
634 			break;
635 		list_del(&br->list);
636 		kmem_free(br);
637 	}
638 
639 	/* Write new AGFL to disk. */
640 	xfs_trans_buf_set_type(sc->tp, agfl_bp, XFS_BLFT_AGFL_BUF);
641 	xfs_trans_log_buf(sc->tp, agfl_bp, 0, BBTOB(agfl_bp->b_length) - 1);
642 }
643 
644 /* Repair the AGFL. */
645 int
646 xrep_agfl(
647 	struct xfs_scrub	*sc)
648 {
649 	struct xfs_bitmap	agfl_extents;
650 	struct xfs_mount	*mp = sc->mp;
651 	struct xfs_buf		*agf_bp;
652 	struct xfs_buf		*agfl_bp;
653 	xfs_agblock_t		flcount;
654 	int			error;
655 
656 	/* We require the rmapbt to rebuild anything. */
657 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
658 		return -EOPNOTSUPP;
659 
660 	xchk_perag_get(sc->mp, &sc->sa);
661 	xfs_bitmap_init(&agfl_extents);
662 
663 	/*
664 	 * Read the AGF so that we can query the rmapbt.  We hope that there's
665 	 * nothing wrong with the AGF, but all the AG header repair functions
666 	 * have this chicken-and-egg problem.
667 	 */
668 	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
669 	if (error)
670 		return error;
671 	if (!agf_bp)
672 		return -ENOMEM;
673 
674 	/*
675 	 * Make sure we have the AGFL buffer, as scrub might have decided it
676 	 * was corrupt after xfs_alloc_read_agfl failed with -EFSCORRUPTED.
677 	 */
678 	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
679 			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGFL_DADDR(mp)),
680 			XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL);
681 	if (error)
682 		return error;
683 	agfl_bp->b_ops = &xfs_agfl_buf_ops;
684 
685 	/* Gather all the extents we're going to put on the new AGFL. */
686 	error = xrep_agfl_collect_blocks(sc, agf_bp, &agfl_extents, &flcount);
687 	if (error)
688 		goto err;
689 
690 	/*
691 	 * Update AGF and AGFL.  We reset the global free block counter when
692 	 * we adjust the AGF flcount (which can fail) so avoid updating any
693 	 * buffers until we know that part works.
694 	 */
695 	xrep_agfl_update_agf(sc, agf_bp, flcount);
696 	xrep_agfl_init_header(sc, agfl_bp, &agfl_extents, flcount);
697 
698 	/*
699 	 * Ok, the AGFL should be ready to go now.  Roll the transaction to
700 	 * make the new AGFL permanent before we start using it to return
701 	 * freespace overflow to the freespace btrees.
702 	 */
703 	sc->sa.agf_bp = agf_bp;
704 	sc->sa.agfl_bp = agfl_bp;
705 	error = xrep_roll_ag_trans(sc);
706 	if (error)
707 		goto err;
708 
709 	/* Dump any AGFL overflow. */
710 	return xrep_reap_extents(sc, &agfl_extents, &XFS_RMAP_OINFO_AG,
711 			XFS_AG_RESV_AGFL);
712 err:
713 	xfs_bitmap_destroy(&agfl_extents);
714 	return error;
715 }
716 
717 /* AGI */
718 
/*
 * Slots in the xrep_find_ag_btree array used for AGI repair, one per btree
 * type.  These are deliberately dense indices rather than the XFS_BTNUM_
 * values, which would leave holes in the array.
 */
enum {
	XREP_AGI_INOBT		= 0,
	XREP_AGI_FINOBT		= 1,
	XREP_AGI_END		= 2,	/* terminating entry */
	XREP_AGI_MAX		= 3	/* array size */
};
729 
730 /*
731  * Given the inode btree roots described by *fab, find the roots, check them
732  * for sanity, and pass the root data back out via *fab.
733  */
734 STATIC int
735 xrep_agi_find_btrees(
736 	struct xfs_scrub		*sc,
737 	struct xrep_find_ag_btree	*fab)
738 {
739 	struct xfs_buf			*agf_bp;
740 	struct xfs_mount		*mp = sc->mp;
741 	int				error;
742 
743 	/* Read the AGF. */
744 	error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
745 	if (error)
746 		return error;
747 	if (!agf_bp)
748 		return -ENOMEM;
749 
750 	/* Find the btree roots. */
751 	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, NULL);
752 	if (error)
753 		return error;
754 
755 	/* We must find the inobt root. */
756 	if (!xrep_check_btree_root(sc, &fab[XREP_AGI_INOBT]))
757 		return -EFSCORRUPTED;
758 
759 	/* We must find the finobt root if that feature is enabled. */
760 	if (xfs_sb_version_hasfinobt(&mp->m_sb) &&
761 	    !xrep_check_btree_root(sc, &fab[XREP_AGI_FINOBT]))
762 		return -EFSCORRUPTED;
763 
764 	return 0;
765 }
766 
767 /*
768  * Reinitialize the AGI header, making an in-core copy of the old contents so
769  * that we know which in-core state needs to be reinitialized.
770  */
771 STATIC void
772 xrep_agi_init_header(
773 	struct xfs_scrub	*sc,
774 	struct xfs_buf		*agi_bp,
775 	struct xfs_agi		*old_agi)
776 {
777 	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agi_bp);
778 	struct xfs_mount	*mp = sc->mp;
779 
780 	memcpy(old_agi, agi, sizeof(*old_agi));
781 	memset(agi, 0, BBTOB(agi_bp->b_length));
782 	agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
783 	agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
784 	agi->agi_seqno = cpu_to_be32(sc->sa.agno);
785 	agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp, sc->sa.agno));
786 	agi->agi_newino = cpu_to_be32(NULLAGINO);
787 	agi->agi_dirino = cpu_to_be32(NULLAGINO);
788 	if (xfs_sb_version_hascrc(&mp->m_sb))
789 		uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
790 
791 	/* We don't know how to fix the unlinked list yet. */
792 	memcpy(&agi->agi_unlinked, &old_agi->agi_unlinked,
793 			sizeof(agi->agi_unlinked));
794 
795 	/* Mark the incore AGF data stale until we're done fixing things. */
796 	ASSERT(sc->sa.pag->pagi_init);
797 	sc->sa.pag->pagi_init = 0;
798 }
799 
800 /* Set btree root information in an AGI. */
801 STATIC void
802 xrep_agi_set_roots(
803 	struct xfs_scrub		*sc,
804 	struct xfs_agi			*agi,
805 	struct xrep_find_ag_btree	*fab)
806 {
807 	agi->agi_root = cpu_to_be32(fab[XREP_AGI_INOBT].root);
808 	agi->agi_level = cpu_to_be32(fab[XREP_AGI_INOBT].height);
809 
810 	if (xfs_sb_version_hasfinobt(&sc->mp->m_sb)) {
811 		agi->agi_free_root = cpu_to_be32(fab[XREP_AGI_FINOBT].root);
812 		agi->agi_free_level = cpu_to_be32(fab[XREP_AGI_FINOBT].height);
813 	}
814 }
815 
816 /* Update the AGI counters. */
817 STATIC int
818 xrep_agi_calc_from_btrees(
819 	struct xfs_scrub	*sc,
820 	struct xfs_buf		*agi_bp)
821 {
822 	struct xfs_btree_cur	*cur;
823 	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agi_bp);
824 	struct xfs_mount	*mp = sc->mp;
825 	xfs_agino_t		count;
826 	xfs_agino_t		freecount;
827 	int			error;
828 
829 	cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno,
830 			XFS_BTNUM_INO);
831 	error = xfs_ialloc_count_inodes(cur, &count, &freecount);
832 	if (error)
833 		goto err;
834 	xfs_btree_del_cursor(cur, error);
835 
836 	agi->agi_count = cpu_to_be32(count);
837 	agi->agi_freecount = cpu_to_be32(freecount);
838 	return 0;
839 err:
840 	xfs_btree_del_cursor(cur, error);
841 	return error;
842 }
843 
844 /* Trigger reinitialization of the in-core data. */
845 STATIC int
846 xrep_agi_commit_new(
847 	struct xfs_scrub	*sc,
848 	struct xfs_buf		*agi_bp)
849 {
850 	struct xfs_perag	*pag;
851 	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agi_bp);
852 
853 	/* Trigger inode count recalculation */
854 	xfs_force_summary_recalc(sc->mp);
855 
856 	/* Write this to disk. */
857 	xfs_trans_buf_set_type(sc->tp, agi_bp, XFS_BLFT_AGI_BUF);
858 	xfs_trans_log_buf(sc->tp, agi_bp, 0, BBTOB(agi_bp->b_length) - 1);
859 
860 	/* Now reinitialize the in-core counters if necessary. */
861 	pag = sc->sa.pag;
862 	pag->pagi_count = be32_to_cpu(agi->agi_count);
863 	pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
864 	pag->pagi_init = 1;
865 
866 	return 0;
867 }
868 
869 /* Repair the AGI. */
870 int
871 xrep_agi(
872 	struct xfs_scrub		*sc)
873 {
874 	struct xrep_find_ag_btree	fab[XREP_AGI_MAX] = {
875 		[XREP_AGI_INOBT] = {
876 			.rmap_owner = XFS_RMAP_OWN_INOBT,
877 			.buf_ops = &xfs_inobt_buf_ops,
878 			.magic = XFS_IBT_CRC_MAGIC,
879 		},
880 		[XREP_AGI_FINOBT] = {
881 			.rmap_owner = XFS_RMAP_OWN_INOBT,
882 			.buf_ops = &xfs_inobt_buf_ops,
883 			.magic = XFS_FIBT_CRC_MAGIC,
884 		},
885 		[XREP_AGI_END] = {
886 			.buf_ops = NULL
887 		},
888 	};
889 	struct xfs_agi			old_agi;
890 	struct xfs_mount		*mp = sc->mp;
891 	struct xfs_buf			*agi_bp;
892 	struct xfs_agi			*agi;
893 	int				error;
894 
895 	/* We require the rmapbt to rebuild anything. */
896 	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
897 		return -EOPNOTSUPP;
898 
899 	xchk_perag_get(sc->mp, &sc->sa);
900 	/*
901 	 * Make sure we have the AGI buffer, as scrub might have decided it
902 	 * was corrupt after xfs_ialloc_read_agi failed with -EFSCORRUPTED.
903 	 */
904 	error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
905 			XFS_AG_DADDR(mp, sc->sa.agno, XFS_AGI_DADDR(mp)),
906 			XFS_FSS_TO_BB(mp, 1), 0, &agi_bp, NULL);
907 	if (error)
908 		return error;
909 	agi_bp->b_ops = &xfs_agi_buf_ops;
910 	agi = XFS_BUF_TO_AGI(agi_bp);
911 
912 	/* Find the AGI btree roots. */
913 	error = xrep_agi_find_btrees(sc, fab);
914 	if (error)
915 		return error;
916 
917 	/* Start rewriting the header and implant the btrees we found. */
918 	xrep_agi_init_header(sc, agi_bp, &old_agi);
919 	xrep_agi_set_roots(sc, agi, fab);
920 	error = xrep_agi_calc_from_btrees(sc, agi_bp);
921 	if (error)
922 		goto out_revert;
923 
924 	/* Reinitialize in-core state. */
925 	return xrep_agi_commit_new(sc, agi_bp);
926 
927 out_revert:
928 	/* Mark the incore AGI state stale and revert the AGI. */
929 	sc->sa.pag->pagi_init = 0;
930 	memcpy(agi, &old_agi, sizeof(old_agi));
931 	return error;
932 }
933