xref: /openbmc/linux/fs/xfs/xfs_health.c (revision ae213c44)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2019 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_da_format.h"
17 #include "xfs_da_btree.h"
18 #include "xfs_inode.h"
19 #include "xfs_trace.h"
20 #include "xfs_health.h"
21 
22 /*
23  * Warn about metadata corruption that we detected but haven't fixed, and
24  * make sure we're not sitting on anything that would get in the way of
25  * recovery.
26  */
27 void
28 xfs_health_unmount(
29 	struct xfs_mount	*mp)
30 {
31 	struct xfs_perag	*pag;
32 	xfs_agnumber_t		agno;
33 	unsigned int		sick = 0;
34 	unsigned int		checked = 0;
35 	bool			warn = false;
36 
37 	if (XFS_FORCED_SHUTDOWN(mp))
38 		return;
39 
40 	/* Measure AG corruption levels. */
41 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
42 		pag = xfs_perag_get(mp, agno);
43 		xfs_ag_measure_sickness(pag, &sick, &checked);
44 		if (sick) {
45 			trace_xfs_ag_unfixed_corruption(mp, agno, sick);
46 			warn = true;
47 		}
48 		xfs_perag_put(pag);
49 	}
50 
51 	/* Measure realtime volume corruption levels. */
52 	xfs_rt_measure_sickness(mp, &sick, &checked);
53 	if (sick) {
54 		trace_xfs_rt_unfixed_corruption(mp, sick);
55 		warn = true;
56 	}
57 
58 	/*
59 	 * Measure fs corruption and keep the sample around for the warning.
60 	 * See the note below for why we exempt FS_COUNTERS.
61 	 */
62 	xfs_fs_measure_sickness(mp, &sick, &checked);
63 	if (sick & ~XFS_SICK_FS_COUNTERS) {
64 		trace_xfs_fs_unfixed_corruption(mp, sick);
65 		warn = true;
66 	}
67 
68 	if (warn) {
69 		xfs_warn(mp,
70 "Uncorrected metadata errors detected; please run xfs_repair.");
71 
72 		/*
73 		 * We discovered uncorrected metadata problems at some point
74 		 * during this filesystem mount and have advised the
75 		 * administrator to run repair once the unmount completes.
76 		 *
77 		 * However, we must be careful -- when FSCOUNTERS are flagged
78 		 * unhealthy, the unmount procedure omits writing the clean
79 		 * unmount record to the log so that the next mount will run
80 		 * recovery and recompute the summary counters.  In other
81 		 * words, we leave a dirty log to get the counters fixed.
82 		 *
83 		 * Unfortunately, xfs_repair cannot recover dirty logs, so if
84 		 * there were filesystem problems, FSCOUNTERS was flagged, and
85 		 * the administrator takes our advice to run xfs_repair,
86 		 * they'll have to zap the log before repairing structures.
87 		 * We don't really want to encourage this, so we mark the
88 		 * FSCOUNTERS healthy so that a subsequent repair run won't see
89 		 * a dirty log.
90 		 */
91 		if (sick & XFS_SICK_FS_COUNTERS)
92 			xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
93 	}
94 }
95 
96 /* Mark unhealthy per-fs metadata. */
97 void
98 xfs_fs_mark_sick(
99 	struct xfs_mount	*mp,
100 	unsigned int		mask)
101 {
102 	ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
103 	trace_xfs_fs_mark_sick(mp, mask);
104 
105 	spin_lock(&mp->m_sb_lock);
106 	mp->m_fs_sick |= mask;
107 	mp->m_fs_checked |= mask;
108 	spin_unlock(&mp->m_sb_lock);
109 }
110 
111 /* Mark a per-fs metadata healed. */
112 void
113 xfs_fs_mark_healthy(
114 	struct xfs_mount	*mp,
115 	unsigned int		mask)
116 {
117 	ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
118 	trace_xfs_fs_mark_healthy(mp, mask);
119 
120 	spin_lock(&mp->m_sb_lock);
121 	mp->m_fs_sick &= ~mask;
122 	mp->m_fs_checked |= mask;
123 	spin_unlock(&mp->m_sb_lock);
124 }
125 
126 /* Sample which per-fs metadata are unhealthy. */
127 void
128 xfs_fs_measure_sickness(
129 	struct xfs_mount	*mp,
130 	unsigned int		*sick,
131 	unsigned int		*checked)
132 {
133 	spin_lock(&mp->m_sb_lock);
134 	*sick = mp->m_fs_sick;
135 	*checked = mp->m_fs_checked;
136 	spin_unlock(&mp->m_sb_lock);
137 }
138 
139 /* Mark unhealthy realtime metadata. */
140 void
141 xfs_rt_mark_sick(
142 	struct xfs_mount	*mp,
143 	unsigned int		mask)
144 {
145 	ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
146 	trace_xfs_rt_mark_sick(mp, mask);
147 
148 	spin_lock(&mp->m_sb_lock);
149 	mp->m_rt_sick |= mask;
150 	mp->m_rt_checked |= mask;
151 	spin_unlock(&mp->m_sb_lock);
152 }
153 
154 /* Mark a realtime metadata healed. */
155 void
156 xfs_rt_mark_healthy(
157 	struct xfs_mount	*mp,
158 	unsigned int		mask)
159 {
160 	ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
161 	trace_xfs_rt_mark_healthy(mp, mask);
162 
163 	spin_lock(&mp->m_sb_lock);
164 	mp->m_rt_sick &= ~mask;
165 	mp->m_rt_checked |= mask;
166 	spin_unlock(&mp->m_sb_lock);
167 }
168 
169 /* Sample which realtime metadata are unhealthy. */
170 void
171 xfs_rt_measure_sickness(
172 	struct xfs_mount	*mp,
173 	unsigned int		*sick,
174 	unsigned int		*checked)
175 {
176 	spin_lock(&mp->m_sb_lock);
177 	*sick = mp->m_rt_sick;
178 	*checked = mp->m_rt_checked;
179 	spin_unlock(&mp->m_sb_lock);
180 }
181 
182 /* Mark unhealthy per-ag metadata. */
183 void
184 xfs_ag_mark_sick(
185 	struct xfs_perag	*pag,
186 	unsigned int		mask)
187 {
188 	ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
189 	trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
190 
191 	spin_lock(&pag->pag_state_lock);
192 	pag->pag_sick |= mask;
193 	pag->pag_checked |= mask;
194 	spin_unlock(&pag->pag_state_lock);
195 }
196 
197 /* Mark per-ag metadata ok. */
198 void
199 xfs_ag_mark_healthy(
200 	struct xfs_perag	*pag,
201 	unsigned int		mask)
202 {
203 	ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
204 	trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
205 
206 	spin_lock(&pag->pag_state_lock);
207 	pag->pag_sick &= ~mask;
208 	pag->pag_checked |= mask;
209 	spin_unlock(&pag->pag_state_lock);
210 }
211 
212 /* Sample which per-ag metadata are unhealthy. */
213 void
214 xfs_ag_measure_sickness(
215 	struct xfs_perag	*pag,
216 	unsigned int		*sick,
217 	unsigned int		*checked)
218 {
219 	spin_lock(&pag->pag_state_lock);
220 	*sick = pag->pag_sick;
221 	*checked = pag->pag_checked;
222 	spin_unlock(&pag->pag_state_lock);
223 }
224 
225 /* Mark the unhealthy parts of an inode. */
226 void
227 xfs_inode_mark_sick(
228 	struct xfs_inode	*ip,
229 	unsigned int		mask)
230 {
231 	ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
232 	trace_xfs_inode_mark_sick(ip, mask);
233 
234 	spin_lock(&ip->i_flags_lock);
235 	ip->i_sick |= mask;
236 	ip->i_checked |= mask;
237 	spin_unlock(&ip->i_flags_lock);
238 }
239 
240 /* Mark parts of an inode healed. */
241 void
242 xfs_inode_mark_healthy(
243 	struct xfs_inode	*ip,
244 	unsigned int		mask)
245 {
246 	ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
247 	trace_xfs_inode_mark_healthy(ip, mask);
248 
249 	spin_lock(&ip->i_flags_lock);
250 	ip->i_sick &= ~mask;
251 	ip->i_checked |= mask;
252 	spin_unlock(&ip->i_flags_lock);
253 }
254 
255 /* Sample which parts of an inode are unhealthy. */
256 void
257 xfs_inode_measure_sickness(
258 	struct xfs_inode	*ip,
259 	unsigned int		*sick,
260 	unsigned int		*checked)
261 {
262 	spin_lock(&ip->i_flags_lock);
263 	*sick = ip->i_sick;
264 	*checked = ip->i_checked;
265 	spin_unlock(&ip->i_flags_lock);
266 }
267 
/*
 * Translation between the internal sick masks and the sick masks reported
 * through the ioctl structures.
 *
 * Each entry pairs one internal flag with its ioctl counterpart.  Tables of
 * these entries end with an all-zero entry.
 */
struct ioctl_sick_map {
	/* Internal XFS_SICK_* flag; zero marks the end of a table. */
	unsigned int		sick_mask;

	/* Flag to set in the ioctl structure for that internal flag. */
	unsigned int		ioctl_mask;
};
274 
275 static const struct ioctl_sick_map fs_map[] = {
276 	{ XFS_SICK_FS_COUNTERS,	XFS_FSOP_GEOM_SICK_COUNTERS},
277 	{ XFS_SICK_FS_UQUOTA,	XFS_FSOP_GEOM_SICK_UQUOTA },
278 	{ XFS_SICK_FS_GQUOTA,	XFS_FSOP_GEOM_SICK_GQUOTA },
279 	{ XFS_SICK_FS_PQUOTA,	XFS_FSOP_GEOM_SICK_PQUOTA },
280 	{ 0, 0 },
281 };
282 
283 static const struct ioctl_sick_map rt_map[] = {
284 	{ XFS_SICK_RT_BITMAP,	XFS_FSOP_GEOM_SICK_RT_BITMAP },
285 	{ XFS_SICK_RT_SUMMARY,	XFS_FSOP_GEOM_SICK_RT_SUMMARY },
286 	{ 0, 0 },
287 };
288 
289 static inline void
290 xfgeo_health_tick(
291 	struct xfs_fsop_geom		*geo,
292 	unsigned int			sick,
293 	unsigned int			checked,
294 	const struct ioctl_sick_map	*m)
295 {
296 	if (checked & m->sick_mask)
297 		geo->checked |= m->ioctl_mask;
298 	if (sick & m->sick_mask)
299 		geo->sick |= m->ioctl_mask;
300 }
301 
302 /* Fill out fs geometry health info. */
303 void
304 xfs_fsop_geom_health(
305 	struct xfs_mount		*mp,
306 	struct xfs_fsop_geom		*geo)
307 {
308 	const struct ioctl_sick_map	*m;
309 	unsigned int			sick;
310 	unsigned int			checked;
311 
312 	geo->sick = 0;
313 	geo->checked = 0;
314 
315 	xfs_fs_measure_sickness(mp, &sick, &checked);
316 	for (m = fs_map; m->sick_mask; m++)
317 		xfgeo_health_tick(geo, sick, checked, m);
318 
319 	xfs_rt_measure_sickness(mp, &sick, &checked);
320 	for (m = rt_map; m->sick_mask; m++)
321 		xfgeo_health_tick(geo, sick, checked, m);
322 }
323 
324 static const struct ioctl_sick_map ag_map[] = {
325 	{ XFS_SICK_AG_SB,	XFS_AG_GEOM_SICK_SB },
326 	{ XFS_SICK_AG_AGF,	XFS_AG_GEOM_SICK_AGF },
327 	{ XFS_SICK_AG_AGFL,	XFS_AG_GEOM_SICK_AGFL },
328 	{ XFS_SICK_AG_AGI,	XFS_AG_GEOM_SICK_AGI },
329 	{ XFS_SICK_AG_BNOBT,	XFS_AG_GEOM_SICK_BNOBT },
330 	{ XFS_SICK_AG_CNTBT,	XFS_AG_GEOM_SICK_CNTBT },
331 	{ XFS_SICK_AG_INOBT,	XFS_AG_GEOM_SICK_INOBT },
332 	{ XFS_SICK_AG_FINOBT,	XFS_AG_GEOM_SICK_FINOBT },
333 	{ XFS_SICK_AG_RMAPBT,	XFS_AG_GEOM_SICK_RMAPBT },
334 	{ XFS_SICK_AG_REFCNTBT,	XFS_AG_GEOM_SICK_REFCNTBT },
335 	{ 0, 0 },
336 };
337 
338 /* Fill out ag geometry health info. */
339 void
340 xfs_ag_geom_health(
341 	struct xfs_perag		*pag,
342 	struct xfs_ag_geometry		*ageo)
343 {
344 	const struct ioctl_sick_map	*m;
345 	unsigned int			sick;
346 	unsigned int			checked;
347 
348 	ageo->ag_sick = 0;
349 	ageo->ag_checked = 0;
350 
351 	xfs_ag_measure_sickness(pag, &sick, &checked);
352 	for (m = ag_map; m->sick_mask; m++) {
353 		if (checked & m->sick_mask)
354 			ageo->ag_checked |= m->ioctl_mask;
355 		if (sick & m->sick_mask)
356 			ageo->ag_sick |= m->ioctl_mask;
357 	}
358 }
359 
360 static const struct ioctl_sick_map ino_map[] = {
361 	{ XFS_SICK_INO_CORE,	XFS_BS_SICK_INODE },
362 	{ XFS_SICK_INO_BMBTD,	XFS_BS_SICK_BMBTD },
363 	{ XFS_SICK_INO_BMBTA,	XFS_BS_SICK_BMBTA },
364 	{ XFS_SICK_INO_BMBTC,	XFS_BS_SICK_BMBTC },
365 	{ XFS_SICK_INO_DIR,	XFS_BS_SICK_DIR },
366 	{ XFS_SICK_INO_XATTR,	XFS_BS_SICK_XATTR },
367 	{ XFS_SICK_INO_SYMLINK,	XFS_BS_SICK_SYMLINK },
368 	{ XFS_SICK_INO_PARENT,	XFS_BS_SICK_PARENT },
369 	{ 0, 0 },
370 };
371 
372 /* Fill out bulkstat health info. */
373 void
374 xfs_bulkstat_health(
375 	struct xfs_inode		*ip,
376 	struct xfs_bstat		*bs)
377 {
378 	const struct ioctl_sick_map	*m;
379 	unsigned int			sick;
380 	unsigned int			checked;
381 
382 	bs->bs_sick = 0;
383 	bs->bs_checked = 0;
384 
385 	xfs_inode_measure_sickness(ip, &sick, &checked);
386 	for (m = ino_map; m->sick_mask; m++) {
387 		if (checked & m->sick_mask)
388 			bs->bs_checked |= m->ioctl_mask;
389 		if (sick & m->sick_mask)
390 			bs->bs_sick |= m->ioctl_mask;
391 	}
392 }
393