xref: /openbmc/linux/fs/xfs/xfs_health.c (revision 74be2d3b)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * Copyright (C) 2019 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <darrick.wong@oracle.com>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_sb.h"
13 #include "xfs_mount.h"
14 #include "xfs_inode.h"
15 #include "xfs_trace.h"
16 #include "xfs_health.h"
17 
18 /*
19  * Warn about metadata corruption that we detected but haven't fixed, and
20  * make sure we're not sitting on anything that would get in the way of
21  * recovery.
22  */
23 void
24 xfs_health_unmount(
25 	struct xfs_mount	*mp)
26 {
27 	struct xfs_perag	*pag;
28 	xfs_agnumber_t		agno;
29 	unsigned int		sick = 0;
30 	unsigned int		checked = 0;
31 	bool			warn = false;
32 
33 	if (XFS_FORCED_SHUTDOWN(mp))
34 		return;
35 
36 	/* Measure AG corruption levels. */
37 	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
38 		pag = xfs_perag_get(mp, agno);
39 		xfs_ag_measure_sickness(pag, &sick, &checked);
40 		if (sick) {
41 			trace_xfs_ag_unfixed_corruption(mp, agno, sick);
42 			warn = true;
43 		}
44 		xfs_perag_put(pag);
45 	}
46 
47 	/* Measure realtime volume corruption levels. */
48 	xfs_rt_measure_sickness(mp, &sick, &checked);
49 	if (sick) {
50 		trace_xfs_rt_unfixed_corruption(mp, sick);
51 		warn = true;
52 	}
53 
54 	/*
55 	 * Measure fs corruption and keep the sample around for the warning.
56 	 * See the note below for why we exempt FS_COUNTERS.
57 	 */
58 	xfs_fs_measure_sickness(mp, &sick, &checked);
59 	if (sick & ~XFS_SICK_FS_COUNTERS) {
60 		trace_xfs_fs_unfixed_corruption(mp, sick);
61 		warn = true;
62 	}
63 
64 	if (warn) {
65 		xfs_warn(mp,
66 "Uncorrected metadata errors detected; please run xfs_repair.");
67 
68 		/*
69 		 * We discovered uncorrected metadata problems at some point
70 		 * during this filesystem mount and have advised the
71 		 * administrator to run repair once the unmount completes.
72 		 *
73 		 * However, we must be careful -- when FSCOUNTERS are flagged
74 		 * unhealthy, the unmount procedure omits writing the clean
75 		 * unmount record to the log so that the next mount will run
76 		 * recovery and recompute the summary counters.  In other
77 		 * words, we leave a dirty log to get the counters fixed.
78 		 *
79 		 * Unfortunately, xfs_repair cannot recover dirty logs, so if
80 		 * there were filesystem problems, FSCOUNTERS was flagged, and
81 		 * the administrator takes our advice to run xfs_repair,
82 		 * they'll have to zap the log before repairing structures.
83 		 * We don't really want to encourage this, so we mark the
84 		 * FSCOUNTERS healthy so that a subsequent repair run won't see
85 		 * a dirty log.
86 		 */
87 		if (sick & XFS_SICK_FS_COUNTERS)
88 			xfs_fs_mark_healthy(mp, XFS_SICK_FS_COUNTERS);
89 	}
90 }
91 
92 /* Mark unhealthy per-fs metadata. */
93 void
94 xfs_fs_mark_sick(
95 	struct xfs_mount	*mp,
96 	unsigned int		mask)
97 {
98 	ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
99 	trace_xfs_fs_mark_sick(mp, mask);
100 
101 	spin_lock(&mp->m_sb_lock);
102 	mp->m_fs_sick |= mask;
103 	mp->m_fs_checked |= mask;
104 	spin_unlock(&mp->m_sb_lock);
105 }
106 
107 /* Mark a per-fs metadata healed. */
108 void
109 xfs_fs_mark_healthy(
110 	struct xfs_mount	*mp,
111 	unsigned int		mask)
112 {
113 	ASSERT(!(mask & ~XFS_SICK_FS_PRIMARY));
114 	trace_xfs_fs_mark_healthy(mp, mask);
115 
116 	spin_lock(&mp->m_sb_lock);
117 	mp->m_fs_sick &= ~mask;
118 	mp->m_fs_checked |= mask;
119 	spin_unlock(&mp->m_sb_lock);
120 }
121 
122 /* Sample which per-fs metadata are unhealthy. */
123 void
124 xfs_fs_measure_sickness(
125 	struct xfs_mount	*mp,
126 	unsigned int		*sick,
127 	unsigned int		*checked)
128 {
129 	spin_lock(&mp->m_sb_lock);
130 	*sick = mp->m_fs_sick;
131 	*checked = mp->m_fs_checked;
132 	spin_unlock(&mp->m_sb_lock);
133 }
134 
135 /* Mark unhealthy realtime metadata. */
136 void
137 xfs_rt_mark_sick(
138 	struct xfs_mount	*mp,
139 	unsigned int		mask)
140 {
141 	ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
142 	trace_xfs_rt_mark_sick(mp, mask);
143 
144 	spin_lock(&mp->m_sb_lock);
145 	mp->m_rt_sick |= mask;
146 	mp->m_rt_checked |= mask;
147 	spin_unlock(&mp->m_sb_lock);
148 }
149 
150 /* Mark a realtime metadata healed. */
151 void
152 xfs_rt_mark_healthy(
153 	struct xfs_mount	*mp,
154 	unsigned int		mask)
155 {
156 	ASSERT(!(mask & ~XFS_SICK_RT_PRIMARY));
157 	trace_xfs_rt_mark_healthy(mp, mask);
158 
159 	spin_lock(&mp->m_sb_lock);
160 	mp->m_rt_sick &= ~mask;
161 	mp->m_rt_checked |= mask;
162 	spin_unlock(&mp->m_sb_lock);
163 }
164 
165 /* Sample which realtime metadata are unhealthy. */
166 void
167 xfs_rt_measure_sickness(
168 	struct xfs_mount	*mp,
169 	unsigned int		*sick,
170 	unsigned int		*checked)
171 {
172 	spin_lock(&mp->m_sb_lock);
173 	*sick = mp->m_rt_sick;
174 	*checked = mp->m_rt_checked;
175 	spin_unlock(&mp->m_sb_lock);
176 }
177 
178 /* Mark unhealthy per-ag metadata. */
179 void
180 xfs_ag_mark_sick(
181 	struct xfs_perag	*pag,
182 	unsigned int		mask)
183 {
184 	ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
185 	trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
186 
187 	spin_lock(&pag->pag_state_lock);
188 	pag->pag_sick |= mask;
189 	pag->pag_checked |= mask;
190 	spin_unlock(&pag->pag_state_lock);
191 }
192 
193 /* Mark per-ag metadata ok. */
194 void
195 xfs_ag_mark_healthy(
196 	struct xfs_perag	*pag,
197 	unsigned int		mask)
198 {
199 	ASSERT(!(mask & ~XFS_SICK_AG_PRIMARY));
200 	trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
201 
202 	spin_lock(&pag->pag_state_lock);
203 	pag->pag_sick &= ~mask;
204 	pag->pag_checked |= mask;
205 	spin_unlock(&pag->pag_state_lock);
206 }
207 
208 /* Sample which per-ag metadata are unhealthy. */
209 void
210 xfs_ag_measure_sickness(
211 	struct xfs_perag	*pag,
212 	unsigned int		*sick,
213 	unsigned int		*checked)
214 {
215 	spin_lock(&pag->pag_state_lock);
216 	*sick = pag->pag_sick;
217 	*checked = pag->pag_checked;
218 	spin_unlock(&pag->pag_state_lock);
219 }
220 
221 /* Mark the unhealthy parts of an inode. */
222 void
223 xfs_inode_mark_sick(
224 	struct xfs_inode	*ip,
225 	unsigned int		mask)
226 {
227 	ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
228 	trace_xfs_inode_mark_sick(ip, mask);
229 
230 	spin_lock(&ip->i_flags_lock);
231 	ip->i_sick |= mask;
232 	ip->i_checked |= mask;
233 	spin_unlock(&ip->i_flags_lock);
234 }
235 
236 /* Mark parts of an inode healed. */
237 void
238 xfs_inode_mark_healthy(
239 	struct xfs_inode	*ip,
240 	unsigned int		mask)
241 {
242 	ASSERT(!(mask & ~XFS_SICK_INO_PRIMARY));
243 	trace_xfs_inode_mark_healthy(ip, mask);
244 
245 	spin_lock(&ip->i_flags_lock);
246 	ip->i_sick &= ~mask;
247 	ip->i_checked |= mask;
248 	spin_unlock(&ip->i_flags_lock);
249 }
250 
251 /* Sample which parts of an inode are unhealthy. */
252 void
253 xfs_inode_measure_sickness(
254 	struct xfs_inode	*ip,
255 	unsigned int		*sick,
256 	unsigned int		*checked)
257 {
258 	spin_lock(&ip->i_flags_lock);
259 	*sick = ip->i_sick;
260 	*checked = ip->i_checked;
261 	spin_unlock(&ip->i_flags_lock);
262 }
263 
/*
 * Mappings between internal sick masks and ioctl sick masks.
 *
 * One entry pairs a single in-kernel health flag with the flag that is
 * reported for it through the ioctl interface.  The lookup tables built
 * from this type end with an all-zero entry.
 */
struct ioctl_sick_map {
	/* In-kernel health flag. */
	unsigned int		sick_mask;
	/* Matching flag in the ioctl structure. */
	unsigned int		ioctl_mask;
};
270 
271 static const struct ioctl_sick_map fs_map[] = {
272 	{ XFS_SICK_FS_COUNTERS,	XFS_FSOP_GEOM_SICK_COUNTERS},
273 	{ XFS_SICK_FS_UQUOTA,	XFS_FSOP_GEOM_SICK_UQUOTA },
274 	{ XFS_SICK_FS_GQUOTA,	XFS_FSOP_GEOM_SICK_GQUOTA },
275 	{ XFS_SICK_FS_PQUOTA,	XFS_FSOP_GEOM_SICK_PQUOTA },
276 	{ 0, 0 },
277 };
278 
279 static const struct ioctl_sick_map rt_map[] = {
280 	{ XFS_SICK_RT_BITMAP,	XFS_FSOP_GEOM_SICK_RT_BITMAP },
281 	{ XFS_SICK_RT_SUMMARY,	XFS_FSOP_GEOM_SICK_RT_SUMMARY },
282 	{ 0, 0 },
283 };
284 
285 static inline void
286 xfgeo_health_tick(
287 	struct xfs_fsop_geom		*geo,
288 	unsigned int			sick,
289 	unsigned int			checked,
290 	const struct ioctl_sick_map	*m)
291 {
292 	if (checked & m->sick_mask)
293 		geo->checked |= m->ioctl_mask;
294 	if (sick & m->sick_mask)
295 		geo->sick |= m->ioctl_mask;
296 }
297 
298 /* Fill out fs geometry health info. */
299 void
300 xfs_fsop_geom_health(
301 	struct xfs_mount		*mp,
302 	struct xfs_fsop_geom		*geo)
303 {
304 	const struct ioctl_sick_map	*m;
305 	unsigned int			sick;
306 	unsigned int			checked;
307 
308 	geo->sick = 0;
309 	geo->checked = 0;
310 
311 	xfs_fs_measure_sickness(mp, &sick, &checked);
312 	for (m = fs_map; m->sick_mask; m++)
313 		xfgeo_health_tick(geo, sick, checked, m);
314 
315 	xfs_rt_measure_sickness(mp, &sick, &checked);
316 	for (m = rt_map; m->sick_mask; m++)
317 		xfgeo_health_tick(geo, sick, checked, m);
318 }
319 
320 static const struct ioctl_sick_map ag_map[] = {
321 	{ XFS_SICK_AG_SB,	XFS_AG_GEOM_SICK_SB },
322 	{ XFS_SICK_AG_AGF,	XFS_AG_GEOM_SICK_AGF },
323 	{ XFS_SICK_AG_AGFL,	XFS_AG_GEOM_SICK_AGFL },
324 	{ XFS_SICK_AG_AGI,	XFS_AG_GEOM_SICK_AGI },
325 	{ XFS_SICK_AG_BNOBT,	XFS_AG_GEOM_SICK_BNOBT },
326 	{ XFS_SICK_AG_CNTBT,	XFS_AG_GEOM_SICK_CNTBT },
327 	{ XFS_SICK_AG_INOBT,	XFS_AG_GEOM_SICK_INOBT },
328 	{ XFS_SICK_AG_FINOBT,	XFS_AG_GEOM_SICK_FINOBT },
329 	{ XFS_SICK_AG_RMAPBT,	XFS_AG_GEOM_SICK_RMAPBT },
330 	{ XFS_SICK_AG_REFCNTBT,	XFS_AG_GEOM_SICK_REFCNTBT },
331 	{ 0, 0 },
332 };
333 
334 /* Fill out ag geometry health info. */
335 void
336 xfs_ag_geom_health(
337 	struct xfs_perag		*pag,
338 	struct xfs_ag_geometry		*ageo)
339 {
340 	const struct ioctl_sick_map	*m;
341 	unsigned int			sick;
342 	unsigned int			checked;
343 
344 	ageo->ag_sick = 0;
345 	ageo->ag_checked = 0;
346 
347 	xfs_ag_measure_sickness(pag, &sick, &checked);
348 	for (m = ag_map; m->sick_mask; m++) {
349 		if (checked & m->sick_mask)
350 			ageo->ag_checked |= m->ioctl_mask;
351 		if (sick & m->sick_mask)
352 			ageo->ag_sick |= m->ioctl_mask;
353 	}
354 }
355 
356 static const struct ioctl_sick_map ino_map[] = {
357 	{ XFS_SICK_INO_CORE,	XFS_BS_SICK_INODE },
358 	{ XFS_SICK_INO_BMBTD,	XFS_BS_SICK_BMBTD },
359 	{ XFS_SICK_INO_BMBTA,	XFS_BS_SICK_BMBTA },
360 	{ XFS_SICK_INO_BMBTC,	XFS_BS_SICK_BMBTC },
361 	{ XFS_SICK_INO_DIR,	XFS_BS_SICK_DIR },
362 	{ XFS_SICK_INO_XATTR,	XFS_BS_SICK_XATTR },
363 	{ XFS_SICK_INO_SYMLINK,	XFS_BS_SICK_SYMLINK },
364 	{ XFS_SICK_INO_PARENT,	XFS_BS_SICK_PARENT },
365 	{ 0, 0 },
366 };
367 
368 /* Fill out bulkstat health info. */
369 void
370 xfs_bulkstat_health(
371 	struct xfs_inode		*ip,
372 	struct xfs_bulkstat		*bs)
373 {
374 	const struct ioctl_sick_map	*m;
375 	unsigned int			sick;
376 	unsigned int			checked;
377 
378 	bs->bs_sick = 0;
379 	bs->bs_checked = 0;
380 
381 	xfs_inode_measure_sickness(ip, &sick, &checked);
382 	for (m = ino_map; m->sick_mask; m++) {
383 		if (checked & m->sick_mask)
384 			bs->bs_checked |= m->ioctl_mask;
385 		if (sick & m->sick_mask)
386 			bs->bs_sick |= m->ioctl_mask;
387 	}
388 }
389