xref: /openbmc/linux/fs/gfs2/util.c (revision 0bf49ffb)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4  * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
5  */
6 
7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8 
9 #include <linux/spinlock.h>
10 #include <linux/completion.h>
11 #include <linux/buffer_head.h>
12 #include <linux/crc32.h>
13 #include <linux/gfs2_ondisk.h>
14 #include <linux/delay.h>
15 #include <linux/uaccess.h>
16 
17 #include "gfs2.h"
18 #include "incore.h"
19 #include "glock.h"
20 #include "glops.h"
21 #include "log.h"
22 #include "lops.h"
23 #include "recovery.h"
24 #include "rgrp.h"
25 #include "super.h"
26 #include "util.h"
27 
28 struct kmem_cache *gfs2_glock_cachep __read_mostly;
29 struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly;
30 struct kmem_cache *gfs2_inode_cachep __read_mostly;
31 struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
32 struct kmem_cache *gfs2_rgrpd_cachep __read_mostly;
33 struct kmem_cache *gfs2_quotad_cachep __read_mostly;
34 struct kmem_cache *gfs2_qadata_cachep __read_mostly;
35 struct kmem_cache *gfs2_trans_cachep __read_mostly;
36 mempool_t *gfs2_page_pool __read_mostly;
37 
/**
 * gfs2_assert_i - report a failed fatal assertion
 * @sdp: the filesystem the assertion belongs to
 *
 * Only emits the message through fs_emerg(); it does not withdraw or
 * otherwise act on the failure itself.
 */
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
	fs_emerg(sdp, "fatal assertion failed\n");
}
42 
43 /**
44  * check_journal_clean - Make sure a journal is clean for a spectator mount
45  * @sdp: The GFS2 superblock
46  * @jd: The journal descriptor
47  *
48  * Returns: 0 if the journal is clean or locked, else an error
49  */
50 int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
51 			bool verbose)
52 {
53 	int error;
54 	struct gfs2_holder j_gh;
55 	struct gfs2_log_header_host head;
56 	struct gfs2_inode *ip;
57 
58 	ip = GFS2_I(jd->jd_inode);
59 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
60 				   GL_EXACT | GL_NOCACHE, &j_gh);
61 	if (error) {
62 		if (verbose)
63 			fs_err(sdp, "Error %d locking journal for spectator "
64 			       "mount.\n", error);
65 		return -EPERM;
66 	}
67 	error = gfs2_jdesc_check(jd);
68 	if (error) {
69 		if (verbose)
70 			fs_err(sdp, "Error checking journal for spectator "
71 			       "mount.\n");
72 		goto out_unlock;
73 	}
74 	error = gfs2_find_jhead(jd, &head, false);
75 	if (error) {
76 		if (verbose)
77 			fs_err(sdp, "Error parsing journal for spectator "
78 			       "mount.\n");
79 		goto out_unlock;
80 	}
81 	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
82 		error = -EPERM;
83 		if (verbose)
84 			fs_err(sdp, "jid=%u: Journal is dirty, so the first "
85 			       "mounter must not be a spectator.\n",
86 			       jd->jd_jid);
87 	}
88 
89 out_unlock:
90 	gfs2_glock_dq_uninit(&j_gh);
91 	return error;
92 }
93 
94 /**
95  * gfs2_freeze_lock - hold the freeze glock
96  * @sdp: the superblock
97  * @freeze_gh: pointer to the requested holder
98  * @caller_flags: any additional flags needed by the caller
99  */
100 int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
101 		     int caller_flags)
102 {
103 	int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
104 	int error;
105 
106 	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
107 				   freeze_gh);
108 	if (error && error != GLR_TRYFAILED)
109 		fs_err(sdp, "can't lock the freeze lock: %d\n", error);
110 	return error;
111 }
112 
/**
 * gfs2_freeze_unlock - release a freeze glock holder, if one is held
 * @freeze_gh: the holder to release
 *
 * Does nothing when @freeze_gh is not initialized, so it is safe to call
 * after an acquisition that was skipped or failed.
 */
void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
{
	if (!gfs2_holder_initialized(freeze_gh))
		return;

	gfs2_glock_dq_uninit(freeze_gh);
}
118 
119 static void signal_our_withdraw(struct gfs2_sbd *sdp)
120 {
121 	struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
122 	struct inode *inode = sdp->sd_jdesc->jd_inode;
123 	struct gfs2_inode *ip = GFS2_I(inode);
124 	struct gfs2_glock *i_gl = ip->i_gl;
125 	u64 no_formal_ino = ip->i_no_formal_ino;
126 	int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
127 	int ret = 0;
128 	int tries;
129 
130 	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
131 		return;
132 
133 	/* Prevent any glock dq until withdraw recovery is complete */
134 	set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
135 	/*
136 	 * Don't tell dlm we're bailing until we have no more buffers in the
137 	 * wind. If journal had an IO error, the log code should just purge
138 	 * the outstanding buffers rather than submitting new IO. Making the
139 	 * file system read-only will flush the journal, etc.
140 	 *
141 	 * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
142 	 * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
143 	 * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
144 	 * therefore we need to clear SDF_JOURNAL_LIVE manually.
145 	 */
146 	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
147 	if (!sb_rdonly(sdp->sd_vfs)) {
148 		struct gfs2_holder freeze_gh;
149 
150 		gfs2_holder_mark_uninitialized(&freeze_gh);
151 		if (sdp->sd_freeze_gl &&
152 		    !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
153 			ret = gfs2_freeze_lock(sdp, &freeze_gh,
154 				       log_write_allowed ? 0 : LM_FLAG_TRY);
155 			if (ret == GLR_TRYFAILED)
156 				ret = 0;
157 		}
158 		if (!ret)
159 			ret = gfs2_make_fs_ro(sdp);
160 		gfs2_freeze_unlock(&freeze_gh);
161 	}
162 
163 	if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
164 		if (!ret)
165 			ret = -EIO;
166 		clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
167 		goto skip_recovery;
168 	}
169 	/*
170 	 * Drop the glock for our journal so another node can recover it.
171 	 */
172 	if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
173 		gfs2_glock_dq_wait(&sdp->sd_journal_gh);
174 		gfs2_holder_uninit(&sdp->sd_journal_gh);
175 	}
176 	sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
177 	gfs2_glock_dq(&sdp->sd_jinode_gh);
178 	if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
179 		/* Make sure gfs2_unfreeze works if partially-frozen */
180 		flush_work(&sdp->sd_freeze_work);
181 		atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
182 		thaw_super(sdp->sd_vfs);
183 	} else {
184 		wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
185 			    TASK_UNINTERRUPTIBLE);
186 	}
187 
188 	/*
189 	 * holder_uninit to force glock_put, to force dlm to let go
190 	 */
191 	gfs2_holder_uninit(&sdp->sd_jinode_gh);
192 
193 	/*
194 	 * Note: We need to be careful here:
195 	 * Our iput of jd_inode will evict it. The evict will dequeue its
196 	 * glock, but the glock dq will wait for the withdraw unless we have
197 	 * exception code in glock_dq.
198 	 */
199 	iput(inode);
200 	/*
201 	 * Wait until the journal inode's glock is freed. This allows try locks
202 	 * on other nodes to be successful, otherwise we remain the owner of
203 	 * the glock as far as dlm is concerned.
204 	 */
205 	if (i_gl->gl_ops->go_free) {
206 		set_bit(GLF_FREEING, &i_gl->gl_flags);
207 		wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
208 	}
209 
210 	/*
211 	 * Dequeue the "live" glock, but keep a reference so it's never freed.
212 	 */
213 	gfs2_glock_hold(live_gl);
214 	gfs2_glock_dq_wait(&sdp->sd_live_gh);
215 	/*
216 	 * We enqueue the "live" glock in EX so that all other nodes
217 	 * get a demote request and act on it. We don't really want the
218 	 * lock in EX, so we send a "try" lock with 1CB to produce a callback.
219 	 */
220 	fs_warn(sdp, "Requesting recovery of jid %d.\n",
221 		sdp->sd_lockstruct.ls_jid);
222 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
223 			   &sdp->sd_live_gh);
224 	msleep(GL_GLOCK_MAX_HOLD);
225 	/*
226 	 * This will likely fail in a cluster, but succeed standalone:
227 	 */
228 	ret = gfs2_glock_nq(&sdp->sd_live_gh);
229 
230 	/*
231 	 * If we actually got the "live" lock in EX mode, there are no other
232 	 * nodes available to replay our journal. So we try to replay it
233 	 * ourselves. We hold the "live" glock to prevent other mounters
234 	 * during recovery, then just dequeue it and reacquire it in our
235 	 * normal SH mode. Just in case the problem that caused us to
236 	 * withdraw prevents us from recovering our journal (e.g. io errors
237 	 * and such) we still check if the journal is clean before proceeding
238 	 * but we may wait forever until another mounter does the recovery.
239 	 */
240 	if (ret == 0) {
241 		fs_warn(sdp, "No other mounters found. Trying to recover our "
242 			"own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
243 		if (gfs2_recover_journal(sdp->sd_jdesc, 1))
244 			fs_warn(sdp, "Unable to recover our journal jid %d.\n",
245 				sdp->sd_lockstruct.ls_jid);
246 		gfs2_glock_dq_wait(&sdp->sd_live_gh);
247 		gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
248 				   &sdp->sd_live_gh);
249 		gfs2_glock_nq(&sdp->sd_live_gh);
250 	}
251 
252 	gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
253 	clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
254 
255 	/*
256 	 * At this point our journal is evicted, so we need to get a new inode
257 	 * for it. Once done, we need to call gfs2_find_jhead which
258 	 * calls gfs2_map_journal_extents to map it for us again.
259 	 *
260 	 * Note that we don't really want it to look up a FREE block. The
261 	 * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
262 	 * which would otherwise fail because it requires grabbing an rgrp
263 	 * glock, which would fail with -EIO because we're withdrawing.
264 	 */
265 	inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
266 				  sdp->sd_jdesc->jd_no_addr, no_formal_ino,
267 				  GFS2_BLKST_FREE);
268 	if (IS_ERR(inode)) {
269 		fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
270 			sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
271 		goto skip_recovery;
272 	}
273 	sdp->sd_jdesc->jd_inode = inode;
274 
275 	/*
276 	 * Now wait until recovery is complete.
277 	 */
278 	for (tries = 0; tries < 10; tries++) {
279 		ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
280 		if (!ret)
281 			break;
282 		msleep(HZ);
283 		fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
284 			sdp->sd_lockstruct.ls_jid);
285 	}
286 skip_recovery:
287 	if (!ret)
288 		fs_warn(sdp, "Journal recovery complete for jid %d.\n",
289 			sdp->sd_lockstruct.ls_jid);
290 	else
291 		fs_warn(sdp, "Journal recovery skipped for %d until next "
292 			"mount.\n", sdp->sd_lockstruct.ls_jid);
293 	fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
294 	sdp->sd_glock_dqs_held = 0;
295 	wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
296 }
297 
298 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
299 {
300 	struct va_format vaf;
301 	va_list args;
302 
303 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
304 	    test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
305 		return;
306 
307 	va_start(args, fmt);
308 	vaf.fmt = fmt;
309 	vaf.va = &args;
310 	fs_err(sdp, "%pV", &vaf);
311 	va_end(args);
312 }
313 
314 int gfs2_withdraw(struct gfs2_sbd *sdp)
315 {
316 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
317 	const struct lm_lockops *lm = ls->ls_ops;
318 
319 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
320 	    test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
321 		if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
322 			return -1;
323 
324 		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
325 			    TASK_UNINTERRUPTIBLE);
326 		return -1;
327 	}
328 
329 	set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
330 
331 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
332 		fs_err(sdp, "about to withdraw this file system\n");
333 		BUG_ON(sdp->sd_args.ar_debug);
334 
335 		signal_our_withdraw(sdp);
336 
337 		kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
338 
339 		if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
340 			wait_for_completion(&sdp->sd_wdack);
341 
342 		if (lm->lm_unmount) {
343 			fs_err(sdp, "telling LM to unmount\n");
344 			lm->lm_unmount(sdp);
345 		}
346 		set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
347 		fs_err(sdp, "File system withdrawn\n");
348 		dump_stack();
349 		clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
350 		smp_mb__after_atomic();
351 		wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
352 	}
353 
354 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
355 		panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname);
356 
357 	return -1;
358 }
359 
360 /**
361  * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
362  */
363 
364 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
365 			    const char *function, char *file, unsigned int line,
366 			    bool delayed)
367 {
368 	if (gfs2_withdrawn(sdp))
369 		return;
370 
371 	fs_err(sdp,
372 	       "fatal: assertion \"%s\" failed\n"
373 	       "   function = %s, file = %s, line = %u\n",
374 	       assertion, function, file, line);
375 
376 	/*
377 	 * If errors=panic was specified on mount, it won't help to delay the
378 	 * withdraw.
379 	 */
380 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
381 		delayed = false;
382 
383 	if (delayed)
384 		gfs2_withdraw_delayed(sdp);
385 	else
386 		gfs2_withdraw(sdp);
387 	dump_stack();
388 }
389 
390 /**
391  * gfs2_assert_warn_i - Print a message to the console if @assertion is false
392  */
393 
394 void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
395 			const char *function, char *file, unsigned int line)
396 {
397 	if (time_before(jiffies,
398 			sdp->sd_last_warning +
399 			gfs2_tune_get(sdp, gt_complain_secs) * HZ))
400 		return;
401 
402 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
403 		fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
404 			assertion, function, file, line);
405 
406 	if (sdp->sd_args.ar_debug)
407 		BUG();
408 	else
409 		dump_stack();
410 
411 	if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
412 		panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
413 		      "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
414 		      sdp->sd_fsname, assertion,
415 		      sdp->sd_fsname, function, file, line);
416 
417 	sdp->sd_last_warning = jiffies;
418 }
419 
/**
 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 * @sdp: the filesystem
 * @function: name of the function that detected the error
 * @file: source file that detected the error
 * @line: source line that detected the error
 */
void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line)
{
	/* gfs2_lm() stays silent if the filesystem is already withdrawn. */
	gfs2_lm(sdp,
		"fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
		function, file, line);

	gfs2_withdraw(sdp);
}
432 
433 /**
434  * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
435  */
436 
437 void gfs2_consist_inode_i(struct gfs2_inode *ip,
438 			  const char *function, char *file, unsigned int line)
439 {
440 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
441 
442 	gfs2_lm(sdp,
443 		"fatal: filesystem consistency error\n"
444 		"  inode = %llu %llu\n"
445 		"  function = %s, file = %s, line = %u\n",
446 		(unsigned long long)ip->i_no_formal_ino,
447 		(unsigned long long)ip->i_no_addr,
448 		function, file, line);
449 	gfs2_withdraw(sdp);
450 }
451 
452 /**
453  * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
454  */
455 
456 void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
457 			  const char *function, char *file, unsigned int line)
458 {
459 	struct gfs2_sbd *sdp = rgd->rd_sbd;
460 	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
461 
462 	sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
463 	gfs2_rgrp_dump(NULL, rgd, fs_id_buf);
464 	gfs2_lm(sdp,
465 		"fatal: filesystem consistency error\n"
466 		"  RG = %llu\n"
467 		"  function = %s, file = %s, line = %u\n",
468 		(unsigned long long)rgd->rd_addr,
469 		function, file, line);
470 	gfs2_withdraw(sdp);
471 }
472 
473 /**
474  * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
475  * Returns: -1 if this call withdrew the machine,
476  *          -2 if it was already withdrawn
477  */
478 
479 int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
480 		       const char *type, const char *function, char *file,
481 		       unsigned int line)
482 {
483 	int me;
484 
485 	gfs2_lm(sdp,
486 		"fatal: invalid metadata block\n"
487 		"  bh = %llu (%s)\n"
488 		"  function = %s, file = %s, line = %u\n",
489 		(unsigned long long)bh->b_blocknr, type,
490 		function, file, line);
491 	me = gfs2_withdraw(sdp);
492 	return (me) ? -1 : -2;
493 }
494 
495 /**
496  * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
497  * Returns: -1 if this call withdrew the machine,
498  *          -2 if it was already withdrawn
499  */
500 
501 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
502 			   u16 type, u16 t, const char *function,
503 			   char *file, unsigned int line)
504 {
505 	int me;
506 
507 	gfs2_lm(sdp,
508 		"fatal: invalid metadata block\n"
509 		"  bh = %llu (type: exp=%u, found=%u)\n"
510 		"  function = %s, file = %s, line = %u\n",
511 		(unsigned long long)bh->b_blocknr, type, t,
512 		function, file, line);
513 	me = gfs2_withdraw(sdp);
514 	return (me) ? -1 : -2;
515 }
516 
/**
 * gfs2_io_error_i - Flag an I/O error and withdraw
 * @sdp: the filesystem
 * @function: name of the function that detected the error
 * @file: source file that detected the error
 * @line: source line that detected the error
 *
 * Returns: the return value of gfs2_withdraw(), which as defined in this
 *          file is -1 whether or not the filesystem was already withdrawn
 */
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
		    unsigned int line)
{
	int rc;

	gfs2_lm(sdp,
		"fatal: I/O error\n"
		"  function = %s, file = %s, line = %u\n",
		function, file, line);

	rc = gfs2_withdraw(sdp);
	return rc;
}
532 
533 /**
534  * gfs2_io_error_bh_i - Flag a buffer I/O error
535  * @withdraw: withdraw the filesystem
536  */
537 
538 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
539 			const char *function, char *file, unsigned int line,
540 			bool withdraw)
541 {
542 	if (gfs2_withdrawn(sdp))
543 		return;
544 
545 	fs_err(sdp, "fatal: I/O error\n"
546 	       "  block = %llu\n"
547 	       "  function = %s, file = %s, line = %u\n",
548 	       (unsigned long long)bh->b_blocknr, function, file, line);
549 	if (withdraw)
550 		gfs2_withdraw(sdp);
551 }
552 
553