1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 5 */ 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 9 #include <linux/spinlock.h> 10 #include <linux/completion.h> 11 #include <linux/buffer_head.h> 12 #include <linux/crc32.h> 13 #include <linux/gfs2_ondisk.h> 14 #include <linux/delay.h> 15 #include <linux/uaccess.h> 16 17 #include "gfs2.h" 18 #include "incore.h" 19 #include "glock.h" 20 #include "glops.h" 21 #include "log.h" 22 #include "lops.h" 23 #include "recovery.h" 24 #include "rgrp.h" 25 #include "super.h" 26 #include "util.h" 27 28 struct kmem_cache *gfs2_glock_cachep __read_mostly; 29 struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly; 30 struct kmem_cache *gfs2_inode_cachep __read_mostly; 31 struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 32 struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; 33 struct kmem_cache *gfs2_quotad_cachep __read_mostly; 34 struct kmem_cache *gfs2_qadata_cachep __read_mostly; 35 struct kmem_cache *gfs2_trans_cachep __read_mostly; 36 mempool_t *gfs2_page_pool __read_mostly; 37 38 void gfs2_assert_i(struct gfs2_sbd *sdp) 39 { 40 fs_emerg(sdp, "fatal assertion failed\n"); 41 } 42 43 /** 44 * check_journal_clean - Make sure a journal is clean for a spectator mount 45 * @sdp: The GFS2 superblock 46 * @jd: The journal descriptor 47 * 48 * Returns: 0 if the journal is clean or locked, else an error 49 */ 50 int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, 51 bool verbose) 52 { 53 int error; 54 struct gfs2_holder j_gh; 55 struct gfs2_log_header_host head; 56 struct gfs2_inode *ip; 57 58 ip = GFS2_I(jd->jd_inode); 59 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | 60 GL_EXACT | GL_NOCACHE, &j_gh); 61 if (error) { 62 if (verbose) 63 fs_err(sdp, "Error %d locking journal for spectator " 64 "mount.\n", error); 65 return -EPERM; 66 } 67 error = gfs2_jdesc_check(jd); 68 if (error) { 69 if (verbose) 70 fs_err(sdp, "Error checking journal for spectator " 71 "mount.\n"); 72 goto out_unlock; 73 } 74 error = gfs2_find_jhead(jd, &head, false); 75 if (error) { 76 if (verbose) 77 fs_err(sdp, "Error parsing journal for spectator " 78 "mount.\n"); 79 goto out_unlock; 80 } 81 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { 82 error = -EPERM; 83 if (verbose) 84 fs_err(sdp, "jid=%u: Journal is dirty, so the first " 85 "mounter must not be a spectator.\n", 86 jd->jd_jid); 87 } 88 89 out_unlock: 90 gfs2_glock_dq_uninit(&j_gh); 91 return error; 92 } 93 94 /** 95 * gfs2_freeze_lock - hold the freeze glock 96 * @sdp: the superblock 97 * @freeze_gh: pointer to the requested holder 98 * @caller_flags: any additional flags needed by the caller 99 */ 100 int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, 101 int caller_flags) 102 { 103 int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags; 104 int error; 105 106 error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, 107 freeze_gh); 108 if (error && error != GLR_TRYFAILED) 109 fs_err(sdp, "can't lock the freeze lock: %d\n", error); 110 return error; 111 } 112 113 void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh) 114 { 115 if (gfs2_holder_initialized(freeze_gh)) 116 gfs2_glock_dq_uninit(freeze_gh); 117 } 118 119 static void signal_our_withdraw(struct gfs2_sbd *sdp) 120 { 121 struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; 122 struct inode *inode = sdp->sd_jdesc->jd_inode; 123 struct gfs2_inode *ip = GFS2_I(inode); 124 struct gfs2_glock *i_gl = ip->i_gl; 125 u64 no_formal_ino = ip->i_no_formal_ino; 126 int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 127 int ret = 0; 128 int tries; 129 130 if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) 131 return; 132 133 /* Prevent any glock dq until withdraw recovery is complete */ 134 set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 135 /* 136 * Don't tell dlm we're bailing until we have no more buffers in the 137 * wind. If journal had an IO error, the log code should just purge 138 * the outstanding buffers rather than submitting new IO. Making the 139 * file system read-only will flush the journal, etc. 140 * 141 * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown 142 * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write 143 * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and 144 * therefore we need to clear SDF_JOURNAL_LIVE manually. 145 */ 146 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 147 if (!sb_rdonly(sdp->sd_vfs)) { 148 struct gfs2_holder freeze_gh; 149 150 gfs2_holder_mark_uninitialized(&freeze_gh); 151 if (sdp->sd_freeze_gl && 152 !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { 153 ret = gfs2_freeze_lock(sdp, &freeze_gh, 154 log_write_allowed ? 0 : LM_FLAG_TRY); 155 if (ret == GLR_TRYFAILED) 156 ret = 0; 157 } 158 if (!ret) 159 ret = gfs2_make_fs_ro(sdp); 160 gfs2_freeze_unlock(&freeze_gh); 161 } 162 163 if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ 164 if (!ret) 165 ret = -EIO; 166 clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 167 goto skip_recovery; 168 } 169 /* 170 * Drop the glock for our journal so another node can recover it. 171 */ 172 if (gfs2_holder_initialized(&sdp->sd_journal_gh)) { 173 gfs2_glock_dq_wait(&sdp->sd_journal_gh); 174 gfs2_holder_uninit(&sdp->sd_journal_gh); 175 } 176 sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; 177 gfs2_glock_dq(&sdp->sd_jinode_gh); 178 if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { 179 /* Make sure gfs2_unfreeze works if partially-frozen */ 180 flush_work(&sdp->sd_freeze_work); 181 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); 182 thaw_super(sdp->sd_vfs); 183 } else { 184 wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, 185 TASK_UNINTERRUPTIBLE); 186 } 187 188 /* 189 * holder_uninit to force glock_put, to force dlm to let go 190 */ 191 gfs2_holder_uninit(&sdp->sd_jinode_gh); 192 193 /* 194 * Note: We need to be careful here: 195 * Our iput of jd_inode will evict it. The evict will dequeue its 196 * glock, but the glock dq will wait for the withdraw unless we have 197 * exception code in glock_dq. 198 */ 199 iput(inode); 200 /* 201 * Wait until the journal inode's glock is freed. This allows try locks 202 * on other nodes to be successful, otherwise we remain the owner of 203 * the glock as far as dlm is concerned. 204 */ 205 if (i_gl->gl_ops->go_free) { 206 set_bit(GLF_FREEING, &i_gl->gl_flags); 207 wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); 208 } 209 210 /* 211 * Dequeue the "live" glock, but keep a reference so it's never freed. 212 */ 213 gfs2_glock_hold(live_gl); 214 gfs2_glock_dq_wait(&sdp->sd_live_gh); 215 /* 216 * We enqueue the "live" glock in EX so that all other nodes 217 * get a demote request and act on it. We don't really want the 218 * lock in EX, so we send a "try" lock with 1CB to produce a callback. 219 */ 220 fs_warn(sdp, "Requesting recovery of jid %d.\n", 221 sdp->sd_lockstruct.ls_jid); 222 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP, 223 &sdp->sd_live_gh); 224 msleep(GL_GLOCK_MAX_HOLD); 225 /* 226 * This will likely fail in a cluster, but succeed standalone: 227 */ 228 ret = gfs2_glock_nq(&sdp->sd_live_gh); 229 230 /* 231 * If we actually got the "live" lock in EX mode, there are no other 232 * nodes available to replay our journal. So we try to replay it 233 * ourselves. We hold the "live" glock to prevent other mounters 234 * during recovery, then just dequeue it and reacquire it in our 235 * normal SH mode. Just in case the problem that caused us to 236 * withdraw prevents us from recovering our journal (e.g. io errors 237 * and such) we still check if the journal is clean before proceeding 238 * but we may wait forever until another mounter does the recovery. 239 */ 240 if (ret == 0) { 241 fs_warn(sdp, "No other mounters found. Trying to recover our " 242 "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid); 243 if (gfs2_recover_journal(sdp->sd_jdesc, 1)) 244 fs_warn(sdp, "Unable to recover our journal jid %d.\n", 245 sdp->sd_lockstruct.ls_jid); 246 gfs2_glock_dq_wait(&sdp->sd_live_gh); 247 gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT, 248 &sdp->sd_live_gh); 249 gfs2_glock_nq(&sdp->sd_live_gh); 250 } 251 252 gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */ 253 clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 254 255 /* 256 * At this point our journal is evicted, so we need to get a new inode 257 * for it. Once done, we need to call gfs2_find_jhead which 258 * calls gfs2_map_journal_extents to map it for us again. 259 * 260 * Note that we don't really want it to look up a FREE block. The 261 * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup 262 * which would otherwise fail because it requires grabbing an rgrp 263 * glock, which would fail with -EIO because we're withdrawing. 264 */ 265 inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN, 266 sdp->sd_jdesc->jd_no_addr, no_formal_ino, 267 GFS2_BLKST_FREE); 268 if (IS_ERR(inode)) { 269 fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n", 270 sdp->sd_lockstruct.ls_jid, PTR_ERR(inode)); 271 goto skip_recovery; 272 } 273 sdp->sd_jdesc->jd_inode = inode; 274 275 /* 276 * Now wait until recovery is complete. 277 */ 278 for (tries = 0; tries < 10; tries++) { 279 ret = check_journal_clean(sdp, sdp->sd_jdesc, false); 280 if (!ret) 281 break; 282 msleep(HZ); 283 fs_warn(sdp, "Waiting for journal recovery jid %d.\n", 284 sdp->sd_lockstruct.ls_jid); 285 } 286 skip_recovery: 287 if (!ret) 288 fs_warn(sdp, "Journal recovery complete for jid %d.\n", 289 sdp->sd_lockstruct.ls_jid); 290 else 291 fs_warn(sdp, "Journal recovery skipped for %d until next " 292 "mount.\n", sdp->sd_lockstruct.ls_jid); 293 fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held); 294 sdp->sd_glock_dqs_held = 0; 295 wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY); 296 } 297 298 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) 299 { 300 struct va_format vaf; 301 va_list args; 302 303 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && 304 test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) 305 return; 306 307 va_start(args, fmt); 308 vaf.fmt = fmt; 309 vaf.va = &args; 310 fs_err(sdp, "%pV", &vaf); 311 va_end(args); 312 } 313 314 int gfs2_withdraw(struct gfs2_sbd *sdp) 315 { 316 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 317 const struct lm_lockops *lm = ls->ls_ops; 318 319 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && 320 test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { 321 if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags)) 322 return -1; 323 324 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG, 325 TASK_UNINTERRUPTIBLE); 326 return -1; 327 } 328 329 set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); 330 331 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { 332 fs_err(sdp, "about to withdraw this file system\n"); 333 BUG_ON(sdp->sd_args.ar_debug); 334 335 signal_our_withdraw(sdp); 336 337 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 338 339 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) 340 wait_for_completion(&sdp->sd_wdack); 341 342 if (lm->lm_unmount) { 343 fs_err(sdp, "telling LM to unmount\n"); 344 lm->lm_unmount(sdp); 345 } 346 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 347 fs_err(sdp, "File system withdrawn\n"); 348 dump_stack(); 349 clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); 350 smp_mb__after_atomic(); 351 wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG); 352 } 353 354 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) 355 panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); 356 357 return -1; 358 } 359 360 /** 361 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false 362 */ 363 364 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, 365 const char *function, char *file, unsigned int line, 366 bool delayed) 367 { 368 if (gfs2_withdrawn(sdp)) 369 return; 370 371 fs_err(sdp, 372 "fatal: assertion \"%s\" failed\n" 373 " function = %s, file = %s, line = %u\n", 374 assertion, function, file, line); 375 376 /* 377 * If errors=panic was specified on mount, it won't help to delay the 378 * withdraw. 379 */ 380 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) 381 delayed = false; 382 383 if (delayed) 384 gfs2_withdraw_delayed(sdp); 385 else 386 gfs2_withdraw(sdp); 387 dump_stack(); 388 } 389 390 /** 391 * gfs2_assert_warn_i - Print a message to the console if @assertion is false 392 */ 393 394 void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, 395 const char *function, char *file, unsigned int line) 396 { 397 if (time_before(jiffies, 398 sdp->sd_last_warning + 399 gfs2_tune_get(sdp, gt_complain_secs) * HZ)) 400 return; 401 402 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) 403 fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", 404 assertion, function, file, line); 405 406 if (sdp->sd_args.ar_debug) 407 BUG(); 408 else 409 dump_stack(); 410 411 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) 412 panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n" 413 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 414 sdp->sd_fsname, assertion, 415 sdp->sd_fsname, function, file, line); 416 417 sdp->sd_last_warning = jiffies; 418 } 419 420 /** 421 * gfs2_consist_i - Flag a filesystem consistency error and withdraw 422 */ 423 424 void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, 425 char *file, unsigned int line) 426 { 427 gfs2_lm(sdp, 428 "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", 429 function, file, line); 430 gfs2_withdraw(sdp); 431 } 432 433 /** 434 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw 435 */ 436 437 void gfs2_consist_inode_i(struct gfs2_inode *ip, 438 const char *function, char *file, unsigned int line) 439 { 440 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 441 442 gfs2_lm(sdp, 443 "fatal: filesystem consistency error\n" 444 " inode = %llu %llu\n" 445 " function = %s, file = %s, line = %u\n", 446 (unsigned long long)ip->i_no_formal_ino, 447 (unsigned long long)ip->i_no_addr, 448 function, file, line); 449 gfs2_withdraw(sdp); 450 } 451 452 /** 453 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw 454 */ 455 456 void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, 457 const char *function, char *file, unsigned int line) 458 { 459 struct gfs2_sbd *sdp = rgd->rd_sbd; 460 char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; 461 462 sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); 463 gfs2_rgrp_dump(NULL, rgd, fs_id_buf); 464 gfs2_lm(sdp, 465 "fatal: filesystem consistency error\n" 466 " RG = %llu\n" 467 " function = %s, file = %s, line = %u\n", 468 (unsigned long long)rgd->rd_addr, 469 function, file, line); 470 gfs2_withdraw(sdp); 471 } 472 473 /** 474 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw 475 * Returns: -1 if this call withdrew the machine, 476 * -2 if it was already withdrawn 477 */ 478 479 int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 480 const char *type, const char *function, char *file, 481 unsigned int line) 482 { 483 int me; 484 485 gfs2_lm(sdp, 486 "fatal: invalid metadata block\n" 487 " bh = %llu (%s)\n" 488 " function = %s, file = %s, line = %u\n", 489 (unsigned long long)bh->b_blocknr, type, 490 function, file, line); 491 me = gfs2_withdraw(sdp); 492 return (me) ? -1 : -2; 493 } 494 495 /** 496 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw 497 * Returns: -1 if this call withdrew the machine, 498 * -2 if it was already withdrawn 499 */ 500 501 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 502 u16 type, u16 t, const char *function, 503 char *file, unsigned int line) 504 { 505 int me; 506 507 gfs2_lm(sdp, 508 "fatal: invalid metadata block\n" 509 " bh = %llu (type: exp=%u, found=%u)\n" 510 " function = %s, file = %s, line = %u\n", 511 (unsigned long long)bh->b_blocknr, type, t, 512 function, file, line); 513 me = gfs2_withdraw(sdp); 514 return (me) ? -1 : -2; 515 } 516 517 /** 518 * gfs2_io_error_i - Flag an I/O error and withdraw 519 * Returns: -1 if this call withdrew the machine, 520 * 0 if it was already withdrawn 521 */ 522 523 int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, 524 unsigned int line) 525 { 526 gfs2_lm(sdp, 527 "fatal: I/O error\n" 528 " function = %s, file = %s, line = %u\n", 529 function, file, line); 530 return gfs2_withdraw(sdp); 531 } 532 533 /** 534 * gfs2_io_error_bh_i - Flag a buffer I/O error 535 * @withdraw: withdraw the filesystem 536 */ 537 538 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 539 const char *function, char *file, unsigned int line, 540 bool withdraw) 541 { 542 if (gfs2_withdrawn(sdp)) 543 return; 544 545 fs_err(sdp, "fatal: I/O error\n" 546 " block = %llu\n" 547 " function = %s, file = %s, line = %u\n", 548 (unsigned long long)bh->b_blocknr, function, file, line); 549 if (withdraw) 550 gfs2_withdraw(sdp); 551 } 552 553