1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 4 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 5 */ 6 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 9 #include <linux/spinlock.h> 10 #include <linux/completion.h> 11 #include <linux/buffer_head.h> 12 #include <linux/crc32.h> 13 #include <linux/gfs2_ondisk.h> 14 #include <linux/delay.h> 15 #include <linux/uaccess.h> 16 17 #include "gfs2.h" 18 #include "incore.h" 19 #include "glock.h" 20 #include "glops.h" 21 #include "log.h" 22 #include "lops.h" 23 #include "recovery.h" 24 #include "rgrp.h" 25 #include "super.h" 26 #include "util.h" 27 28 struct kmem_cache *gfs2_glock_cachep __read_mostly; 29 struct kmem_cache *gfs2_glock_aspace_cachep __read_mostly; 30 struct kmem_cache *gfs2_inode_cachep __read_mostly; 31 struct kmem_cache *gfs2_bufdata_cachep __read_mostly; 32 struct kmem_cache *gfs2_rgrpd_cachep __read_mostly; 33 struct kmem_cache *gfs2_quotad_cachep __read_mostly; 34 struct kmem_cache *gfs2_qadata_cachep __read_mostly; 35 struct kmem_cache *gfs2_trans_cachep __read_mostly; 36 mempool_t *gfs2_page_pool __read_mostly; 37 38 void gfs2_assert_i(struct gfs2_sbd *sdp) 39 { 40 fs_emerg(sdp, "fatal assertion failed\n"); 41 } 42 43 /** 44 * check_journal_clean - Make sure a journal is clean for a spectator mount 45 * @sdp: The GFS2 superblock 46 * @jd: The journal descriptor 47 * 48 * Returns: 0 if the journal is clean or locked, else an error 49 */ 50 int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, 51 bool verbose) 52 { 53 int error; 54 struct gfs2_holder j_gh; 55 struct gfs2_log_header_host head; 56 struct gfs2_inode *ip; 57 58 ip = GFS2_I(jd->jd_inode); 59 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | 60 GL_EXACT | GL_NOCACHE, &j_gh); 61 if (error) { 62 if (verbose) 63 fs_err(sdp, "Error %d locking journal for spectator " 64 "mount.\n", error); 65 return -EPERM; 66 } 67 error = gfs2_jdesc_check(jd); 68 if (error) { 69 if (verbose) 70 fs_err(sdp, "Error checking journal for spectator " 71 "mount.\n"); 72 goto out_unlock; 73 } 74 error = gfs2_find_jhead(jd, &head, false); 75 if (error) { 76 if (verbose) 77 fs_err(sdp, "Error parsing journal for spectator " 78 "mount.\n"); 79 goto out_unlock; 80 } 81 if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { 82 error = -EPERM; 83 if (verbose) 84 fs_err(sdp, "jid=%u: Journal is dirty, so the first " 85 "mounter must not be a spectator.\n", 86 jd->jd_jid); 87 } 88 89 out_unlock: 90 gfs2_glock_dq_uninit(&j_gh); 91 return error; 92 } 93 94 /** 95 * gfs2_freeze_lock - hold the freeze glock 96 * @sdp: the superblock 97 * @freeze_gh: pointer to the requested holder 98 * @caller_flags: any additional flags needed by the caller 99 */ 100 int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, 101 int caller_flags) 102 { 103 int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags; 104 int error; 105 106 error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, 107 freeze_gh); 108 if (error && error != GLR_TRYFAILED) 109 fs_err(sdp, "can't lock the freeze lock: %d\n", error); 110 return error; 111 } 112 113 void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh) 114 { 115 if (gfs2_holder_initialized(freeze_gh)) 116 gfs2_glock_dq_uninit(freeze_gh); 117 } 118 119 static void signal_our_withdraw(struct gfs2_sbd *sdp) 120 { 121 struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; 122 struct inode *inode; 123 struct gfs2_inode *ip; 124 struct gfs2_glock *i_gl; 125 u64 no_formal_ino; 126 int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 127 int ret = 0; 128 int tries; 129 130 if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc) 131 return; 132 133 inode = sdp->sd_jdesc->jd_inode; 134 ip = GFS2_I(inode); 135 i_gl = ip->i_gl; 136 no_formal_ino = ip->i_no_formal_ino; 137 138 /* Prevent any glock dq until withdraw recovery is complete */ 139 set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 140 /* 141 * Don't tell dlm we're bailing until we have no more buffers in the 142 * wind. If journal had an IO error, the log code should just purge 143 * the outstanding buffers rather than submitting new IO. Making the 144 * file system read-only will flush the journal, etc. 145 * 146 * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown 147 * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write 148 * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and 149 * therefore we need to clear SDF_JOURNAL_LIVE manually. 150 */ 151 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); 152 if (!sb_rdonly(sdp->sd_vfs)) { 153 struct gfs2_holder freeze_gh; 154 155 gfs2_holder_mark_uninitialized(&freeze_gh); 156 if (sdp->sd_freeze_gl && 157 !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { 158 ret = gfs2_freeze_lock(sdp, &freeze_gh, 159 log_write_allowed ? 0 : LM_FLAG_TRY); 160 if (ret == GLR_TRYFAILED) 161 ret = 0; 162 } 163 if (!ret) 164 gfs2_make_fs_ro(sdp); 165 gfs2_freeze_unlock(&freeze_gh); 166 } 167 168 if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ 169 if (!ret) 170 ret = -EIO; 171 clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 172 goto skip_recovery; 173 } 174 /* 175 * Drop the glock for our journal so another node can recover it. 176 */ 177 if (gfs2_holder_initialized(&sdp->sd_journal_gh)) { 178 gfs2_glock_dq_wait(&sdp->sd_journal_gh); 179 gfs2_holder_uninit(&sdp->sd_journal_gh); 180 } 181 sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; 182 gfs2_glock_dq(&sdp->sd_jinode_gh); 183 if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { 184 /* Make sure gfs2_unfreeze works if partially-frozen */ 185 flush_work(&sdp->sd_freeze_work); 186 atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); 187 thaw_super(sdp->sd_vfs); 188 } else { 189 wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, 190 TASK_UNINTERRUPTIBLE); 191 } 192 193 /* 194 * holder_uninit to force glock_put, to force dlm to let go 195 */ 196 gfs2_holder_uninit(&sdp->sd_jinode_gh); 197 198 /* 199 * Note: We need to be careful here: 200 * Our iput of jd_inode will evict it. The evict will dequeue its 201 * glock, but the glock dq will wait for the withdraw unless we have 202 * exception code in glock_dq. 203 */ 204 iput(inode); 205 /* 206 * Wait until the journal inode's glock is freed. This allows try locks 207 * on other nodes to be successful, otherwise we remain the owner of 208 * the glock as far as dlm is concerned. 209 */ 210 if (i_gl->gl_ops->go_free) { 211 set_bit(GLF_FREEING, &i_gl->gl_flags); 212 wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); 213 } 214 215 /* 216 * Dequeue the "live" glock, but keep a reference so it's never freed. 217 */ 218 gfs2_glock_hold(live_gl); 219 gfs2_glock_dq_wait(&sdp->sd_live_gh); 220 /* 221 * We enqueue the "live" glock in EX so that all other nodes 222 * get a demote request and act on it. We don't really want the 223 * lock in EX, so we send a "try" lock with 1CB to produce a callback. 224 */ 225 fs_warn(sdp, "Requesting recovery of jid %d.\n", 226 sdp->sd_lockstruct.ls_jid); 227 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP, 228 &sdp->sd_live_gh); 229 msleep(GL_GLOCK_MAX_HOLD); 230 /* 231 * This will likely fail in a cluster, but succeed standalone: 232 */ 233 ret = gfs2_glock_nq(&sdp->sd_live_gh); 234 235 /* 236 * If we actually got the "live" lock in EX mode, there are no other 237 * nodes available to replay our journal. So we try to replay it 238 * ourselves. We hold the "live" glock to prevent other mounters 239 * during recovery, then just dequeue it and reacquire it in our 240 * normal SH mode. Just in case the problem that caused us to 241 * withdraw prevents us from recovering our journal (e.g. io errors 242 * and such) we still check if the journal is clean before proceeding 243 * but we may wait forever until another mounter does the recovery. 244 */ 245 if (ret == 0) { 246 fs_warn(sdp, "No other mounters found. Trying to recover our " 247 "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid); 248 if (gfs2_recover_journal(sdp->sd_jdesc, 1)) 249 fs_warn(sdp, "Unable to recover our journal jid %d.\n", 250 sdp->sd_lockstruct.ls_jid); 251 gfs2_glock_dq_wait(&sdp->sd_live_gh); 252 gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT, 253 &sdp->sd_live_gh); 254 gfs2_glock_nq(&sdp->sd_live_gh); 255 } 256 257 gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */ 258 clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); 259 260 /* 261 * At this point our journal is evicted, so we need to get a new inode 262 * for it. Once done, we need to call gfs2_find_jhead which 263 * calls gfs2_map_journal_extents to map it for us again. 264 * 265 * Note that we don't really want it to look up a FREE block. The 266 * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup 267 * which would otherwise fail because it requires grabbing an rgrp 268 * glock, which would fail with -EIO because we're withdrawing. 269 */ 270 inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN, 271 sdp->sd_jdesc->jd_no_addr, no_formal_ino, 272 GFS2_BLKST_FREE); 273 if (IS_ERR(inode)) { 274 fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n", 275 sdp->sd_lockstruct.ls_jid, PTR_ERR(inode)); 276 goto skip_recovery; 277 } 278 sdp->sd_jdesc->jd_inode = inode; 279 280 /* 281 * Now wait until recovery is complete. 282 */ 283 for (tries = 0; tries < 10; tries++) { 284 ret = check_journal_clean(sdp, sdp->sd_jdesc, false); 285 if (!ret) 286 break; 287 msleep(HZ); 288 fs_warn(sdp, "Waiting for journal recovery jid %d.\n", 289 sdp->sd_lockstruct.ls_jid); 290 } 291 skip_recovery: 292 if (!ret) 293 fs_warn(sdp, "Journal recovery complete for jid %d.\n", 294 sdp->sd_lockstruct.ls_jid); 295 else 296 fs_warn(sdp, "Journal recovery skipped for %d until next " 297 "mount.\n", sdp->sd_lockstruct.ls_jid); 298 fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held); 299 sdp->sd_glock_dqs_held = 0; 300 wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY); 301 } 302 303 void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) 304 { 305 struct va_format vaf; 306 va_list args; 307 308 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && 309 test_bit(SDF_WITHDRAWN, &sdp->sd_flags)) 310 return; 311 312 va_start(args, fmt); 313 vaf.fmt = fmt; 314 vaf.va = &args; 315 fs_err(sdp, "%pV", &vaf); 316 va_end(args); 317 } 318 319 int gfs2_withdraw(struct gfs2_sbd *sdp) 320 { 321 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 322 const struct lm_lockops *lm = ls->ls_ops; 323 324 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && 325 test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) { 326 if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags)) 327 return -1; 328 329 wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG, 330 TASK_UNINTERRUPTIBLE); 331 return -1; 332 } 333 334 set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); 335 336 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { 337 fs_err(sdp, "about to withdraw this file system\n"); 338 BUG_ON(sdp->sd_args.ar_debug); 339 340 signal_our_withdraw(sdp); 341 342 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 343 344 if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) 345 wait_for_completion(&sdp->sd_wdack); 346 347 if (lm->lm_unmount) { 348 fs_err(sdp, "telling LM to unmount\n"); 349 lm->lm_unmount(sdp); 350 } 351 set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); 352 fs_err(sdp, "File system withdrawn\n"); 353 dump_stack(); 354 clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); 355 smp_mb__after_atomic(); 356 wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG); 357 } 358 359 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) 360 panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); 361 362 return -1; 363 } 364 365 /** 366 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false 367 */ 368 369 void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, 370 const char *function, char *file, unsigned int line, 371 bool delayed) 372 { 373 if (gfs2_withdrawn(sdp)) 374 return; 375 376 fs_err(sdp, 377 "fatal: assertion \"%s\" failed\n" 378 " function = %s, file = %s, line = %u\n", 379 assertion, function, file, line); 380 381 /* 382 * If errors=panic was specified on mount, it won't help to delay the 383 * withdraw. 384 */ 385 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) 386 delayed = false; 387 388 if (delayed) 389 gfs2_withdraw_delayed(sdp); 390 else 391 gfs2_withdraw(sdp); 392 dump_stack(); 393 } 394 395 /** 396 * gfs2_assert_warn_i - Print a message to the console if @assertion is false 397 */ 398 399 void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, 400 const char *function, char *file, unsigned int line) 401 { 402 if (time_before(jiffies, 403 sdp->sd_last_warning + 404 gfs2_tune_get(sdp, gt_complain_secs) * HZ)) 405 return; 406 407 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) 408 fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", 409 assertion, function, file, line); 410 411 if (sdp->sd_args.ar_debug) 412 BUG(); 413 else 414 dump_stack(); 415 416 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) 417 panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n" 418 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 419 sdp->sd_fsname, assertion, 420 sdp->sd_fsname, function, file, line); 421 422 sdp->sd_last_warning = jiffies; 423 } 424 425 /** 426 * gfs2_consist_i - Flag a filesystem consistency error and withdraw 427 */ 428 429 void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function, 430 char *file, unsigned int line) 431 { 432 gfs2_lm(sdp, 433 "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", 434 function, file, line); 435 gfs2_withdraw(sdp); 436 } 437 438 /** 439 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw 440 */ 441 442 void gfs2_consist_inode_i(struct gfs2_inode *ip, 443 const char *function, char *file, unsigned int line) 444 { 445 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 446 447 gfs2_lm(sdp, 448 "fatal: filesystem consistency error\n" 449 " inode = %llu %llu\n" 450 " function = %s, file = %s, line = %u\n", 451 (unsigned long long)ip->i_no_formal_ino, 452 (unsigned long long)ip->i_no_addr, 453 function, file, line); 454 gfs2_withdraw(sdp); 455 } 456 457 /** 458 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw 459 */ 460 461 void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, 462 const char *function, char *file, unsigned int line) 463 { 464 struct gfs2_sbd *sdp = rgd->rd_sbd; 465 char fs_id_buf[sizeof(sdp->sd_fsname) + 7]; 466 467 sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname); 468 gfs2_rgrp_dump(NULL, rgd, fs_id_buf); 469 gfs2_lm(sdp, 470 "fatal: filesystem consistency error\n" 471 " RG = %llu\n" 472 " function = %s, file = %s, line = %u\n", 473 (unsigned long long)rgd->rd_addr, 474 function, file, line); 475 gfs2_withdraw(sdp); 476 } 477 478 /** 479 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw 480 * Returns: -1 if this call withdrew the machine, 481 * -2 if it was already withdrawn 482 */ 483 484 int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 485 const char *type, const char *function, char *file, 486 unsigned int line) 487 { 488 int me; 489 490 gfs2_lm(sdp, 491 "fatal: invalid metadata block\n" 492 " bh = %llu (%s)\n" 493 " function = %s, file = %s, line = %u\n", 494 (unsigned long long)bh->b_blocknr, type, 495 function, file, line); 496 me = gfs2_withdraw(sdp); 497 return (me) ? -1 : -2; 498 } 499 500 /** 501 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw 502 * Returns: -1 if this call withdrew the machine, 503 * -2 if it was already withdrawn 504 */ 505 506 int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 507 u16 type, u16 t, const char *function, 508 char *file, unsigned int line) 509 { 510 int me; 511 512 gfs2_lm(sdp, 513 "fatal: invalid metadata block\n" 514 " bh = %llu (type: exp=%u, found=%u)\n" 515 " function = %s, file = %s, line = %u\n", 516 (unsigned long long)bh->b_blocknr, type, t, 517 function, file, line); 518 me = gfs2_withdraw(sdp); 519 return (me) ? -1 : -2; 520 } 521 522 /** 523 * gfs2_io_error_i - Flag an I/O error and withdraw 524 * Returns: -1 if this call withdrew the machine, 525 * 0 if it was already withdrawn 526 */ 527 528 int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, 529 unsigned int line) 530 { 531 gfs2_lm(sdp, 532 "fatal: I/O error\n" 533 " function = %s, file = %s, line = %u\n", 534 function, file, line); 535 return gfs2_withdraw(sdp); 536 } 537 538 /** 539 * gfs2_io_error_bh_i - Flag a buffer I/O error 540 * @withdraw: withdraw the filesystem 541 */ 542 543 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, 544 const char *function, char *file, unsigned int line, 545 bool withdraw) 546 { 547 if (gfs2_withdrawn(sdp)) 548 return; 549 550 fs_err(sdp, "fatal: I/O error\n" 551 " block = %llu\n" 552 " function = %s, file = %s, line = %u\n", 553 (unsigned long long)bh->b_blocknr, function, file, line); 554 if (withdraw) 555 gfs2_withdraw(sdp); 556 } 557 558