1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/fs.h> 3 #include <linux/random.h> 4 #include <linux/buffer_head.h> 5 #include <linux/utsname.h> 6 #include <linux/kthread.h> 7 8 #include "ext4.h" 9 10 /* Checksumming functions */ 11 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) 12 { 13 struct ext4_sb_info *sbi = EXT4_SB(sb); 14 int offset = offsetof(struct mmp_struct, mmp_checksum); 15 __u32 csum; 16 17 csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); 18 19 return cpu_to_le32(csum); 20 } 21 22 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 23 { 24 if (!ext4_has_metadata_csum(sb)) 25 return 1; 26 27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 28 } 29 30 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 31 { 32 if (!ext4_has_metadata_csum(sb)) 33 return; 34 35 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); 36 } 37 38 /* 39 * Write the MMP block using REQ_SYNC to try to get the block on-disk 40 * faster. 41 */ 42 static int write_mmp_block_thawed(struct super_block *sb, 43 struct buffer_head *bh) 44 { 45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 46 47 ext4_mmp_csum_set(sb, mmp); 48 lock_buffer(bh); 49 bh->b_end_io = end_buffer_write_sync; 50 get_bh(bh); 51 submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh); 52 wait_on_buffer(bh); 53 if (unlikely(!buffer_uptodate(bh))) 54 return -EIO; 55 return 0; 56 } 57 58 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) 59 { 60 int err; 61 62 /* 63 * We protect against freezing so that we don't create dirty buffers 64 * on frozen filesystem. 65 */ 66 sb_start_write(sb); 67 err = write_mmp_block_thawed(sb, bh); 68 sb_end_write(sb); 69 return err; 70 } 71 72 /* 73 * Read the MMP block. It _must_ be read from disk and hence we clear the 74 * uptodate flag on the buffer. 75 */ 76 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, 77 ext4_fsblk_t mmp_block) 78 { 79 struct mmp_struct *mmp; 80 int ret; 81 82 if (*bh) 83 clear_buffer_uptodate(*bh); 84 85 /* This would be sb_bread(sb, mmp_block), except we need to be sure 86 * that the MD RAID device cache has been bypassed, and that the read 87 * is not blocked in the elevator. */ 88 if (!*bh) { 89 *bh = sb_getblk(sb, mmp_block); 90 if (!*bh) { 91 ret = -ENOMEM; 92 goto warn_exit; 93 } 94 } 95 96 lock_buffer(*bh); 97 ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL); 98 if (ret) 99 goto warn_exit; 100 101 mmp = (struct mmp_struct *)((*bh)->b_data); 102 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) { 103 ret = -EFSCORRUPTED; 104 goto warn_exit; 105 } 106 if (!ext4_mmp_csum_verify(sb, mmp)) { 107 ret = -EFSBADCRC; 108 goto warn_exit; 109 } 110 return 0; 111 warn_exit: 112 brelse(*bh); 113 *bh = NULL; 114 ext4_warning(sb, "Error %d while reading MMP block %llu", 115 ret, mmp_block); 116 return ret; 117 } 118 119 /* 120 * Dump as much information as possible to help the admin. 121 */ 122 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, 123 const char *function, unsigned int line, const char *msg) 124 { 125 __ext4_warning(sb, function, line, "%s", msg); 126 __ext4_warning(sb, function, line, 127 "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", 128 (unsigned long long)le64_to_cpu(mmp->mmp_time), 129 (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, 130 (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); 131 } 132 133 /* 134 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds 135 */ 136 static int kmmpd(void *data) 137 { 138 struct super_block *sb = data; 139 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 140 struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh; 141 struct mmp_struct *mmp; 142 ext4_fsblk_t mmp_block; 143 u32 seq = 0; 144 unsigned long failed_writes = 0; 145 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); 146 unsigned mmp_check_interval; 147 unsigned long last_update_time; 148 unsigned long diff; 149 int retval = 0; 150 151 mmp_block = le64_to_cpu(es->s_mmp_block); 152 mmp = (struct mmp_struct *)(bh->b_data); 153 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 154 /* 155 * Start with the higher mmp_check_interval and reduce it if 156 * the MMP block is being updated on time. 157 */ 158 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, 159 EXT4_MMP_MIN_CHECK_INTERVAL); 160 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 161 162 memcpy(mmp->mmp_nodename, init_utsname()->nodename, 163 sizeof(mmp->mmp_nodename)); 164 165 while (!kthread_should_stop() && !sb_rdonly(sb)) { 166 if (!ext4_has_feature_mmp(sb)) { 167 ext4_warning(sb, "kmmpd being stopped since MMP feature" 168 " has been disabled."); 169 goto wait_to_exit; 170 } 171 if (++seq > EXT4_MMP_SEQ_MAX) 172 seq = 1; 173 174 mmp->mmp_seq = cpu_to_le32(seq); 175 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 176 last_update_time = jiffies; 177 178 retval = write_mmp_block(sb, bh); 179 /* 180 * Don't spew too many error messages. Print one every 181 * (s_mmp_update_interval * 60) seconds. 182 */ 183 if (retval) { 184 if ((failed_writes % 60) == 0) { 185 ext4_error_err(sb, -retval, 186 "Error writing to MMP block"); 187 } 188 failed_writes++; 189 } 190 191 diff = jiffies - last_update_time; 192 if (diff < mmp_update_interval * HZ) 193 schedule_timeout_interruptible(mmp_update_interval * 194 HZ - diff); 195 196 /* 197 * We need to make sure that more than mmp_check_interval 198 * seconds have not passed since writing. If that has happened 199 * we need to check if the MMP block is as we left it. 200 */ 201 diff = jiffies - last_update_time; 202 if (diff > mmp_check_interval * HZ) { 203 struct buffer_head *bh_check = NULL; 204 struct mmp_struct *mmp_check; 205 206 retval = read_mmp_block(sb, &bh_check, mmp_block); 207 if (retval) { 208 ext4_error_err(sb, -retval, 209 "error reading MMP data: %d", 210 retval); 211 goto wait_to_exit; 212 } 213 214 mmp_check = (struct mmp_struct *)(bh_check->b_data); 215 if (mmp->mmp_seq != mmp_check->mmp_seq || 216 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, 217 sizeof(mmp->mmp_nodename))) { 218 dump_mmp_msg(sb, mmp_check, 219 "Error while updating MMP info. " 220 "The filesystem seems to have been" 221 " multiply mounted."); 222 ext4_error_err(sb, EBUSY, "abort"); 223 put_bh(bh_check); 224 retval = -EBUSY; 225 goto wait_to_exit; 226 } 227 put_bh(bh_check); 228 } 229 230 /* 231 * Adjust the mmp_check_interval depending on how much time 232 * it took for the MMP block to be written. 233 */ 234 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, 235 EXT4_MMP_MAX_CHECK_INTERVAL), 236 EXT4_MMP_MIN_CHECK_INTERVAL); 237 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 238 } 239 240 /* 241 * Unmount seems to be clean. 242 */ 243 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); 244 mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds()); 245 246 retval = write_mmp_block(sb, bh); 247 248 wait_to_exit: 249 while (!kthread_should_stop()) { 250 set_current_state(TASK_INTERRUPTIBLE); 251 if (!kthread_should_stop()) 252 schedule(); 253 } 254 set_current_state(TASK_RUNNING); 255 return retval; 256 } 257 258 void ext4_stop_mmpd(struct ext4_sb_info *sbi) 259 { 260 if (sbi->s_mmp_tsk) { 261 kthread_stop(sbi->s_mmp_tsk); 262 brelse(sbi->s_mmp_bh); 263 sbi->s_mmp_tsk = NULL; 264 } 265 } 266 267 /* 268 * Get a random new sequence number but make sure it is not greater than 269 * EXT4_MMP_SEQ_MAX. 270 */ 271 static unsigned int mmp_new_seq(void) 272 { 273 return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1); 274 } 275 276 /* 277 * Protect the filesystem from being mounted more than once. 278 */ 279 int ext4_multi_mount_protect(struct super_block *sb, 280 ext4_fsblk_t mmp_block) 281 { 282 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 283 struct buffer_head *bh = NULL; 284 struct mmp_struct *mmp = NULL; 285 u32 seq; 286 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); 287 unsigned int wait_time = 0; 288 int retval; 289 290 if (mmp_block < le32_to_cpu(es->s_first_data_block) || 291 mmp_block >= ext4_blocks_count(es)) { 292 ext4_warning(sb, "Invalid MMP block in superblock"); 293 retval = -EINVAL; 294 goto failed; 295 } 296 297 retval = read_mmp_block(sb, &bh, mmp_block); 298 if (retval) 299 goto failed; 300 301 mmp = (struct mmp_struct *)(bh->b_data); 302 303 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) 304 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; 305 306 /* 307 * If check_interval in MMP block is larger, use that instead of 308 * update_interval from the superblock. 309 */ 310 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) 311 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); 312 313 seq = le32_to_cpu(mmp->mmp_seq); 314 if (seq == EXT4_MMP_SEQ_CLEAN) 315 goto skip; 316 317 if (seq == EXT4_MMP_SEQ_FSCK) { 318 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); 319 retval = -EBUSY; 320 goto failed; 321 } 322 323 wait_time = min(mmp_check_interval * 2 + 1, 324 mmp_check_interval + 60); 325 326 /* Print MMP interval if more than 20 secs. */ 327 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) 328 ext4_warning(sb, "MMP interval %u higher than expected, please" 329 " wait.\n", wait_time * 2); 330 331 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 332 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 333 retval = -ETIMEDOUT; 334 goto failed; 335 } 336 337 retval = read_mmp_block(sb, &bh, mmp_block); 338 if (retval) 339 goto failed; 340 mmp = (struct mmp_struct *)(bh->b_data); 341 if (seq != le32_to_cpu(mmp->mmp_seq)) { 342 dump_mmp_msg(sb, mmp, 343 "Device is already active on another node."); 344 retval = -EBUSY; 345 goto failed; 346 } 347 348 skip: 349 /* 350 * write a new random sequence number. 351 */ 352 seq = mmp_new_seq(); 353 mmp->mmp_seq = cpu_to_le32(seq); 354 355 /* 356 * On mount / remount we are protected against fs freezing (by s_umount 357 * semaphore) and grabbing freeze protection upsets lockdep 358 */ 359 retval = write_mmp_block_thawed(sb, bh); 360 if (retval) 361 goto failed; 362 363 /* 364 * wait for MMP interval and check mmp_seq. 365 */ 366 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 367 ext4_warning(sb, "MMP startup interrupted, failing mount"); 368 retval = -ETIMEDOUT; 369 goto failed; 370 } 371 372 retval = read_mmp_block(sb, &bh, mmp_block); 373 if (retval) 374 goto failed; 375 mmp = (struct mmp_struct *)(bh->b_data); 376 if (seq != le32_to_cpu(mmp->mmp_seq)) { 377 dump_mmp_msg(sb, mmp, 378 "Device is already active on another node."); 379 retval = -EBUSY; 380 goto failed; 381 } 382 383 EXT4_SB(sb)->s_mmp_bh = bh; 384 385 BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); 386 snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname), 387 "%pg", bh->b_bdev); 388 389 /* 390 * Start a kernel thread to update the MMP block periodically. 391 */ 392 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s", 393 (int)sizeof(mmp->mmp_bdevname), 394 mmp->mmp_bdevname); 395 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { 396 EXT4_SB(sb)->s_mmp_tsk = NULL; 397 ext4_warning(sb, "Unable to create kmmpd thread for %s.", 398 sb->s_id); 399 retval = -ENOMEM; 400 goto failed; 401 } 402 403 return 0; 404 405 failed: 406 brelse(bh); 407 return retval; 408 } 409