1 #include <linux/fs.h> 2 #include <linux/random.h> 3 #include <linux/buffer_head.h> 4 #include <linux/utsname.h> 5 #include <linux/kthread.h> 6 7 #include "ext4.h" 8 9 /* Checksumming functions */ 10 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp) 11 { 12 struct ext4_sb_info *sbi = EXT4_SB(sb); 13 int offset = offsetof(struct mmp_struct, mmp_checksum); 14 __u32 csum; 15 16 csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset); 17 18 return cpu_to_le32(csum); 19 } 20 21 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 22 { 23 if (!ext4_has_metadata_csum(sb)) 24 return 1; 25 26 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 27 } 28 29 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 30 { 31 if (!ext4_has_metadata_csum(sb)) 32 return; 33 34 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); 35 } 36 37 /* 38 * Write the MMP block using WRITE_SYNC to try to get the block on-disk 39 * faster. 40 */ 41 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) 42 { 43 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 44 45 /* 46 * We protect against freezing so that we don't create dirty buffers 47 * on frozen filesystem. 48 */ 49 sb_start_write(sb); 50 ext4_mmp_csum_set(sb, mmp); 51 mark_buffer_dirty(bh); 52 lock_buffer(bh); 53 bh->b_end_io = end_buffer_write_sync; 54 get_bh(bh); 55 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); 56 wait_on_buffer(bh); 57 sb_end_write(sb); 58 if (unlikely(!buffer_uptodate(bh))) 59 return 1; 60 61 return 0; 62 } 63 64 /* 65 * Read the MMP block. It _must_ be read from disk and hence we clear the 66 * uptodate flag on the buffer. 67 */ 68 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, 69 ext4_fsblk_t mmp_block) 70 { 71 struct mmp_struct *mmp; 72 73 if (*bh) 74 clear_buffer_uptodate(*bh); 75 76 /* This would be sb_bread(sb, mmp_block), except we need to be sure 77 * that the MD RAID device cache has been bypassed, and that the read 78 * is not blocked in the elevator. */ 79 if (!*bh) 80 *bh = sb_getblk(sb, mmp_block); 81 if (!*bh) 82 return -ENOMEM; 83 if (*bh) { 84 get_bh(*bh); 85 lock_buffer(*bh); 86 (*bh)->b_end_io = end_buffer_read_sync; 87 submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh); 88 wait_on_buffer(*bh); 89 if (!buffer_uptodate(*bh)) { 90 brelse(*bh); 91 *bh = NULL; 92 } 93 } 94 if (unlikely(!*bh)) { 95 ext4_warning(sb, "Error while reading MMP block %llu", 96 mmp_block); 97 return -EIO; 98 } 99 100 mmp = (struct mmp_struct *)((*bh)->b_data); 101 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC || 102 !ext4_mmp_csum_verify(sb, mmp)) 103 return -EINVAL; 104 105 return 0; 106 } 107 108 /* 109 * Dump as much information as possible to help the admin. 110 */ 111 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, 112 const char *function, unsigned int line, const char *msg) 113 { 114 __ext4_warning(sb, function, line, msg); 115 __ext4_warning(sb, function, line, 116 "MMP failure info: last update time: %llu, last update " 117 "node: %s, last update device: %s\n", 118 (long long unsigned int) le64_to_cpu(mmp->mmp_time), 119 mmp->mmp_nodename, mmp->mmp_bdevname); 120 } 121 122 /* 123 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds 124 */ 125 static int kmmpd(void *data) 126 { 127 struct super_block *sb = ((struct mmpd_data *) data)->sb; 128 struct buffer_head *bh = ((struct mmpd_data *) data)->bh; 129 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 130 struct mmp_struct *mmp; 131 ext4_fsblk_t mmp_block; 132 u32 seq = 0; 133 unsigned long failed_writes = 0; 134 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); 135 unsigned mmp_check_interval; 136 unsigned long last_update_time; 137 unsigned long diff; 138 int retval; 139 140 mmp_block = le64_to_cpu(es->s_mmp_block); 141 mmp = (struct mmp_struct *)(bh->b_data); 142 mmp->mmp_time = cpu_to_le64(get_seconds()); 143 /* 144 * Start with the higher mmp_check_interval and reduce it if 145 * the MMP block is being updated on time. 146 */ 147 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, 148 EXT4_MMP_MIN_CHECK_INTERVAL); 149 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 150 bdevname(bh->b_bdev, mmp->mmp_bdevname); 151 152 memcpy(mmp->mmp_nodename, init_utsname()->nodename, 153 sizeof(mmp->mmp_nodename)); 154 155 while (!kthread_should_stop()) { 156 if (++seq > EXT4_MMP_SEQ_MAX) 157 seq = 1; 158 159 mmp->mmp_seq = cpu_to_le32(seq); 160 mmp->mmp_time = cpu_to_le64(get_seconds()); 161 last_update_time = jiffies; 162 163 retval = write_mmp_block(sb, bh); 164 /* 165 * Don't spew too many error messages. Print one every 166 * (s_mmp_update_interval * 60) seconds. 167 */ 168 if (retval) { 169 if ((failed_writes % 60) == 0) 170 ext4_error(sb, "Error writing to MMP block"); 171 failed_writes++; 172 } 173 174 if (!(le32_to_cpu(es->s_feature_incompat) & 175 EXT4_FEATURE_INCOMPAT_MMP)) { 176 ext4_warning(sb, "kmmpd being stopped since MMP feature" 177 " has been disabled."); 178 EXT4_SB(sb)->s_mmp_tsk = NULL; 179 goto failed; 180 } 181 182 if (sb->s_flags & MS_RDONLY) { 183 ext4_warning(sb, "kmmpd being stopped since filesystem " 184 "has been remounted as readonly."); 185 EXT4_SB(sb)->s_mmp_tsk = NULL; 186 goto failed; 187 } 188 189 diff = jiffies - last_update_time; 190 if (diff < mmp_update_interval * HZ) 191 schedule_timeout_interruptible(mmp_update_interval * 192 HZ - diff); 193 194 /* 195 * We need to make sure that more than mmp_check_interval 196 * seconds have not passed since writing. If that has happened 197 * we need to check if the MMP block is as we left it. 198 */ 199 diff = jiffies - last_update_time; 200 if (diff > mmp_check_interval * HZ) { 201 struct buffer_head *bh_check = NULL; 202 struct mmp_struct *mmp_check; 203 204 retval = read_mmp_block(sb, &bh_check, mmp_block); 205 if (retval) { 206 ext4_error(sb, "error reading MMP data: %d", 207 retval); 208 209 EXT4_SB(sb)->s_mmp_tsk = NULL; 210 goto failed; 211 } 212 213 mmp_check = (struct mmp_struct *)(bh_check->b_data); 214 if (mmp->mmp_seq != mmp_check->mmp_seq || 215 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, 216 sizeof(mmp->mmp_nodename))) { 217 dump_mmp_msg(sb, mmp_check, 218 "Error while updating MMP info. " 219 "The filesystem seems to have been" 220 " multiply mounted."); 221 ext4_error(sb, "abort"); 222 goto failed; 223 } 224 put_bh(bh_check); 225 } 226 227 /* 228 * Adjust the mmp_check_interval depending on how much time 229 * it took for the MMP block to be written. 230 */ 231 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, 232 EXT4_MMP_MAX_CHECK_INTERVAL), 233 EXT4_MMP_MIN_CHECK_INTERVAL); 234 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 235 } 236 237 /* 238 * Unmount seems to be clean. 239 */ 240 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); 241 mmp->mmp_time = cpu_to_le64(get_seconds()); 242 243 retval = write_mmp_block(sb, bh); 244 245 failed: 246 kfree(data); 247 brelse(bh); 248 return retval; 249 } 250 251 /* 252 * Get a random new sequence number but make sure it is not greater than 253 * EXT4_MMP_SEQ_MAX. 254 */ 255 static unsigned int mmp_new_seq(void) 256 { 257 u32 new_seq; 258 259 do { 260 new_seq = prandom_u32(); 261 } while (new_seq > EXT4_MMP_SEQ_MAX); 262 263 return new_seq; 264 } 265 266 /* 267 * Protect the filesystem from being mounted more than once. 268 */ 269 int ext4_multi_mount_protect(struct super_block *sb, 270 ext4_fsblk_t mmp_block) 271 { 272 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 273 struct buffer_head *bh = NULL; 274 struct mmp_struct *mmp = NULL; 275 struct mmpd_data *mmpd_data; 276 u32 seq; 277 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); 278 unsigned int wait_time = 0; 279 int retval; 280 281 if (mmp_block < le32_to_cpu(es->s_first_data_block) || 282 mmp_block >= ext4_blocks_count(es)) { 283 ext4_warning(sb, "Invalid MMP block in superblock"); 284 goto failed; 285 } 286 287 retval = read_mmp_block(sb, &bh, mmp_block); 288 if (retval) 289 goto failed; 290 291 mmp = (struct mmp_struct *)(bh->b_data); 292 293 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) 294 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; 295 296 /* 297 * If check_interval in MMP block is larger, use that instead of 298 * update_interval from the superblock. 299 */ 300 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval) 301 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval); 302 303 seq = le32_to_cpu(mmp->mmp_seq); 304 if (seq == EXT4_MMP_SEQ_CLEAN) 305 goto skip; 306 307 if (seq == EXT4_MMP_SEQ_FSCK) { 308 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); 309 goto failed; 310 } 311 312 wait_time = min(mmp_check_interval * 2 + 1, 313 mmp_check_interval + 60); 314 315 /* Print MMP interval if more than 20 secs. */ 316 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) 317 ext4_warning(sb, "MMP interval %u higher than expected, please" 318 " wait.\n", wait_time * 2); 319 320 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 321 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 322 goto failed; 323 } 324 325 retval = read_mmp_block(sb, &bh, mmp_block); 326 if (retval) 327 goto failed; 328 mmp = (struct mmp_struct *)(bh->b_data); 329 if (seq != le32_to_cpu(mmp->mmp_seq)) { 330 dump_mmp_msg(sb, mmp, 331 "Device is already active on another node."); 332 goto failed; 333 } 334 335 skip: 336 /* 337 * write a new random sequence number. 338 */ 339 seq = mmp_new_seq(); 340 mmp->mmp_seq = cpu_to_le32(seq); 341 342 retval = write_mmp_block(sb, bh); 343 if (retval) 344 goto failed; 345 346 /* 347 * wait for MMP interval and check mmp_seq. 348 */ 349 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 350 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 351 goto failed; 352 } 353 354 retval = read_mmp_block(sb, &bh, mmp_block); 355 if (retval) 356 goto failed; 357 mmp = (struct mmp_struct *)(bh->b_data); 358 if (seq != le32_to_cpu(mmp->mmp_seq)) { 359 dump_mmp_msg(sb, mmp, 360 "Device is already active on another node."); 361 goto failed; 362 } 363 364 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL); 365 if (!mmpd_data) { 366 ext4_warning(sb, "not enough memory for mmpd_data"); 367 goto failed; 368 } 369 mmpd_data->sb = sb; 370 mmpd_data->bh = bh; 371 372 /* 373 * Start a kernel thread to update the MMP block periodically. 374 */ 375 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", 376 bdevname(bh->b_bdev, 377 mmp->mmp_bdevname)); 378 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { 379 EXT4_SB(sb)->s_mmp_tsk = NULL; 380 kfree(mmpd_data); 381 ext4_warning(sb, "Unable to create kmmpd thread for %s.", 382 sb->s_id); 383 goto failed; 384 } 385 386 return 0; 387 388 failed: 389 brelse(bh); 390 return 1; 391 } 392 393 394