1 #include <linux/fs.h> 2 #include <linux/random.h> 3 #include <linux/buffer_head.h> 4 #include <linux/utsname.h> 5 #include <linux/kthread.h> 6 7 #include "ext4.h" 8 9 /* 10 * Write the MMP block using WRITE_SYNC to try to get the block on-disk 11 * faster. 12 */ 13 static int write_mmp_block(struct buffer_head *bh) 14 { 15 mark_buffer_dirty(bh); 16 lock_buffer(bh); 17 bh->b_end_io = end_buffer_write_sync; 18 get_bh(bh); 19 submit_bh(WRITE_SYNC, bh); 20 wait_on_buffer(bh); 21 if (unlikely(!buffer_uptodate(bh))) 22 return 1; 23 24 return 0; 25 } 26 27 /* 28 * Read the MMP block. It _must_ be read from disk and hence we clear the 29 * uptodate flag on the buffer. 30 */ 31 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, 32 ext4_fsblk_t mmp_block) 33 { 34 struct mmp_struct *mmp; 35 36 if (*bh) 37 clear_buffer_uptodate(*bh); 38 39 /* This would be sb_bread(sb, mmp_block), except we need to be sure 40 * that the MD RAID device cache has been bypassed, and that the read 41 * is not blocked in the elevator. */ 42 if (!*bh) 43 *bh = sb_getblk(sb, mmp_block); 44 if (*bh) { 45 get_bh(*bh); 46 lock_buffer(*bh); 47 (*bh)->b_end_io = end_buffer_read_sync; 48 submit_bh(READ_SYNC, *bh); 49 wait_on_buffer(*bh); 50 if (!buffer_uptodate(*bh)) { 51 brelse(*bh); 52 *bh = NULL; 53 } 54 } 55 if (!*bh) { 56 ext4_warning(sb, "Error while reading MMP block %llu", 57 mmp_block); 58 return -EIO; 59 } 60 61 mmp = (struct mmp_struct *)((*bh)->b_data); 62 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) 63 return -EINVAL; 64 65 return 0; 66 } 67 68 /* 69 * Dump as much information as possible to help the admin. 70 */ 71 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, 72 const char *function, unsigned int line, const char *msg) 73 { 74 __ext4_warning(sb, function, line, msg); 75 __ext4_warning(sb, function, line, 76 "MMP failure info: last update time: %llu, last update " 77 "node: %s, last update device: %s\n", 78 (long long unsigned int) le64_to_cpu(mmp->mmp_time), 79 mmp->mmp_nodename, mmp->mmp_bdevname); 80 } 81 82 /* 83 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds 84 */ 85 static int kmmpd(void *data) 86 { 87 struct super_block *sb = ((struct mmpd_data *) data)->sb; 88 struct buffer_head *bh = ((struct mmpd_data *) data)->bh; 89 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 90 struct mmp_struct *mmp; 91 ext4_fsblk_t mmp_block; 92 u32 seq = 0; 93 unsigned long failed_writes = 0; 94 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); 95 unsigned mmp_check_interval; 96 unsigned long last_update_time; 97 unsigned long diff; 98 int retval; 99 100 mmp_block = le64_to_cpu(es->s_mmp_block); 101 mmp = (struct mmp_struct *)(bh->b_data); 102 mmp->mmp_time = cpu_to_le64(get_seconds()); 103 /* 104 * Start with the higher mmp_check_interval and reduce it if 105 * the MMP block is being updated on time. 106 */ 107 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, 108 EXT4_MMP_MIN_CHECK_INTERVAL); 109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 110 bdevname(bh->b_bdev, mmp->mmp_bdevname); 111 112 memcpy(mmp->mmp_nodename, init_utsname()->sysname, 113 sizeof(mmp->mmp_nodename)); 114 115 while (!kthread_should_stop()) { 116 if (++seq > EXT4_MMP_SEQ_MAX) 117 seq = 1; 118 119 mmp->mmp_seq = cpu_to_le32(seq); 120 mmp->mmp_time = cpu_to_le64(get_seconds()); 121 last_update_time = jiffies; 122 123 retval = write_mmp_block(bh); 124 /* 125 * Don't spew too many error messages. Print one every 126 * (s_mmp_update_interval * 60) seconds. 127 */ 128 if (retval && (failed_writes % 60) == 0) { 129 ext4_error(sb, "Error writing to MMP block"); 130 failed_writes++; 131 } 132 133 if (!(le32_to_cpu(es->s_feature_incompat) & 134 EXT4_FEATURE_INCOMPAT_MMP)) { 135 ext4_warning(sb, "kmmpd being stopped since MMP feature" 136 " has been disabled."); 137 EXT4_SB(sb)->s_mmp_tsk = NULL; 138 goto failed; 139 } 140 141 if (sb->s_flags & MS_RDONLY) { 142 ext4_warning(sb, "kmmpd being stopped since filesystem " 143 "has been remounted as readonly."); 144 EXT4_SB(sb)->s_mmp_tsk = NULL; 145 goto failed; 146 } 147 148 diff = jiffies - last_update_time; 149 if (diff < mmp_update_interval * HZ) 150 schedule_timeout_interruptible(mmp_update_interval * 151 HZ - diff); 152 153 /* 154 * We need to make sure that more than mmp_check_interval 155 * seconds have not passed since writing. If that has happened 156 * we need to check if the MMP block is as we left it. 157 */ 158 diff = jiffies - last_update_time; 159 if (diff > mmp_check_interval * HZ) { 160 struct buffer_head *bh_check = NULL; 161 struct mmp_struct *mmp_check; 162 163 retval = read_mmp_block(sb, &bh_check, mmp_block); 164 if (retval) { 165 ext4_error(sb, "error reading MMP data: %d", 166 retval); 167 168 EXT4_SB(sb)->s_mmp_tsk = NULL; 169 goto failed; 170 } 171 172 mmp_check = (struct mmp_struct *)(bh_check->b_data); 173 if (mmp->mmp_seq != mmp_check->mmp_seq || 174 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, 175 sizeof(mmp->mmp_nodename))) { 176 dump_mmp_msg(sb, mmp_check, 177 "Error while updating MMP info. " 178 "The filesystem seems to have been" 179 " multiply mounted."); 180 ext4_error(sb, "abort"); 181 goto failed; 182 } 183 put_bh(bh_check); 184 } 185 186 /* 187 * Adjust the mmp_check_interval depending on how much time 188 * it took for the MMP block to be written. 189 */ 190 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, 191 EXT4_MMP_MAX_CHECK_INTERVAL), 192 EXT4_MMP_MIN_CHECK_INTERVAL); 193 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 194 } 195 196 /* 197 * Unmount seems to be clean. 198 */ 199 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); 200 mmp->mmp_time = cpu_to_le64(get_seconds()); 201 202 retval = write_mmp_block(bh); 203 204 failed: 205 kfree(data); 206 brelse(bh); 207 return retval; 208 } 209 210 /* 211 * Get a random new sequence number but make sure it is not greater than 212 * EXT4_MMP_SEQ_MAX. 213 */ 214 static unsigned int mmp_new_seq(void) 215 { 216 u32 new_seq; 217 218 do { 219 get_random_bytes(&new_seq, sizeof(u32)); 220 } while (new_seq > EXT4_MMP_SEQ_MAX); 221 222 return new_seq; 223 } 224 225 /* 226 * Protect the filesystem from being mounted more than once. 227 */ 228 int ext4_multi_mount_protect(struct super_block *sb, 229 ext4_fsblk_t mmp_block) 230 { 231 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 232 struct buffer_head *bh = NULL; 233 struct mmp_struct *mmp = NULL; 234 struct mmpd_data *mmpd_data; 235 u32 seq; 236 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); 237 unsigned int wait_time = 0; 238 int retval; 239 240 if (mmp_block < le32_to_cpu(es->s_first_data_block) || 241 mmp_block >= ext4_blocks_count(es)) { 242 ext4_warning(sb, "Invalid MMP block in superblock"); 243 goto failed; 244 } 245 246 retval = read_mmp_block(sb, &bh, mmp_block); 247 if (retval) 248 goto failed; 249 250 mmp = (struct mmp_struct *)(bh->b_data); 251 252 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) 253 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; 254 255 /* 256 * If check_interval in MMP block is larger, use that instead of 257 * update_interval from the superblock. 258 */ 259 if (mmp->mmp_check_interval > mmp_check_interval) 260 mmp_check_interval = mmp->mmp_check_interval; 261 262 seq = le32_to_cpu(mmp->mmp_seq); 263 if (seq == EXT4_MMP_SEQ_CLEAN) 264 goto skip; 265 266 if (seq == EXT4_MMP_SEQ_FSCK) { 267 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); 268 goto failed; 269 } 270 271 wait_time = min(mmp_check_interval * 2 + 1, 272 mmp_check_interval + 60); 273 274 /* Print MMP interval if more than 20 secs. */ 275 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) 276 ext4_warning(sb, "MMP interval %u higher than expected, please" 277 " wait.\n", wait_time * 2); 278 279 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 280 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 281 goto failed; 282 } 283 284 retval = read_mmp_block(sb, &bh, mmp_block); 285 if (retval) 286 goto failed; 287 mmp = (struct mmp_struct *)(bh->b_data); 288 if (seq != le32_to_cpu(mmp->mmp_seq)) { 289 dump_mmp_msg(sb, mmp, 290 "Device is already active on another node."); 291 goto failed; 292 } 293 294 skip: 295 /* 296 * write a new random sequence number. 297 */ 298 mmp->mmp_seq = seq = cpu_to_le32(mmp_new_seq()); 299 300 retval = write_mmp_block(bh); 301 if (retval) 302 goto failed; 303 304 /* 305 * wait for MMP interval and check mmp_seq. 306 */ 307 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 308 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 309 goto failed; 310 } 311 312 retval = read_mmp_block(sb, &bh, mmp_block); 313 if (retval) 314 goto failed; 315 mmp = (struct mmp_struct *)(bh->b_data); 316 if (seq != le32_to_cpu(mmp->mmp_seq)) { 317 dump_mmp_msg(sb, mmp, 318 "Device is already active on another node."); 319 goto failed; 320 } 321 322 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL); 323 if (!mmpd_data) { 324 ext4_warning(sb, "not enough memory for mmpd_data"); 325 goto failed; 326 } 327 mmpd_data->sb = sb; 328 mmpd_data->bh = bh; 329 330 /* 331 * Start a kernel thread to update the MMP block periodically. 332 */ 333 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", 334 bdevname(bh->b_bdev, 335 mmp->mmp_bdevname)); 336 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { 337 EXT4_SB(sb)->s_mmp_tsk = NULL; 338 kfree(mmpd_data); 339 ext4_warning(sb, "Unable to create kmmpd thread for %s.", 340 sb->s_id); 341 goto failed; 342 } 343 344 return 0; 345 346 failed: 347 brelse(bh); 348 return 1; 349 } 350 351 352