1 #include <linux/fs.h> 2 #include <linux/random.h> 3 #include <linux/buffer_head.h> 4 #include <linux/utsname.h> 5 #include <linux/kthread.h> 6 7 #include "ext4.h" 8 9 /* 10 * Write the MMP block using WRITE_SYNC to try to get the block on-disk 11 * faster. 12 */ 13 static int write_mmp_block(struct buffer_head *bh) 14 { 15 mark_buffer_dirty(bh); 16 lock_buffer(bh); 17 bh->b_end_io = end_buffer_write_sync; 18 get_bh(bh); 19 submit_bh(WRITE_SYNC, bh); 20 wait_on_buffer(bh); 21 if (unlikely(!buffer_uptodate(bh))) 22 return 1; 23 24 return 0; 25 } 26 27 /* 28 * Read the MMP block. It _must_ be read from disk and hence we clear the 29 * uptodate flag on the buffer. 30 */ 31 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh, 32 ext4_fsblk_t mmp_block) 33 { 34 struct mmp_struct *mmp; 35 36 if (*bh) 37 clear_buffer_uptodate(*bh); 38 39 /* This would be sb_bread(sb, mmp_block), except we need to be sure 40 * that the MD RAID device cache has been bypassed, and that the read 41 * is not blocked in the elevator. */ 42 if (!*bh) 43 *bh = sb_getblk(sb, mmp_block); 44 if (*bh) { 45 get_bh(*bh); 46 lock_buffer(*bh); 47 (*bh)->b_end_io = end_buffer_read_sync; 48 submit_bh(READ_SYNC, *bh); 49 wait_on_buffer(*bh); 50 if (!buffer_uptodate(*bh)) { 51 brelse(*bh); 52 *bh = NULL; 53 } 54 } 55 if (!*bh) { 56 ext4_warning(sb, "Error while reading MMP block %llu", 57 mmp_block); 58 return -EIO; 59 } 60 61 mmp = (struct mmp_struct *)((*bh)->b_data); 62 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) 63 return -EINVAL; 64 65 return 0; 66 } 67 68 /* 69 * Dump as much information as possible to help the admin. 70 */ 71 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, 72 const char *function, unsigned int line, const char *msg) 73 { 74 __ext4_warning(sb, function, line, msg); 75 __ext4_warning(sb, function, line, 76 "MMP failure info: last update time: %llu, last update " 77 "node: %s, last update device: %s\n", 78 (long long unsigned int) le64_to_cpu(mmp->mmp_time), 79 mmp->mmp_nodename, mmp->mmp_bdevname); 80 } 81 82 /* 83 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds 84 */ 85 static int kmmpd(void *data) 86 { 87 struct super_block *sb = ((struct mmpd_data *) data)->sb; 88 struct buffer_head *bh = ((struct mmpd_data *) data)->bh; 89 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 90 struct mmp_struct *mmp; 91 ext4_fsblk_t mmp_block; 92 u32 seq = 0; 93 unsigned long failed_writes = 0; 94 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval); 95 unsigned mmp_check_interval; 96 unsigned long last_update_time; 97 unsigned long diff; 98 int retval; 99 100 mmp_block = le64_to_cpu(es->s_mmp_block); 101 mmp = (struct mmp_struct *)(bh->b_data); 102 mmp->mmp_time = cpu_to_le64(get_seconds()); 103 /* 104 * Start with the higher mmp_check_interval and reduce it if 105 * the MMP block is being updated on time. 106 */ 107 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, 108 EXT4_MMP_MIN_CHECK_INTERVAL); 109 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 110 bdevname(bh->b_bdev, mmp->mmp_bdevname); 111 112 memcpy(mmp->mmp_nodename, init_utsname()->nodename, 113 sizeof(mmp->mmp_nodename)); 114 115 while (!kthread_should_stop()) { 116 if (++seq > EXT4_MMP_SEQ_MAX) 117 seq = 1; 118 119 mmp->mmp_seq = cpu_to_le32(seq); 120 mmp->mmp_time = cpu_to_le64(get_seconds()); 121 last_update_time = jiffies; 122 123 retval = write_mmp_block(bh); 124 /* 125 * Don't spew too many error messages. Print one every 126 * (s_mmp_update_interval * 60) seconds. 127 */ 128 if (retval) { 129 if ((failed_writes % 60) == 0) 130 ext4_error(sb, "Error writing to MMP block"); 131 failed_writes++; 132 } 133 134 if (!(le32_to_cpu(es->s_feature_incompat) & 135 EXT4_FEATURE_INCOMPAT_MMP)) { 136 ext4_warning(sb, "kmmpd being stopped since MMP feature" 137 " has been disabled."); 138 EXT4_SB(sb)->s_mmp_tsk = NULL; 139 goto failed; 140 } 141 142 if (sb->s_flags & MS_RDONLY) { 143 ext4_warning(sb, "kmmpd being stopped since filesystem " 144 "has been remounted as readonly."); 145 EXT4_SB(sb)->s_mmp_tsk = NULL; 146 goto failed; 147 } 148 149 diff = jiffies - last_update_time; 150 if (diff < mmp_update_interval * HZ) 151 schedule_timeout_interruptible(mmp_update_interval * 152 HZ - diff); 153 154 /* 155 * We need to make sure that more than mmp_check_interval 156 * seconds have not passed since writing. If that has happened 157 * we need to check if the MMP block is as we left it. 158 */ 159 diff = jiffies - last_update_time; 160 if (diff > mmp_check_interval * HZ) { 161 struct buffer_head *bh_check = NULL; 162 struct mmp_struct *mmp_check; 163 164 retval = read_mmp_block(sb, &bh_check, mmp_block); 165 if (retval) { 166 ext4_error(sb, "error reading MMP data: %d", 167 retval); 168 169 EXT4_SB(sb)->s_mmp_tsk = NULL; 170 goto failed; 171 } 172 173 mmp_check = (struct mmp_struct *)(bh_check->b_data); 174 if (mmp->mmp_seq != mmp_check->mmp_seq || 175 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename, 176 sizeof(mmp->mmp_nodename))) { 177 dump_mmp_msg(sb, mmp_check, 178 "Error while updating MMP info. " 179 "The filesystem seems to have been" 180 " multiply mounted."); 181 ext4_error(sb, "abort"); 182 goto failed; 183 } 184 put_bh(bh_check); 185 } 186 187 /* 188 * Adjust the mmp_check_interval depending on how much time 189 * it took for the MMP block to be written. 190 */ 191 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ, 192 EXT4_MMP_MAX_CHECK_INTERVAL), 193 EXT4_MMP_MIN_CHECK_INTERVAL); 194 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); 195 } 196 197 /* 198 * Unmount seems to be clean. 199 */ 200 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN); 201 mmp->mmp_time = cpu_to_le64(get_seconds()); 202 203 retval = write_mmp_block(bh); 204 205 failed: 206 kfree(data); 207 brelse(bh); 208 return retval; 209 } 210 211 /* 212 * Get a random new sequence number but make sure it is not greater than 213 * EXT4_MMP_SEQ_MAX. 214 */ 215 static unsigned int mmp_new_seq(void) 216 { 217 u32 new_seq; 218 219 do { 220 get_random_bytes(&new_seq, sizeof(u32)); 221 } while (new_seq > EXT4_MMP_SEQ_MAX); 222 223 return new_seq; 224 } 225 226 /* 227 * Protect the filesystem from being mounted more than once. 228 */ 229 int ext4_multi_mount_protect(struct super_block *sb, 230 ext4_fsblk_t mmp_block) 231 { 232 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 233 struct buffer_head *bh = NULL; 234 struct mmp_struct *mmp = NULL; 235 struct mmpd_data *mmpd_data; 236 u32 seq; 237 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval); 238 unsigned int wait_time = 0; 239 int retval; 240 241 if (mmp_block < le32_to_cpu(es->s_first_data_block) || 242 mmp_block >= ext4_blocks_count(es)) { 243 ext4_warning(sb, "Invalid MMP block in superblock"); 244 goto failed; 245 } 246 247 retval = read_mmp_block(sb, &bh, mmp_block); 248 if (retval) 249 goto failed; 250 251 mmp = (struct mmp_struct *)(bh->b_data); 252 253 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL) 254 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL; 255 256 /* 257 * If check_interval in MMP block is larger, use that instead of 258 * update_interval from the superblock. 259 */ 260 if (mmp->mmp_check_interval > mmp_check_interval) 261 mmp_check_interval = mmp->mmp_check_interval; 262 263 seq = le32_to_cpu(mmp->mmp_seq); 264 if (seq == EXT4_MMP_SEQ_CLEAN) 265 goto skip; 266 267 if (seq == EXT4_MMP_SEQ_FSCK) { 268 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem"); 269 goto failed; 270 } 271 272 wait_time = min(mmp_check_interval * 2 + 1, 273 mmp_check_interval + 60); 274 275 /* Print MMP interval if more than 20 secs. */ 276 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4) 277 ext4_warning(sb, "MMP interval %u higher than expected, please" 278 " wait.\n", wait_time * 2); 279 280 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 281 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 282 goto failed; 283 } 284 285 retval = read_mmp_block(sb, &bh, mmp_block); 286 if (retval) 287 goto failed; 288 mmp = (struct mmp_struct *)(bh->b_data); 289 if (seq != le32_to_cpu(mmp->mmp_seq)) { 290 dump_mmp_msg(sb, mmp, 291 "Device is already active on another node."); 292 goto failed; 293 } 294 295 skip: 296 /* 297 * write a new random sequence number. 298 */ 299 seq = mmp_new_seq(); 300 mmp->mmp_seq = cpu_to_le32(seq); 301 302 retval = write_mmp_block(bh); 303 if (retval) 304 goto failed; 305 306 /* 307 * wait for MMP interval and check mmp_seq. 308 */ 309 if (schedule_timeout_interruptible(HZ * wait_time) != 0) { 310 ext4_warning(sb, "MMP startup interrupted, failing mount\n"); 311 goto failed; 312 } 313 314 retval = read_mmp_block(sb, &bh, mmp_block); 315 if (retval) 316 goto failed; 317 mmp = (struct mmp_struct *)(bh->b_data); 318 if (seq != le32_to_cpu(mmp->mmp_seq)) { 319 dump_mmp_msg(sb, mmp, 320 "Device is already active on another node."); 321 goto failed; 322 } 323 324 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL); 325 if (!mmpd_data) { 326 ext4_warning(sb, "not enough memory for mmpd_data"); 327 goto failed; 328 } 329 mmpd_data->sb = sb; 330 mmpd_data->bh = bh; 331 332 /* 333 * Start a kernel thread to update the MMP block periodically. 334 */ 335 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", 336 bdevname(bh->b_bdev, 337 mmp->mmp_bdevname)); 338 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { 339 EXT4_SB(sb)->s_mmp_tsk = NULL; 340 kfree(mmpd_data); 341 ext4_warning(sb, "Unable to create kmmpd thread for %s.", 342 sb->s_id); 343 goto failed; 344 } 345 346 return 0; 347 348 failed: 349 brelse(bh); 350 return 1; 351 } 352 353 354