xref: /openbmc/linux/fs/ext4/mmp.c (revision efb339a83368ab25de1a18c0fdff85e01c13a1ea)
1  // SPDX-License-Identifier: GPL-2.0
2  #include <linux/fs.h>
3  #include <linux/random.h>
4  #include <linux/buffer_head.h>
5  #include <linux/utsname.h>
6  #include <linux/kthread.h>
7  
8  #include "ext4.h"
9  
10  /* Checksumming functions */
11  static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
12  {
13  	struct ext4_sb_info *sbi = EXT4_SB(sb);
14  	int offset = offsetof(struct mmp_struct, mmp_checksum);
15  	__u32 csum;
16  
17  	csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
18  
19  	return cpu_to_le32(csum);
20  }
21  
22  static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
23  {
24  	if (!ext4_has_metadata_csum(sb))
25  		return 1;
26  
27  	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
28  }
29  
30  static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31  {
32  	if (!ext4_has_metadata_csum(sb))
33  		return;
34  
35  	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
36  }
37  
38  /*
39   * Write the MMP block using REQ_SYNC to try to get the block on-disk
40   * faster.
41   */
42  static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
43  {
44  	struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
45  
46  	/*
47  	 * We protect against freezing so that we don't create dirty buffers
48  	 * on frozen filesystem.
49  	 */
50  	sb_start_write(sb);
51  	ext4_mmp_csum_set(sb, mmp);
52  	lock_buffer(bh);
53  	bh->b_end_io = end_buffer_write_sync;
54  	get_bh(bh);
55  	submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
56  	wait_on_buffer(bh);
57  	sb_end_write(sb);
58  	if (unlikely(!buffer_uptodate(bh)))
59  		return -EIO;
60  
61  	return 0;
62  }
63  
64  /*
65   * Read the MMP block. It _must_ be read from disk and hence we clear the
66   * uptodate flag on the buffer.
67   */
68  static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
69  			  ext4_fsblk_t mmp_block)
70  {
71  	struct mmp_struct *mmp;
72  	int ret;
73  
74  	if (*bh)
75  		clear_buffer_uptodate(*bh);
76  
77  	/* This would be sb_bread(sb, mmp_block), except we need to be sure
78  	 * that the MD RAID device cache has been bypassed, and that the read
79  	 * is not blocked in the elevator. */
80  	if (!*bh) {
81  		*bh = sb_getblk(sb, mmp_block);
82  		if (!*bh) {
83  			ret = -ENOMEM;
84  			goto warn_exit;
85  		}
86  	}
87  
88  	lock_buffer(*bh);
89  	ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL);
90  	if (ret)
91  		goto warn_exit;
92  
93  	mmp = (struct mmp_struct *)((*bh)->b_data);
94  	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
95  		ret = -EFSCORRUPTED;
96  		goto warn_exit;
97  	}
98  	if (!ext4_mmp_csum_verify(sb, mmp)) {
99  		ret = -EFSBADCRC;
100  		goto warn_exit;
101  	}
102  	return 0;
103  warn_exit:
104  	brelse(*bh);
105  	*bh = NULL;
106  	ext4_warning(sb, "Error %d while reading MMP block %llu",
107  		     ret, mmp_block);
108  	return ret;
109  }
110  
111  /*
112   * Dump as much information as possible to help the admin.
113   */
114  void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
115  		    const char *function, unsigned int line, const char *msg)
116  {
117  	__ext4_warning(sb, function, line, "%s", msg);
118  	__ext4_warning(sb, function, line,
119  		       "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
120  		       (unsigned long long)le64_to_cpu(mmp->mmp_time),
121  		       (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
122  		       (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
123  }
124  
125  /*
126   * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
127   */
128  static int kmmpd(void *data)
129  {
130  	struct super_block *sb = data;
131  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
132  	struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh;
133  	struct mmp_struct *mmp;
134  	ext4_fsblk_t mmp_block;
135  	u32 seq = 0;
136  	unsigned long failed_writes = 0;
137  	int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
138  	unsigned mmp_check_interval;
139  	unsigned long last_update_time;
140  	unsigned long diff;
141  	int retval = 0;
142  
143  	mmp_block = le64_to_cpu(es->s_mmp_block);
144  	mmp = (struct mmp_struct *)(bh->b_data);
145  	mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
146  	/*
147  	 * Start with the higher mmp_check_interval and reduce it if
148  	 * the MMP block is being updated on time.
149  	 */
150  	mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
151  				 EXT4_MMP_MIN_CHECK_INTERVAL);
152  	mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
153  
154  	memcpy(mmp->mmp_nodename, init_utsname()->nodename,
155  	       sizeof(mmp->mmp_nodename));
156  
157  	while (!kthread_should_stop() && !sb_rdonly(sb)) {
158  		if (!ext4_has_feature_mmp(sb)) {
159  			ext4_warning(sb, "kmmpd being stopped since MMP feature"
160  				     " has been disabled.");
161  			goto wait_to_exit;
162  		}
163  		if (++seq > EXT4_MMP_SEQ_MAX)
164  			seq = 1;
165  
166  		mmp->mmp_seq = cpu_to_le32(seq);
167  		mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
168  		last_update_time = jiffies;
169  
170  		retval = write_mmp_block(sb, bh);
171  		/*
172  		 * Don't spew too many error messages. Print one every
173  		 * (s_mmp_update_interval * 60) seconds.
174  		 */
175  		if (retval) {
176  			if ((failed_writes % 60) == 0) {
177  				ext4_error_err(sb, -retval,
178  					       "Error writing to MMP block");
179  			}
180  			failed_writes++;
181  		}
182  
183  		diff = jiffies - last_update_time;
184  		if (diff < mmp_update_interval * HZ)
185  			schedule_timeout_interruptible(mmp_update_interval *
186  						       HZ - diff);
187  
188  		/*
189  		 * We need to make sure that more than mmp_check_interval
190  		 * seconds have not passed since writing. If that has happened
191  		 * we need to check if the MMP block is as we left it.
192  		 */
193  		diff = jiffies - last_update_time;
194  		if (diff > mmp_check_interval * HZ) {
195  			struct buffer_head *bh_check = NULL;
196  			struct mmp_struct *mmp_check;
197  
198  			retval = read_mmp_block(sb, &bh_check, mmp_block);
199  			if (retval) {
200  				ext4_error_err(sb, -retval,
201  					       "error reading MMP data: %d",
202  					       retval);
203  				goto wait_to_exit;
204  			}
205  
206  			mmp_check = (struct mmp_struct *)(bh_check->b_data);
207  			if (mmp->mmp_seq != mmp_check->mmp_seq ||
208  			    memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
209  				   sizeof(mmp->mmp_nodename))) {
210  				dump_mmp_msg(sb, mmp_check,
211  					     "Error while updating MMP info. "
212  					     "The filesystem seems to have been"
213  					     " multiply mounted.");
214  				ext4_error_err(sb, EBUSY, "abort");
215  				put_bh(bh_check);
216  				retval = -EBUSY;
217  				goto wait_to_exit;
218  			}
219  			put_bh(bh_check);
220  		}
221  
222  		 /*
223  		 * Adjust the mmp_check_interval depending on how much time
224  		 * it took for the MMP block to be written.
225  		 */
226  		mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
227  					     EXT4_MMP_MAX_CHECK_INTERVAL),
228  					 EXT4_MMP_MIN_CHECK_INTERVAL);
229  		mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
230  	}
231  
232  	/*
233  	 * Unmount seems to be clean.
234  	 */
235  	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
236  	mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
237  
238  	retval = write_mmp_block(sb, bh);
239  
240  wait_to_exit:
241  	while (!kthread_should_stop()) {
242  		set_current_state(TASK_INTERRUPTIBLE);
243  		if (!kthread_should_stop())
244  			schedule();
245  	}
246  	set_current_state(TASK_RUNNING);
247  	return retval;
248  }
249  
250  void ext4_stop_mmpd(struct ext4_sb_info *sbi)
251  {
252  	if (sbi->s_mmp_tsk) {
253  		kthread_stop(sbi->s_mmp_tsk);
254  		brelse(sbi->s_mmp_bh);
255  		sbi->s_mmp_tsk = NULL;
256  	}
257  }
258  
259  /*
260   * Get a random new sequence number but make sure it is not greater than
261   * EXT4_MMP_SEQ_MAX.
262   */
263  static unsigned int mmp_new_seq(void)
264  {
265  	return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1);
266  }
267  
268  /*
269   * Protect the filesystem from being mounted more than once.
270   */
271  int ext4_multi_mount_protect(struct super_block *sb,
272  				    ext4_fsblk_t mmp_block)
273  {
274  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
275  	struct buffer_head *bh = NULL;
276  	struct mmp_struct *mmp = NULL;
277  	u32 seq;
278  	unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
279  	unsigned int wait_time = 0;
280  	int retval;
281  
282  	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
283  	    mmp_block >= ext4_blocks_count(es)) {
284  		ext4_warning(sb, "Invalid MMP block in superblock");
285  		goto failed;
286  	}
287  
288  	retval = read_mmp_block(sb, &bh, mmp_block);
289  	if (retval)
290  		goto failed;
291  
292  	mmp = (struct mmp_struct *)(bh->b_data);
293  
294  	if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
295  		mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
296  
297  	/*
298  	 * If check_interval in MMP block is larger, use that instead of
299  	 * update_interval from the superblock.
300  	 */
301  	if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
302  		mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
303  
304  	seq = le32_to_cpu(mmp->mmp_seq);
305  	if (seq == EXT4_MMP_SEQ_CLEAN)
306  		goto skip;
307  
308  	if (seq == EXT4_MMP_SEQ_FSCK) {
309  		dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
310  		goto failed;
311  	}
312  
313  	wait_time = min(mmp_check_interval * 2 + 1,
314  			mmp_check_interval + 60);
315  
316  	/* Print MMP interval if more than 20 secs. */
317  	if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
318  		ext4_warning(sb, "MMP interval %u higher than expected, please"
319  			     " wait.\n", wait_time * 2);
320  
321  	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
322  		ext4_warning(sb, "MMP startup interrupted, failing mount\n");
323  		goto failed;
324  	}
325  
326  	retval = read_mmp_block(sb, &bh, mmp_block);
327  	if (retval)
328  		goto failed;
329  	mmp = (struct mmp_struct *)(bh->b_data);
330  	if (seq != le32_to_cpu(mmp->mmp_seq)) {
331  		dump_mmp_msg(sb, mmp,
332  			     "Device is already active on another node.");
333  		goto failed;
334  	}
335  
336  skip:
337  	/*
338  	 * write a new random sequence number.
339  	 */
340  	seq = mmp_new_seq();
341  	mmp->mmp_seq = cpu_to_le32(seq);
342  
343  	retval = write_mmp_block(sb, bh);
344  	if (retval)
345  		goto failed;
346  
347  	/*
348  	 * wait for MMP interval and check mmp_seq.
349  	 */
350  	if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
351  		ext4_warning(sb, "MMP startup interrupted, failing mount");
352  		goto failed;
353  	}
354  
355  	retval = read_mmp_block(sb, &bh, mmp_block);
356  	if (retval)
357  		goto failed;
358  	mmp = (struct mmp_struct *)(bh->b_data);
359  	if (seq != le32_to_cpu(mmp->mmp_seq)) {
360  		dump_mmp_msg(sb, mmp,
361  			     "Device is already active on another node.");
362  		goto failed;
363  	}
364  
365  	EXT4_SB(sb)->s_mmp_bh = bh;
366  
367  	BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
368  	snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname),
369  		 "%pg", bh->b_bdev);
370  
371  	/*
372  	 * Start a kernel thread to update the MMP block periodically.
373  	 */
374  	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s",
375  					     (int)sizeof(mmp->mmp_bdevname),
376  					     mmp->mmp_bdevname);
377  	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
378  		EXT4_SB(sb)->s_mmp_tsk = NULL;
379  		ext4_warning(sb, "Unable to create kmmpd thread for %s.",
380  			     sb->s_id);
381  		goto failed;
382  	}
383  
384  	return 0;
385  
386  failed:
387  	brelse(bh);
388  	return 1;
389  }
390