// SPDX-License-Identifier: GPL-2.0-only
/*
 * /dev/mcelog driver
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>

#include "internal.h"

static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);

static DEFINE_MUTEX(mce_chrdev_read_mutex);

static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. It
 * also keeps MCEs separate from kernel messages to avoid bogus bug reports.
 */

static struct mce_log_buffer mcelog = {
	.signature	= MCE_LOG_SIGNATURE,
	.len		= MCE_LOG_LEN,
	.recordlen	= sizeof(struct mce),
};

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);

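/*
 * Notifier callback: append one MCE record to the legacy ring buffer
 * and wake up any readers sleeping in poll() on /dev/mcelog.
 */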
static int dev_mce_log(struct notifier_block *nb, unsigned long val,
				void *data)
{
	struct mce *mce = (struct mce *)data;
	unsigned int entry;

	mutex_lock(&mce_chrdev_read_mutex);

	entry = mcelog.next;

	/*
	 * When the buffer fills up, discard new entries. Assume that the
	 * earlier errors are the more interesting ones:
	 */
	if (entry >= MCE_LOG_LEN) {
		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
		goto unlock;
	}

	mcelog.next = entry + 1;

	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	mcelog.entry[entry].finished = 1;

	/* wake processes polling /dev/mcelog */
	wake_up_interruptible(&mce_chrdev_wait);

unlock:
	mutex_unlock(&mce_chrdev_read_mutex);

	return NOTIFY_OK;
}

static struct notifier_block dev_mcelog_nb = {
	.notifier_call	= dev_mce_log,
	.priority	= MCE_PRIO_MCELOG,
};

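/*
 * Run the user-configured "trigger" program. call_usermodehelper() cannot
 * be invoked from the machine-check notification path, so the launch is
 * deferred to process context via a workqueue; mce_work_trigger()
 * schedules it whenever a trigger program has been configured.
 */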
static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

void mce_work_trigger(void)
{
	if (mce_helper[0])
		schedule_work(&mce_trigger_work);
}

static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
	strcpy(buf, mce_helper);
	strcat(buf, "\n");
	return strlen(mce_helper) + 1;
}

static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
				const char *buf, size_t siz)
{
	char *p;

	strncpy(mce_helper, buf, sizeof(mce_helper));
	mce_helper[sizeof(mce_helper) - 1] = 0;
	p = strchr(mce_helper, '\n');

	if (p)
		*p = 0;

	return strlen(mce_helper) + !!p;
}

DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);

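/*
 * Illustrative usage (the handler path is a placeholder): an administrator
 * points the trigger at a script, which is then run once per logged MCE:
 *
 *	echo /usr/local/sbin/mce-handler > \
 *		/sys/devices/system/machinecheck/machinecheck0/trigger
 */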
/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;	/* #times opened */
static int mce_chrdev_open_exclu;	/* already open exclusive? */

static int mce_chrdev_open(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	if (mce_chrdev_open_exclu ||
	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_chrdev_state_lock);

		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		mce_chrdev_open_exclu = 1;
	mce_chrdev_open_count++;

	spin_unlock(&mce_chrdev_state_lock);

	return nonseekable_open(inode, file);
}

static int mce_chrdev_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	mce_chrdev_open_count--;
	mce_chrdev_open_exclu = 0;

	spin_unlock(&mce_chrdev_state_lock);

	return 0;
}

static int mce_apei_read_done;

/*
 * Collect one MCE record of a previous boot from persistent storage via
 * APEI ERST.
 */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
	int rc;
	u64 record_id;
	struct mce m;

	if (usize < sizeof(struct mce))
		return -EINVAL;

	rc = apei_read_mce(&m, &record_id);
	/* Error or no more MCE records */
	if (rc <= 0) {
		mce_apei_read_done = 1;
		/*
		 * When ERST is disabled, mce_chrdev_read() should return
		 * "no record" instead of "no device."
		 */
		if (rc == -ENODEV)
			return 0;
		return rc;
	}
	rc = -EFAULT;
	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
		return rc;
	/*
	 * Ideally the record would be cleared only after it has been
	 * flushed to disk or sent over the network by /sbin/mcelog, but
	 * there is no interface for that yet, so clear it here to avoid
	 * duplicates.
	 */
	rc = apei_clear_mce(record_id);
	if (rc) {
		mce_apei_read_done = 1;
		return rc;
	}
	*ubuf += sizeof(struct mce);

	return 0;
}

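/*
 * Read out the MCE log: first any records preserved from an earlier boot
 * via APEI ERST, then the in-memory buffer. Only full-sized reads are
 * supported, and a successful read clears the in-memory log.
 */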
static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
				size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned int next;
	int i, err;

	mutex_lock(&mce_chrdev_read_mutex);

	if (!mce_apei_read_done) {
		err = __mce_read_apei(&buf, usize);
		if (err || buf != ubuf)
			goto out;
	}

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < MCE_LOG_LEN * sizeof(struct mce))
		goto out;

	next = mcelog.next;
	err = 0;

	for (i = 0; i < next; i++) {
		struct mce *m = &mcelog.entry[i];

		err |= copy_to_user(buf, m, sizeof(*m));
		buf += sizeof(*m);
	}

	memset(mcelog.entry, 0, next * sizeof(struct mce));
	mcelog.next = 0;

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mce_chrdev_read_mutex);

	return err ? err : buf - ubuf;
}

static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_chrdev_wait, wait);
	if (READ_ONCE(mcelog.next))
		return EPOLLIN | EPOLLRDNORM;
	if (!mce_apei_read_done && apei_check_mce())
		return EPOLLIN | EPOLLRDNORM;
	return 0;
}

static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
				unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned int flags;

		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}
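
/*
 * Illustrative userspace sketch (not part of this file): query the record
 * and buffer sizes, then drain the log with one full-sized read, the only
 * read size mce_chrdev_read() accepts.
 *
 *	int fd = open("/dev/mcelog", O_RDONLY);
 *	int recsize = 0, loglen = 0;
 *	char *buf;
 *	ssize_t n;
 *
 *	ioctl(fd, MCE_GET_RECORD_LEN, &recsize);
 *	ioctl(fd, MCE_GET_LOG_LEN, &loglen);
 *	buf = malloc((size_t)recsize * loglen);
 *	n = read(fd, buf, (size_t)recsize * loglen);
 *
 * On success, n / recsize is the number of struct mce records returned.
 */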

void mce_register_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_register(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_injector_chain);

void mce_unregister_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);

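/*
 * Writing to /dev/mcelog injects a fake machine-check record: the struct
 * mce is copied in from userspace and handed to whoever has registered on
 * the injector chain above (e.g. the mce-inject module, when loaded).
 */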
static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
				size_t usize, loff_t *off)
{
	struct mce m;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/*
	 * There are some cases where real MSR reads could slip
	 * through.
	 */
	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
		return -EIO;

	if ((unsigned long)usize > sizeof(struct mce))
		usize = sizeof(struct mce);
	if (copy_from_user(&m, ubuf, usize))
		return -EFAULT;

	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
		return -EINVAL;

	/*
	 * Need to give user space some time to set everything up,
	 * so do it a jiffy or two later everywhere.
	 */
	schedule_timeout(2);

	blocking_notifier_call_chain(&mce_injector_chain, 0, &m);

	return usize;
}

static const struct file_operations mce_chrdev_ops = {
	.open			= mce_chrdev_open,
	.release		= mce_chrdev_release,
	.read			= mce_chrdev_read,
	.write			= mce_chrdev_write,
	.poll			= mce_chrdev_poll,
	.unlocked_ioctl		= mce_chrdev_ioctl,
	.llseek			= no_llseek,
};

static struct miscdevice mce_chrdev_device = {
	.minor	= MISC_MCELOG_MINOR,
	.name	= "mcelog",
	.fops	= &mce_chrdev_ops,
};

static __init int dev_mcelog_init_device(void)
{
	int err;

	/* register character device /dev/mcelog */
	err = misc_register(&mce_chrdev_device);
	if (err) {
		if (err == -EBUSY)
			/* Xen dom0 might have registered the device already. */
			pr_info("Unable to init device /dev/mcelog, already registered\n");
		else
			pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);

		return err;
	}

	mce_register_decode_chain(&dev_mcelog_nb);
	return 0;
}
device_initcall_sync(dev_mcelog_init_device);