1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * /dev/mcelog driver 4 * 5 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 6 * Rest from unknown author(s). 7 * 2004 Andi Kleen. Rewrote most of it. 8 * Copyright 2008 Intel Corporation 9 * Author: Andi Kleen 10 */ 11 12 #include <linux/miscdevice.h> 13 #include <linux/slab.h> 14 #include <linux/kmod.h> 15 #include <linux/poll.h> 16 17 #include "internal.h" 18 19 static BLOCKING_NOTIFIER_HEAD(mce_injector_chain); 20 21 static DEFINE_MUTEX(mce_chrdev_read_mutex); 22 23 static char mce_helper[128]; 24 static char *mce_helper_argv[2] = { mce_helper, NULL }; 25 26 /* 27 * Lockless MCE logging infrastructure. 28 * This avoids deadlocks on printk locks without having to break locks. Also 29 * separate MCEs from kernel messages to avoid bogus bug reports. 30 */ 31 32 static struct mce_log_buffer *mcelog; 33 34 static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); 35 36 static int dev_mce_log(struct notifier_block *nb, unsigned long val, 37 void *data) 38 { 39 struct mce *mce = (struct mce *)data; 40 unsigned int entry; 41 42 if (mce->kflags & MCE_HANDLED_CEC) 43 return NOTIFY_DONE; 44 45 mutex_lock(&mce_chrdev_read_mutex); 46 47 entry = mcelog->next; 48 49 /* 50 * When the buffer fills up discard new entries. Assume that the 51 * earlier errors are the more interesting ones: 52 */ 53 if (entry >= mcelog->len) { 54 set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags); 55 goto unlock; 56 } 57 58 mcelog->next = entry + 1; 59 60 memcpy(mcelog->entry + entry, mce, sizeof(struct mce)); 61 mcelog->entry[entry].finished = 1; 62 mcelog->entry[entry].kflags = 0; 63 64 /* wake processes polling /dev/mcelog */ 65 wake_up_interruptible(&mce_chrdev_wait); 66 67 unlock: 68 mutex_unlock(&mce_chrdev_read_mutex); 69 70 if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 71 mce->kflags |= MCE_HANDLED_MCELOG; 72 73 return NOTIFY_OK; 74 } 75 76 static struct notifier_block dev_mcelog_nb = { 77 .notifier_call = dev_mce_log, 78 .priority = MCE_PRIO_MCELOG, 79 }; 80 81 static void mce_do_trigger(struct work_struct *work) 82 { 83 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); 84 } 85 86 static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 87 88 89 void mce_work_trigger(void) 90 { 91 if (mce_helper[0]) 92 schedule_work(&mce_trigger_work); 93 } 94 95 static ssize_t 96 show_trigger(struct device *s, struct device_attribute *attr, char *buf) 97 { 98 strcpy(buf, mce_helper); 99 strcat(buf, "\n"); 100 return strlen(mce_helper) + 1; 101 } 102 103 static ssize_t set_trigger(struct device *s, struct device_attribute *attr, 104 const char *buf, size_t siz) 105 { 106 char *p; 107 108 strscpy(mce_helper, buf, sizeof(mce_helper)); 109 p = strchr(mce_helper, '\n'); 110 111 if (p) 112 *p = 0; 113 114 return strlen(mce_helper) + !!p; 115 } 116 117 DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); 118 119 /* 120 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. 121 */ 122 123 static DEFINE_SPINLOCK(mce_chrdev_state_lock); 124 static int mce_chrdev_open_count; /* #times opened */ 125 static int mce_chrdev_open_exclu; /* already open exclusive? */ 126 127 static int mce_chrdev_open(struct inode *inode, struct file *file) 128 { 129 spin_lock(&mce_chrdev_state_lock); 130 131 if (mce_chrdev_open_exclu || 132 (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 133 spin_unlock(&mce_chrdev_state_lock); 134 135 return -EBUSY; 136 } 137 138 if (file->f_flags & O_EXCL) 139 mce_chrdev_open_exclu = 1; 140 mce_chrdev_open_count++; 141 142 spin_unlock(&mce_chrdev_state_lock); 143 144 return nonseekable_open(inode, file); 145 } 146 147 static int mce_chrdev_release(struct inode *inode, struct file *file) 148 { 149 spin_lock(&mce_chrdev_state_lock); 150 151 mce_chrdev_open_count--; 152 mce_chrdev_open_exclu = 0; 153 154 spin_unlock(&mce_chrdev_state_lock); 155 156 return 0; 157 } 158 159 static int mce_apei_read_done; 160 161 /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ 162 static int __mce_read_apei(char __user **ubuf, size_t usize) 163 { 164 int rc; 165 u64 record_id; 166 struct mce m; 167 168 if (usize < sizeof(struct mce)) 169 return -EINVAL; 170 171 rc = apei_read_mce(&m, &record_id); 172 /* Error or no more MCE record */ 173 if (rc <= 0) { 174 mce_apei_read_done = 1; 175 /* 176 * When ERST is disabled, mce_chrdev_read() should return 177 * "no record" instead of "no device." 178 */ 179 if (rc == -ENODEV) 180 return 0; 181 return rc; 182 } 183 rc = -EFAULT; 184 if (copy_to_user(*ubuf, &m, sizeof(struct mce))) 185 return rc; 186 /* 187 * In fact, we should have cleared the record after that has 188 * been flushed to the disk or sent to network in 189 * /sbin/mcelog, but we have no interface to support that now, 190 * so just clear it to avoid duplication. 191 */ 192 rc = apei_clear_mce(record_id); 193 if (rc) { 194 mce_apei_read_done = 1; 195 return rc; 196 } 197 *ubuf += sizeof(struct mce); 198 199 return 0; 200 } 201 202 static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, 203 size_t usize, loff_t *off) 204 { 205 char __user *buf = ubuf; 206 unsigned next; 207 int i, err; 208 209 mutex_lock(&mce_chrdev_read_mutex); 210 211 if (!mce_apei_read_done) { 212 err = __mce_read_apei(&buf, usize); 213 if (err || buf != ubuf) 214 goto out; 215 } 216 217 /* Only supports full reads right now */ 218 err = -EINVAL; 219 if (*off != 0 || usize < mcelog->len * sizeof(struct mce)) 220 goto out; 221 222 next = mcelog->next; 223 err = 0; 224 225 for (i = 0; i < next; i++) { 226 struct mce *m = &mcelog->entry[i]; 227 228 err |= copy_to_user(buf, m, sizeof(*m)); 229 buf += sizeof(*m); 230 } 231 232 memset(mcelog->entry, 0, next * sizeof(struct mce)); 233 mcelog->next = 0; 234 235 if (err) 236 err = -EFAULT; 237 238 out: 239 mutex_unlock(&mce_chrdev_read_mutex); 240 241 return err ? err : buf - ubuf; 242 } 243 244 static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait) 245 { 246 poll_wait(file, &mce_chrdev_wait, wait); 247 if (READ_ONCE(mcelog->next)) 248 return EPOLLIN | EPOLLRDNORM; 249 if (!mce_apei_read_done && apei_check_mce()) 250 return EPOLLIN | EPOLLRDNORM; 251 return 0; 252 } 253 254 static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, 255 unsigned long arg) 256 { 257 int __user *p = (int __user *)arg; 258 259 if (!capable(CAP_SYS_ADMIN)) 260 return -EPERM; 261 262 switch (cmd) { 263 case MCE_GET_RECORD_LEN: 264 return put_user(sizeof(struct mce), p); 265 case MCE_GET_LOG_LEN: 266 return put_user(mcelog->len, p); 267 case MCE_GETCLEAR_FLAGS: { 268 unsigned flags; 269 270 do { 271 flags = mcelog->flags; 272 } while (cmpxchg(&mcelog->flags, flags, 0) != flags); 273 274 return put_user(flags, p); 275 } 276 default: 277 return -ENOTTY; 278 } 279 } 280 281 void mce_register_injector_chain(struct notifier_block *nb) 282 { 283 blocking_notifier_chain_register(&mce_injector_chain, nb); 284 } 285 EXPORT_SYMBOL_GPL(mce_register_injector_chain); 286 287 void mce_unregister_injector_chain(struct notifier_block *nb) 288 { 289 blocking_notifier_chain_unregister(&mce_injector_chain, nb); 290 } 291 EXPORT_SYMBOL_GPL(mce_unregister_injector_chain); 292 293 static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, 294 size_t usize, loff_t *off) 295 { 296 struct mce m; 297 298 if (!capable(CAP_SYS_ADMIN)) 299 return -EPERM; 300 /* 301 * There are some cases where real MSR reads could slip 302 * through. 303 */ 304 if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) 305 return -EIO; 306 307 if ((unsigned long)usize > sizeof(struct mce)) 308 usize = sizeof(struct mce); 309 if (copy_from_user(&m, ubuf, usize)) 310 return -EFAULT; 311 312 if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) 313 return -EINVAL; 314 315 /* 316 * Need to give user space some time to set everything up, 317 * so do it a jiffie or two later everywhere. 318 */ 319 schedule_timeout(2); 320 321 blocking_notifier_call_chain(&mce_injector_chain, 0, &m); 322 323 return usize; 324 } 325 326 static const struct file_operations mce_chrdev_ops = { 327 .open = mce_chrdev_open, 328 .release = mce_chrdev_release, 329 .read = mce_chrdev_read, 330 .write = mce_chrdev_write, 331 .poll = mce_chrdev_poll, 332 .unlocked_ioctl = mce_chrdev_ioctl, 333 .compat_ioctl = compat_ptr_ioctl, 334 .llseek = no_llseek, 335 }; 336 337 static struct miscdevice mce_chrdev_device = { 338 MISC_MCELOG_MINOR, 339 "mcelog", 340 &mce_chrdev_ops, 341 }; 342 343 static __init int dev_mcelog_init_device(void) 344 { 345 int mce_log_len; 346 int err; 347 348 mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus()); 349 mcelog = kzalloc(struct_size(mcelog, entry, mce_log_len), GFP_KERNEL); 350 if (!mcelog) 351 return -ENOMEM; 352 353 memcpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature)); 354 mcelog->len = mce_log_len; 355 mcelog->recordlen = sizeof(struct mce); 356 357 /* register character device /dev/mcelog */ 358 err = misc_register(&mce_chrdev_device); 359 if (err) { 360 if (err == -EBUSY) 361 /* Xen dom0 might have registered the device already. */ 362 pr_info("Unable to init device /dev/mcelog, already registered"); 363 else 364 pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err); 365 366 kfree(mcelog); 367 return err; 368 } 369 370 mce_register_decode_chain(&dev_mcelog_nb); 371 return 0; 372 } 373 device_initcall_sync(dev_mcelog_init_device); 374