1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * /dev/mcelog driver 4 * 5 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 6 * Rest from unknown author(s). 7 * 2004 Andi Kleen. Rewrote most of it. 8 * Copyright 2008 Intel Corporation 9 * Author: Andi Kleen 10 */ 11 12 #include <linux/miscdevice.h> 13 #include <linux/slab.h> 14 #include <linux/kmod.h> 15 #include <linux/poll.h> 16 17 #include "internal.h" 18 19 static BLOCKING_NOTIFIER_HEAD(mce_injector_chain); 20 21 static DEFINE_MUTEX(mce_chrdev_read_mutex); 22 23 static char mce_helper[128]; 24 static char *mce_helper_argv[2] = { mce_helper, NULL }; 25 26 /* 27 * Lockless MCE logging infrastructure. 28 * This avoids deadlocks on printk locks without having to break locks. Also 29 * separate MCEs from kernel messages to avoid bogus bug reports. 30 */ 31 32 static struct mce_log_buffer *mcelog; 33 34 static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); 35 36 static int dev_mce_log(struct notifier_block *nb, unsigned long val, 37 void *data) 38 { 39 struct mce *mce = (struct mce *)data; 40 unsigned int entry; 41 42 mutex_lock(&mce_chrdev_read_mutex); 43 44 entry = mcelog->next; 45 46 /* 47 * When the buffer fills up discard new entries. Assume that the 48 * earlier errors are the more interesting ones: 49 */ 50 if (entry >= mcelog->len) { 51 set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog->flags); 52 goto unlock; 53 } 54 55 mcelog->next = entry + 1; 56 57 memcpy(mcelog->entry + entry, mce, sizeof(struct mce)); 58 mcelog->entry[entry].finished = 1; 59 60 /* wake processes polling /dev/mcelog */ 61 wake_up_interruptible(&mce_chrdev_wait); 62 63 unlock: 64 mutex_unlock(&mce_chrdev_read_mutex); 65 66 return NOTIFY_OK; 67 } 68 69 static struct notifier_block dev_mcelog_nb = { 70 .notifier_call = dev_mce_log, 71 .priority = MCE_PRIO_MCELOG, 72 }; 73 74 static void mce_do_trigger(struct work_struct *work) 75 { 76 call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); 77 } 78 79 static DECLARE_WORK(mce_trigger_work, mce_do_trigger); 80 81 82 void mce_work_trigger(void) 83 { 84 if (mce_helper[0]) 85 schedule_work(&mce_trigger_work); 86 } 87 88 static ssize_t 89 show_trigger(struct device *s, struct device_attribute *attr, char *buf) 90 { 91 strcpy(buf, mce_helper); 92 strcat(buf, "\n"); 93 return strlen(mce_helper) + 1; 94 } 95 96 static ssize_t set_trigger(struct device *s, struct device_attribute *attr, 97 const char *buf, size_t siz) 98 { 99 char *p; 100 101 strncpy(mce_helper, buf, sizeof(mce_helper)); 102 mce_helper[sizeof(mce_helper)-1] = 0; 103 p = strchr(mce_helper, '\n'); 104 105 if (p) 106 *p = 0; 107 108 return strlen(mce_helper) + !!p; 109 } 110 111 DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger); 112 113 /* 114 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log. 115 */ 116 117 static DEFINE_SPINLOCK(mce_chrdev_state_lock); 118 static int mce_chrdev_open_count; /* #times opened */ 119 static int mce_chrdev_open_exclu; /* already open exclusive? */ 120 121 static int mce_chrdev_open(struct inode *inode, struct file *file) 122 { 123 spin_lock(&mce_chrdev_state_lock); 124 125 if (mce_chrdev_open_exclu || 126 (mce_chrdev_open_count && (file->f_flags & O_EXCL))) { 127 spin_unlock(&mce_chrdev_state_lock); 128 129 return -EBUSY; 130 } 131 132 if (file->f_flags & O_EXCL) 133 mce_chrdev_open_exclu = 1; 134 mce_chrdev_open_count++; 135 136 spin_unlock(&mce_chrdev_state_lock); 137 138 return nonseekable_open(inode, file); 139 } 140 141 static int mce_chrdev_release(struct inode *inode, struct file *file) 142 { 143 spin_lock(&mce_chrdev_state_lock); 144 145 mce_chrdev_open_count--; 146 mce_chrdev_open_exclu = 0; 147 148 spin_unlock(&mce_chrdev_state_lock); 149 150 return 0; 151 } 152 153 static int mce_apei_read_done; 154 155 /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ 156 static int __mce_read_apei(char __user **ubuf, size_t usize) 157 { 158 int rc; 159 u64 record_id; 160 struct mce m; 161 162 if (usize < sizeof(struct mce)) 163 return -EINVAL; 164 165 rc = apei_read_mce(&m, &record_id); 166 /* Error or no more MCE record */ 167 if (rc <= 0) { 168 mce_apei_read_done = 1; 169 /* 170 * When ERST is disabled, mce_chrdev_read() should return 171 * "no record" instead of "no device." 172 */ 173 if (rc == -ENODEV) 174 return 0; 175 return rc; 176 } 177 rc = -EFAULT; 178 if (copy_to_user(*ubuf, &m, sizeof(struct mce))) 179 return rc; 180 /* 181 * In fact, we should have cleared the record after that has 182 * been flushed to the disk or sent to network in 183 * /sbin/mcelog, but we have no interface to support that now, 184 * so just clear it to avoid duplication. 185 */ 186 rc = apei_clear_mce(record_id); 187 if (rc) { 188 mce_apei_read_done = 1; 189 return rc; 190 } 191 *ubuf += sizeof(struct mce); 192 193 return 0; 194 } 195 196 static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, 197 size_t usize, loff_t *off) 198 { 199 char __user *buf = ubuf; 200 unsigned next; 201 int i, err; 202 203 mutex_lock(&mce_chrdev_read_mutex); 204 205 if (!mce_apei_read_done) { 206 err = __mce_read_apei(&buf, usize); 207 if (err || buf != ubuf) 208 goto out; 209 } 210 211 /* Only supports full reads right now */ 212 err = -EINVAL; 213 if (*off != 0 || usize < mcelog->len * sizeof(struct mce)) 214 goto out; 215 216 next = mcelog->next; 217 err = 0; 218 219 for (i = 0; i < next; i++) { 220 struct mce *m = &mcelog->entry[i]; 221 222 err |= copy_to_user(buf, m, sizeof(*m)); 223 buf += sizeof(*m); 224 } 225 226 memset(mcelog->entry, 0, next * sizeof(struct mce)); 227 mcelog->next = 0; 228 229 if (err) 230 err = -EFAULT; 231 232 out: 233 mutex_unlock(&mce_chrdev_read_mutex); 234 235 return err ? err : buf - ubuf; 236 } 237 238 static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait) 239 { 240 poll_wait(file, &mce_chrdev_wait, wait); 241 if (READ_ONCE(mcelog->next)) 242 return EPOLLIN | EPOLLRDNORM; 243 if (!mce_apei_read_done && apei_check_mce()) 244 return EPOLLIN | EPOLLRDNORM; 245 return 0; 246 } 247 248 static long mce_chrdev_ioctl(struct file *f, unsigned int cmd, 249 unsigned long arg) 250 { 251 int __user *p = (int __user *)arg; 252 253 if (!capable(CAP_SYS_ADMIN)) 254 return -EPERM; 255 256 switch (cmd) { 257 case MCE_GET_RECORD_LEN: 258 return put_user(sizeof(struct mce), p); 259 case MCE_GET_LOG_LEN: 260 return put_user(mcelog->len, p); 261 case MCE_GETCLEAR_FLAGS: { 262 unsigned flags; 263 264 do { 265 flags = mcelog->flags; 266 } while (cmpxchg(&mcelog->flags, flags, 0) != flags); 267 268 return put_user(flags, p); 269 } 270 default: 271 return -ENOTTY; 272 } 273 } 274 275 void mce_register_injector_chain(struct notifier_block *nb) 276 { 277 blocking_notifier_chain_register(&mce_injector_chain, nb); 278 } 279 EXPORT_SYMBOL_GPL(mce_register_injector_chain); 280 281 void mce_unregister_injector_chain(struct notifier_block *nb) 282 { 283 blocking_notifier_chain_unregister(&mce_injector_chain, nb); 284 } 285 EXPORT_SYMBOL_GPL(mce_unregister_injector_chain); 286 287 static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf, 288 size_t usize, loff_t *off) 289 { 290 struct mce m; 291 292 if (!capable(CAP_SYS_ADMIN)) 293 return -EPERM; 294 /* 295 * There are some cases where real MSR reads could slip 296 * through. 297 */ 298 if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) 299 return -EIO; 300 301 if ((unsigned long)usize > sizeof(struct mce)) 302 usize = sizeof(struct mce); 303 if (copy_from_user(&m, ubuf, usize)) 304 return -EFAULT; 305 306 if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) 307 return -EINVAL; 308 309 /* 310 * Need to give user space some time to set everything up, 311 * so do it a jiffie or two later everywhere. 312 */ 313 schedule_timeout(2); 314 315 blocking_notifier_call_chain(&mce_injector_chain, 0, &m); 316 317 return usize; 318 } 319 320 static const struct file_operations mce_chrdev_ops = { 321 .open = mce_chrdev_open, 322 .release = mce_chrdev_release, 323 .read = mce_chrdev_read, 324 .write = mce_chrdev_write, 325 .poll = mce_chrdev_poll, 326 .unlocked_ioctl = mce_chrdev_ioctl, 327 .llseek = no_llseek, 328 }; 329 330 static struct miscdevice mce_chrdev_device = { 331 MISC_MCELOG_MINOR, 332 "mcelog", 333 &mce_chrdev_ops, 334 }; 335 336 static __init int dev_mcelog_init_device(void) 337 { 338 int mce_log_len; 339 int err; 340 341 mce_log_len = max(MCE_LOG_MIN_LEN, num_online_cpus()); 342 mcelog = kzalloc(sizeof(*mcelog) + mce_log_len * sizeof(struct mce), GFP_KERNEL); 343 if (!mcelog) 344 return -ENOMEM; 345 346 strncpy(mcelog->signature, MCE_LOG_SIGNATURE, sizeof(mcelog->signature)); 347 mcelog->len = mce_log_len; 348 mcelog->recordlen = sizeof(struct mce); 349 350 /* register character device /dev/mcelog */ 351 err = misc_register(&mce_chrdev_device); 352 if (err) { 353 if (err == -EBUSY) 354 /* Xen dom0 might have registered the device already. */ 355 pr_info("Unable to init device /dev/mcelog, already registered"); 356 else 357 pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err); 358 359 kfree(mcelog); 360 return err; 361 } 362 363 mce_register_decode_chain(&dev_mcelog_nb); 364 return 0; 365 } 366 device_initcall_sync(dev_mcelog_init_device); 367