/*
 * /dev/mcelog driver
 *
 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
 * Rest from unknown author(s).
 * 2004 Andi Kleen. Rewrote most of it.
 * Copyright 2008 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/poll.h>

#include "internal.h"

static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);

static DEFINE_MUTEX(mce_chrdev_read_mutex);

static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };

/*
 * Lockless MCE logging infrastructure.
 * This avoids deadlocks on printk locks without having to break locks. It
 * also keeps MCEs separate from kernel messages to avoid bogus bug reports.
 */

static struct mce_log_buffer mcelog = {
	.signature	= MCE_LOG_SIGNATURE,
	.len		= MCE_LOG_LEN,
	.recordlen	= sizeof(struct mce),
};

static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);

static int dev_mce_log(struct notifier_block *nb, unsigned long val,
			void *data)
{
	struct mce *mce = (struct mce *)data;
	unsigned int entry;

	mutex_lock(&mce_chrdev_read_mutex);

	entry = mcelog.next;

	/*
	 * When the buffer fills up, discard new entries. Assume that the
	 * earlier errors are the more interesting ones:
	 */
	if (entry >= MCE_LOG_LEN) {
		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
		goto unlock;
	}

	mcelog.next = entry + 1;

	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
	mcelog.entry[entry].finished = 1;

	/* wake processes polling /dev/mcelog */
	wake_up_interruptible(&mce_chrdev_wait);

unlock:
	mutex_unlock(&mce_chrdev_read_mutex);

	return NOTIFY_OK;
}

static struct notifier_block dev_mcelog_nb = {
	.notifier_call	= dev_mce_log,
	.priority	= MCE_PRIO_MCELOG,
};

static void mce_do_trigger(struct work_struct *work)
{
	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
}

static DECLARE_WORK(mce_trigger_work, mce_do_trigger);

void mce_work_trigger(void)
{
	if (mce_helper[0])
		schedule_work(&mce_trigger_work);
}

static ssize_t
show_trigger(struct device *s, struct device_attribute *attr, char *buf)
{
	strcpy(buf, mce_helper);
	strcat(buf, "\n");
	return strlen(mce_helper) + 1;
}

static ssize_t set_trigger(struct device *s, struct device_attribute *attr,
			   const char *buf, size_t siz)
{
	char *p;

	strncpy(mce_helper, buf, sizeof(mce_helper));
	mce_helper[sizeof(mce_helper)-1] = 0;
	p = strchr(mce_helper, '\n');

	if (p)
		*p = 0;

	return strlen(mce_helper) + !!p;
}

DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
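/*
 * Note: the trigger attribute above is registered by the MCE core for
 * each machinecheck device, so it typically appears in sysfs as
 * /sys/devices/system/machinecheck/machinecheck<N>/trigger (path given
 * for illustration; it may differ by kernel version and config).
 * Writing a program path arms the trigger, writing an empty line disarms
 * it, and the helper is then launched via the workqueue above whenever a
 * new MCE is logged.
 */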
/*
 * mce_chrdev: Character device /dev/mcelog to read and clear the MCE log.
 */

static DEFINE_SPINLOCK(mce_chrdev_state_lock);
static int mce_chrdev_open_count;	/* #times opened */
static int mce_chrdev_open_exclu;	/* already open exclusive? */

static int mce_chrdev_open(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	if (mce_chrdev_open_exclu ||
	    (mce_chrdev_open_count && (file->f_flags & O_EXCL))) {
		spin_unlock(&mce_chrdev_state_lock);

		return -EBUSY;
	}

	if (file->f_flags & O_EXCL)
		mce_chrdev_open_exclu = 1;
	mce_chrdev_open_count++;

	spin_unlock(&mce_chrdev_state_lock);

	return nonseekable_open(inode, file);
}

static int mce_chrdev_release(struct inode *inode, struct file *file)
{
	spin_lock(&mce_chrdev_state_lock);

	mce_chrdev_open_count--;
	mce_chrdev_open_exclu = 0;

	spin_unlock(&mce_chrdev_state_lock);

	return 0;
}

static int mce_apei_read_done;

/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
static int __mce_read_apei(char __user **ubuf, size_t usize)
{
	int rc;
	u64 record_id;
	struct mce m;

	if (usize < sizeof(struct mce))
		return -EINVAL;

	rc = apei_read_mce(&m, &record_id);
	/* Error or no more MCE record */
	if (rc <= 0) {
		mce_apei_read_done = 1;
		/*
		 * When ERST is disabled, mce_chrdev_read() should return
		 * "no record" instead of "no device."
		 */
		if (rc == -ENODEV)
			return 0;
		return rc;
	}
	rc = -EFAULT;
	if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
		return rc;
	/*
	 * Ideally the record would be cleared only after it has been
	 * flushed to disk or sent over the network by /sbin/mcelog, but
	 * there is no interface for that yet, so clear it here to avoid
	 * duplication.
	 */
	rc = apei_clear_mce(record_id);
	if (rc) {
		mce_apei_read_done = 1;
		return rc;
	}
	*ubuf += sizeof(struct mce);

	return 0;
}

static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
			       size_t usize, loff_t *off)
{
	char __user *buf = ubuf;
	unsigned next;
	int i, err;

	mutex_lock(&mce_chrdev_read_mutex);

	if (!mce_apei_read_done) {
		err = __mce_read_apei(&buf, usize);
		if (err || buf != ubuf)
			goto out;
	}

	/* Only supports full reads right now */
	err = -EINVAL;
	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
		goto out;

	next = mcelog.next;
	err = 0;

	for (i = 0; i < next; i++) {
		struct mce *m = &mcelog.entry[i];

		err |= copy_to_user(buf, m, sizeof(*m));
		buf += sizeof(*m);
	}

	memset(mcelog.entry, 0, next * sizeof(struct mce));
	mcelog.next = 0;

	if (err)
		err = -EFAULT;

out:
	mutex_unlock(&mce_chrdev_read_mutex);

	return err ? err : buf - ubuf;
}
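/*
 * The device reports readable once at least one record is buffered, or
 * while records saved by APEI from an earlier boot have not been drained
 * yet.
 */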
static __poll_t mce_chrdev_poll(struct file *file, poll_table *wait)
{
	poll_wait(file, &mce_chrdev_wait, wait);
	if (READ_ONCE(mcelog.next))
		return EPOLLIN | EPOLLRDNORM;
	if (!mce_apei_read_done && apei_check_mce())
		return EPOLLIN | EPOLLRDNORM;
	return 0;
}

static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
			     unsigned long arg)
{
	int __user *p = (int __user *)arg;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case MCE_GET_RECORD_LEN:
		return put_user(sizeof(struct mce), p);
	case MCE_GET_LOG_LEN:
		return put_user(MCE_LOG_LEN, p);
	case MCE_GETCLEAR_FLAGS: {
		unsigned flags;

		do {
			flags = mcelog.flags;
		} while (cmpxchg(&mcelog.flags, flags, 0) != flags);

		return put_user(flags, p);
	}
	default:
		return -ENOTTY;
	}
}

void mce_register_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_register(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_register_injector_chain);

void mce_unregister_injector_chain(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&mce_injector_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);

static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
				size_t usize, loff_t *off)
{
	struct mce m;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	/*
	 * There are some cases where real MSR reads could slip
	 * through.
	 */
	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
		return -EIO;

	if ((unsigned long)usize > sizeof(struct mce))
		usize = sizeof(struct mce);
	if (copy_from_user(&m, ubuf, usize))
		return -EFAULT;

	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
		return -EINVAL;

	/*
	 * Need to give user space some time to set everything up,
	 * so do it a jiffie or two later everywhere.
	 */
	schedule_timeout(2);

	blocking_notifier_call_chain(&mce_injector_chain, 0, &m);

	return usize;
}

static const struct file_operations mce_chrdev_ops = {
	.open			= mce_chrdev_open,
	.release		= mce_chrdev_release,
	.read			= mce_chrdev_read,
	.write			= mce_chrdev_write,
	.poll			= mce_chrdev_poll,
	.unlocked_ioctl		= mce_chrdev_ioctl,
	.llseek			= no_llseek,
};

static struct miscdevice mce_chrdev_device = {
	MISC_MCELOG_MINOR,
	"mcelog",
	&mce_chrdev_ops,
};

static __init int dev_mcelog_init_device(void)
{
	int err;

	/* register character device /dev/mcelog */
	err = misc_register(&mce_chrdev_device);
	if (err) {
		if (err == -EBUSY)
			/* Xen dom0 might have registered the device already. */
			pr_info("Unable to init device /dev/mcelog, already registered\n");
		else
			pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);

		return err;
	}

	mce_register_decode_chain(&dev_mcelog_nb);
	return 0;
}
device_initcall_sync(dev_mcelog_init_device);
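/*
 * Example (hedged sketch, not part of the driver): a minimal user-space
 * reader for /dev/mcelog. It queries the record and log lengths via the
 * ioctls above and then issues the single full-sized read that
 * mce_chrdev_read() requires. It assumes the uapi ioctl definitions are
 * visible via <asm/mce.h>; the device path and error handling are
 * illustrative only, and the ioctls need CAP_SYS_ADMIN.
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <asm/mce.h>
 *
 *	int main(void)
 *	{
 *		int fd, reclen, loglen;
 *		ssize_t n;
 *		char *buf;
 *
 *		fd = open("/dev/mcelog", O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *		if (ioctl(fd, MCE_GET_RECORD_LEN, &reclen) < 0 ||
 *		    ioctl(fd, MCE_GET_LOG_LEN, &loglen) < 0)
 *			return 1;
 *		buf = malloc((size_t)reclen * loglen);
 *		if (!buf)
 *			return 1;
 *		// Only full reads are supported: ask for the whole log.
 *		n = read(fd, buf, (size_t)reclen * loglen);
 *		if (n >= 0)
 *			printf("read %zd bytes (%zd records)\n", n, n / reclen);
 *		free(buf);
 *		close(fd);
 *		return n < 0 ? 1 : 0;
 *	}
 */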