1 /* 2 * salinfo.c 3 * 4 * Creates entries in /proc/sal for various system features. 5 * 6 * Copyright (c) 2003, 2006 Silicon Graphics, Inc. All rights reserved. 7 * Copyright (c) 2003 Hewlett-Packard Co 8 * Bjorn Helgaas <bjorn.helgaas@hp.com> 9 * 10 * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo 11 * code to create this file 12 * Oct 23 2003 kaos@sgi.com 13 * Replace IPI with set_cpus_allowed() to read a record from the required cpu. 14 * Redesign salinfo log processing to separate interrupt and user space 15 * contexts. 16 * Cache the record across multi-block reads from user space. 17 * Support > 64 cpus. 18 * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module. 19 * 20 * Jan 28 2004 kaos@sgi.com 21 * Periodically check for outstanding MCA or INIT records. 22 * 23 * Dec 5 2004 kaos@sgi.com 24 * Standardize which records are cleared automatically. 25 * 26 * Aug 18 2005 kaos@sgi.com 27 * mca.c may not pass a buffer, a NULL buffer just indicates that a new 28 * record is available in SAL. 29 * Replace some NR_CPUS by cpus_online, for hotplug cpu. 30 * 31 * Jan 5 2006 kaos@sgi.com 32 * Handle hotplug cpus coming online. 33 * Handle hotplug cpus going offline while they still have outstanding records. 34 * Use the cpu_* macros consistently. 35 * Replace the counting semaphore with a mutex and a test if the cpumask is non-empty. 36 * Modify the locking to make the test for "work to do" an atomic operation. 37 */ 38 39 #include <linux/capability.h> 40 #include <linux/cpu.h> 41 #include <linux/types.h> 42 #include <linux/proc_fs.h> 43 #include <linux/seq_file.h> 44 #include <linux/module.h> 45 #include <linux/smp.h> 46 #include <linux/timer.h> 47 #include <linux/vmalloc.h> 48 #include <linux/semaphore.h> 49 50 #include <asm/sal.h> 51 #include <linux/uaccess.h> 52 53 MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>"); 54 MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); 55 MODULE_LICENSE("GPL"); 56 57 static const struct file_operations proc_salinfo_fops; 58 59 typedef struct { 60 const char *name; /* name of the proc entry */ 61 unsigned long feature; /* feature bit */ 62 struct proc_dir_entry *entry; /* registered entry (removal) */ 63 } salinfo_entry_t; 64 65 /* 66 * List {name,feature} pairs for every entry in /proc/sal/<feature> 67 * that this module exports 68 */ 69 static const salinfo_entry_t salinfo_entries[]={ 70 { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, }, 71 { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, }, 72 { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, }, 73 { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, }, 74 }; 75 76 #define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries) 77 78 static char *salinfo_log_name[] = { 79 "mca", 80 "init", 81 "cmc", 82 "cpe", 83 }; 84 85 static struct proc_dir_entry *salinfo_proc_entries[ 86 ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */ 87 ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */ 88 (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */ 89 1]; /* /proc/sal */ 90 91 /* Some records we get ourselves, some are accessed as saved data in buffers 92 * that are owned by mca.c. 93 */ 94 struct salinfo_data_saved { 95 u8* buffer; 96 u64 size; 97 u64 id; 98 int cpu; 99 }; 100 101 /* State transitions. Actions are :- 102 * Write "read <cpunum>" to the data file. 103 * Write "clear <cpunum>" to the data file. 104 * Write "oemdata <cpunum> <offset> to the data file. 105 * Read from the data file. 106 * Close the data file. 107 * 108 * Start state is NO_DATA. 109 * 110 * NO_DATA 111 * write "read <cpunum>" -> NO_DATA or LOG_RECORD. 112 * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. 113 * write "oemdata <cpunum> <offset> -> return -EINVAL. 114 * read data -> return EOF. 115 * close -> unchanged. Free record areas. 116 * 117 * LOG_RECORD 118 * write "read <cpunum>" -> NO_DATA or LOG_RECORD. 119 * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. 120 * write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA. 121 * read data -> return the INIT/MCA/CMC/CPE record. 122 * close -> unchanged. Keep record areas. 123 * 124 * OEMDATA 125 * write "read <cpunum>" -> NO_DATA or LOG_RECORD. 126 * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. 127 * write "oemdata <cpunum> <offset> -> format the oem data, goto OEMDATA. 128 * read data -> return the formatted oemdata. 129 * close -> unchanged. Keep record areas. 130 * 131 * Closing the data file does not change the state. This allows shell scripts 132 * to manipulate salinfo data, each shell redirection opens the file, does one 133 * action then closes it again. The record areas are only freed at close when 134 * the state is NO_DATA. 135 */ 136 enum salinfo_state { 137 STATE_NO_DATA, 138 STATE_LOG_RECORD, 139 STATE_OEMDATA, 140 }; 141 142 struct salinfo_data { 143 cpumask_t cpu_event; /* which cpus have outstanding events */ 144 wait_queue_head_t read_wait; 145 u8 *log_buffer; 146 u64 log_size; 147 u8 *oemdata; /* decoded oem data */ 148 u64 oemdata_size; 149 int open; /* single-open to prevent races */ 150 u8 type; 151 u8 saved_num; /* using a saved record? */ 152 enum salinfo_state state :8; /* processing state */ 153 u8 padding; 154 int cpu_check; /* next CPU to check */ 155 struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */ 156 }; 157 158 static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)]; 159 160 static DEFINE_SPINLOCK(data_lock); 161 static DEFINE_SPINLOCK(data_saved_lock); 162 163 /** salinfo_platform_oemdata - optional callback to decode oemdata from an error 164 * record. 165 * @sect_header: pointer to the start of the section to decode. 166 * @oemdata: returns vmalloc area containing the decoded output. 167 * @oemdata_size: returns length of decoded output (strlen). 168 * 169 * Description: If user space asks for oem data to be decoded by the kernel 170 * and/or prom and the platform has set salinfo_platform_oemdata to the address 171 * of a platform specific routine then call that routine. salinfo_platform_oemdata 172 * vmalloc's and formats its output area, returning the address of the text 173 * and its strlen. Returns 0 for success, -ve for error. The callback is 174 * invoked on the cpu that generated the error record. 175 */ 176 int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size); 177 178 struct salinfo_platform_oemdata_parms { 179 const u8 *efi_guid; 180 u8 **oemdata; 181 u64 *oemdata_size; 182 }; 183 184 static long 185 salinfo_platform_oemdata_cpu(void *context) 186 { 187 struct salinfo_platform_oemdata_parms *parms = context; 188 189 return salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); 190 } 191 192 static void 193 shift1_data_saved (struct salinfo_data *data, int shift) 194 { 195 memcpy(data->data_saved+shift, data->data_saved+shift+1, 196 (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0])); 197 memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0, 198 sizeof(data->data_saved[0])); 199 } 200 201 /* This routine is invoked in interrupt context. Note: mca.c enables 202 * interrupts before calling this code for CMC/CPE. MCA and INIT events are 203 * not irq safe, do not call any routines that use spinlocks, they may deadlock. 204 * MCA and INIT records are recorded, a timer event will look for any 205 * outstanding events and wake up the user space code. 206 * 207 * The buffer passed from mca.c points to the output from ia64_log_get. This is 208 * a persistent buffer but its contents can change between the interrupt and 209 * when user space processes the record. Save the record id to identify 210 * changes. If the buffer is NULL then just update the bitmap. 211 */ 212 void 213 salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) 214 { 215 struct salinfo_data *data = salinfo_data + type; 216 struct salinfo_data_saved *data_saved; 217 unsigned long flags = 0; 218 int i; 219 int saved_size = ARRAY_SIZE(data->data_saved); 220 221 BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); 222 223 if (irqsafe) 224 spin_lock_irqsave(&data_saved_lock, flags); 225 if (buffer) { 226 for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { 227 if (!data_saved->buffer) 228 break; 229 } 230 if (i == saved_size) { 231 if (!data->saved_num) { 232 shift1_data_saved(data, 0); 233 data_saved = data->data_saved + saved_size - 1; 234 } else 235 data_saved = NULL; 236 } 237 if (data_saved) { 238 data_saved->cpu = smp_processor_id(); 239 data_saved->id = ((sal_log_record_header_t *)buffer)->id; 240 data_saved->size = size; 241 data_saved->buffer = buffer; 242 } 243 } 244 cpumask_set_cpu(smp_processor_id(), &data->cpu_event); 245 if (irqsafe) { 246 wake_up_interruptible(&data->read_wait); 247 spin_unlock_irqrestore(&data_saved_lock, flags); 248 } 249 } 250 251 /* Check for outstanding MCA/INIT records every minute (arbitrary) */ 252 #define SALINFO_TIMER_DELAY (60*HZ) 253 static struct timer_list salinfo_timer; 254 extern void ia64_mlogbuf_dump(void); 255 256 static void 257 salinfo_timeout_check(struct salinfo_data *data) 258 { 259 if (!data->open) 260 return; 261 if (!cpumask_empty(&data->cpu_event)) 262 wake_up_interruptible(&data->read_wait); 263 } 264 265 static void 266 salinfo_timeout(struct timer_list *unused) 267 { 268 ia64_mlogbuf_dump(); 269 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); 270 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); 271 salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; 272 add_timer(&salinfo_timer); 273 } 274 275 static int 276 salinfo_event_open(struct inode *inode, struct file *file) 277 { 278 if (!capable(CAP_SYS_ADMIN)) 279 return -EPERM; 280 return 0; 281 } 282 283 static ssize_t 284 salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) 285 { 286 struct salinfo_data *data = PDE_DATA(file_inode(file)); 287 char cmd[32]; 288 size_t size; 289 int i, n, cpu = -1; 290 291 retry: 292 if (cpumask_empty(&data->cpu_event)) { 293 if (file->f_flags & O_NONBLOCK) 294 return -EAGAIN; 295 if (wait_event_interruptible(data->read_wait, 296 !cpumask_empty(&data->cpu_event))) 297 return -EINTR; 298 } 299 300 n = data->cpu_check; 301 for (i = 0; i < nr_cpu_ids; i++) { 302 if (cpumask_test_cpu(n, &data->cpu_event)) { 303 if (!cpu_online(n)) { 304 cpumask_clear_cpu(n, &data->cpu_event); 305 continue; 306 } 307 cpu = n; 308 break; 309 } 310 if (++n == nr_cpu_ids) 311 n = 0; 312 } 313 314 if (cpu == -1) 315 goto retry; 316 317 ia64_mlogbuf_dump(); 318 319 /* for next read, start checking at next CPU */ 320 data->cpu_check = cpu; 321 if (++data->cpu_check == nr_cpu_ids) 322 data->cpu_check = 0; 323 324 snprintf(cmd, sizeof(cmd), "read %d\n", cpu); 325 326 size = strlen(cmd); 327 if (size > count) 328 size = count; 329 if (copy_to_user(buffer, cmd, size)) 330 return -EFAULT; 331 332 return size; 333 } 334 335 static const struct file_operations salinfo_event_fops = { 336 .open = salinfo_event_open, 337 .read = salinfo_event_read, 338 .llseek = noop_llseek, 339 }; 340 341 static int 342 salinfo_log_open(struct inode *inode, struct file *file) 343 { 344 struct salinfo_data *data = PDE_DATA(inode); 345 346 if (!capable(CAP_SYS_ADMIN)) 347 return -EPERM; 348 349 spin_lock(&data_lock); 350 if (data->open) { 351 spin_unlock(&data_lock); 352 return -EBUSY; 353 } 354 data->open = 1; 355 spin_unlock(&data_lock); 356 357 if (data->state == STATE_NO_DATA && 358 !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) { 359 data->open = 0; 360 return -ENOMEM; 361 } 362 363 return 0; 364 } 365 366 static int 367 salinfo_log_release(struct inode *inode, struct file *file) 368 { 369 struct salinfo_data *data = PDE_DATA(inode); 370 371 if (data->state == STATE_NO_DATA) { 372 vfree(data->log_buffer); 373 vfree(data->oemdata); 374 data->log_buffer = NULL; 375 data->oemdata = NULL; 376 } 377 spin_lock(&data_lock); 378 data->open = 0; 379 spin_unlock(&data_lock); 380 return 0; 381 } 382 383 static long 384 salinfo_log_read_cpu(void *context) 385 { 386 struct salinfo_data *data = context; 387 sal_log_record_header_t *rh; 388 data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer); 389 rh = (sal_log_record_header_t *)(data->log_buffer); 390 /* Clear corrected errors as they are read from SAL */ 391 if (rh->severity == sal_log_severity_corrected) 392 ia64_sal_clear_state_info(data->type); 393 return 0; 394 } 395 396 static void 397 salinfo_log_new_read(int cpu, struct salinfo_data *data) 398 { 399 struct salinfo_data_saved *data_saved; 400 unsigned long flags; 401 int i; 402 int saved_size = ARRAY_SIZE(data->data_saved); 403 404 data->saved_num = 0; 405 spin_lock_irqsave(&data_saved_lock, flags); 406 retry: 407 for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { 408 if (data_saved->buffer && data_saved->cpu == cpu) { 409 sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer); 410 data->log_size = data_saved->size; 411 memcpy(data->log_buffer, rh, data->log_size); 412 barrier(); /* id check must not be moved */ 413 if (rh->id == data_saved->id) { 414 data->saved_num = i+1; 415 break; 416 } 417 /* saved record changed by mca.c since interrupt, discard it */ 418 shift1_data_saved(data, i); 419 goto retry; 420 } 421 } 422 spin_unlock_irqrestore(&data_saved_lock, flags); 423 424 if (!data->saved_num) 425 work_on_cpu_safe(cpu, salinfo_log_read_cpu, data); 426 if (!data->log_size) { 427 data->state = STATE_NO_DATA; 428 cpumask_clear_cpu(cpu, &data->cpu_event); 429 } else { 430 data->state = STATE_LOG_RECORD; 431 } 432 } 433 434 static ssize_t 435 salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) 436 { 437 struct salinfo_data *data = PDE_DATA(file_inode(file)); 438 u8 *buf; 439 u64 bufsize; 440 441 if (data->state == STATE_LOG_RECORD) { 442 buf = data->log_buffer; 443 bufsize = data->log_size; 444 } else if (data->state == STATE_OEMDATA) { 445 buf = data->oemdata; 446 bufsize = data->oemdata_size; 447 } else { 448 buf = NULL; 449 bufsize = 0; 450 } 451 return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); 452 } 453 454 static long 455 salinfo_log_clear_cpu(void *context) 456 { 457 struct salinfo_data *data = context; 458 459 ia64_sal_clear_state_info(data->type); 460 return 0; 461 } 462 463 static int 464 salinfo_log_clear(struct salinfo_data *data, int cpu) 465 { 466 sal_log_record_header_t *rh; 467 unsigned long flags; 468 spin_lock_irqsave(&data_saved_lock, flags); 469 data->state = STATE_NO_DATA; 470 if (!cpumask_test_cpu(cpu, &data->cpu_event)) { 471 spin_unlock_irqrestore(&data_saved_lock, flags); 472 return 0; 473 } 474 cpumask_clear_cpu(cpu, &data->cpu_event); 475 if (data->saved_num) { 476 shift1_data_saved(data, data->saved_num - 1); 477 data->saved_num = 0; 478 } 479 spin_unlock_irqrestore(&data_saved_lock, flags); 480 rh = (sal_log_record_header_t *)(data->log_buffer); 481 /* Corrected errors have already been cleared from SAL */ 482 if (rh->severity != sal_log_severity_corrected) 483 work_on_cpu_safe(cpu, salinfo_log_clear_cpu, data); 484 /* clearing a record may make a new record visible */ 485 salinfo_log_new_read(cpu, data); 486 if (data->state == STATE_LOG_RECORD) { 487 spin_lock_irqsave(&data_saved_lock, flags); 488 cpumask_set_cpu(cpu, &data->cpu_event); 489 wake_up_interruptible(&data->read_wait); 490 spin_unlock_irqrestore(&data_saved_lock, flags); 491 } 492 return 0; 493 } 494 495 static ssize_t 496 salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) 497 { 498 struct salinfo_data *data = PDE_DATA(file_inode(file)); 499 char cmd[32]; 500 size_t size; 501 u32 offset; 502 int cpu; 503 504 size = sizeof(cmd); 505 if (count < size) 506 size = count; 507 if (copy_from_user(cmd, buffer, size)) 508 return -EFAULT; 509 510 if (sscanf(cmd, "read %d", &cpu) == 1) { 511 salinfo_log_new_read(cpu, data); 512 } else if (sscanf(cmd, "clear %d", &cpu) == 1) { 513 int ret; 514 if ((ret = salinfo_log_clear(data, cpu))) 515 count = ret; 516 } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) { 517 if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA) 518 return -EINVAL; 519 if (offset > data->log_size - sizeof(efi_guid_t)) 520 return -EINVAL; 521 data->state = STATE_OEMDATA; 522 if (salinfo_platform_oemdata) { 523 struct salinfo_platform_oemdata_parms parms = { 524 .efi_guid = data->log_buffer + offset, 525 .oemdata = &data->oemdata, 526 .oemdata_size = &data->oemdata_size 527 }; 528 count = work_on_cpu_safe(cpu, salinfo_platform_oemdata_cpu, 529 &parms); 530 } else 531 data->oemdata_size = 0; 532 } else 533 return -EINVAL; 534 535 return count; 536 } 537 538 static const struct file_operations salinfo_data_fops = { 539 .open = salinfo_log_open, 540 .release = salinfo_log_release, 541 .read = salinfo_log_read, 542 .write = salinfo_log_write, 543 .llseek = default_llseek, 544 }; 545 546 static int salinfo_cpu_online(unsigned int cpu) 547 { 548 unsigned int i, end = ARRAY_SIZE(salinfo_data); 549 struct salinfo_data *data; 550 551 spin_lock_irq(&data_saved_lock); 552 for (i = 0, data = salinfo_data; i < end; ++i, ++data) { 553 cpumask_set_cpu(cpu, &data->cpu_event); 554 wake_up_interruptible(&data->read_wait); 555 } 556 spin_unlock_irq(&data_saved_lock); 557 return 0; 558 } 559 560 static int salinfo_cpu_pre_down(unsigned int cpu) 561 { 562 unsigned int i, end = ARRAY_SIZE(salinfo_data); 563 struct salinfo_data *data; 564 565 spin_lock_irq(&data_saved_lock); 566 for (i = 0, data = salinfo_data; i < end; ++i, ++data) { 567 struct salinfo_data_saved *data_saved; 568 int j = ARRAY_SIZE(data->data_saved) - 1; 569 570 for (data_saved = data->data_saved + j; j >= 0; 571 --j, --data_saved) { 572 if (data_saved->buffer && data_saved->cpu == cpu) 573 shift1_data_saved(data, j); 574 } 575 cpumask_clear_cpu(cpu, &data->cpu_event); 576 } 577 spin_unlock_irq(&data_saved_lock); 578 return 0; 579 } 580 581 static int __init 582 salinfo_init(void) 583 { 584 struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */ 585 struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */ 586 struct proc_dir_entry *dir, *entry; 587 struct salinfo_data *data; 588 int i; 589 590 salinfo_dir = proc_mkdir("sal", NULL); 591 if (!salinfo_dir) 592 return 0; 593 594 for (i=0; i < NR_SALINFO_ENTRIES; i++) { 595 /* pass the feature bit in question as misc data */ 596 *sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir, 597 &proc_salinfo_fops, 598 (void *)salinfo_entries[i].feature); 599 } 600 601 for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { 602 data = salinfo_data + i; 603 data->type = i; 604 init_waitqueue_head(&data->read_wait); 605 dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); 606 if (!dir) 607 continue; 608 609 entry = proc_create_data("event", S_IRUSR, dir, 610 &salinfo_event_fops, data); 611 if (!entry) 612 continue; 613 *sdir++ = entry; 614 615 entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, 616 &salinfo_data_fops, data); 617 if (!entry) 618 continue; 619 *sdir++ = entry; 620 621 *sdir++ = dir; 622 } 623 624 *sdir++ = salinfo_dir; 625 626 timer_setup(&salinfo_timer, salinfo_timeout, 0); 627 salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; 628 add_timer(&salinfo_timer); 629 630 i = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/salinfo:online", 631 salinfo_cpu_online, salinfo_cpu_pre_down); 632 WARN_ON(i < 0); 633 return 0; 634 } 635 636 /* 637 * 'data' contains an integer that corresponds to the feature we're 638 * testing 639 */ 640 static int proc_salinfo_show(struct seq_file *m, void *v) 641 { 642 unsigned long data = (unsigned long)v; 643 seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); 644 return 0; 645 } 646 647 static int proc_salinfo_open(struct inode *inode, struct file *file) 648 { 649 return single_open(file, proc_salinfo_show, PDE_DATA(inode)); 650 } 651 652 static const struct file_operations proc_salinfo_fops = { 653 .open = proc_salinfo_open, 654 .read = seq_read, 655 .llseek = seq_lseek, 656 .release = single_release, 657 }; 658 659 module_init(salinfo_init); 660