1 /* 2 * File: mca.c 3 * Purpose: Generic MCA handling layer 4 * 5 * Updated for latest kernel 6 * Copyright (C) 2003 Hewlett-Packard Co 7 * David Mosberger-Tang <davidm@hpl.hp.com> 8 * 9 * Copyright (C) 2002 Dell Inc. 10 * Copyright (C) Matt Domsch (Matt_Domsch@dell.com) 11 * 12 * Copyright (C) 2002 Intel 13 * Copyright (C) Jenna Hall (jenna.s.hall@intel.com) 14 * 15 * Copyright (C) 2001 Intel 16 * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com) 17 * 18 * Copyright (C) 2000 Intel 19 * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com) 20 * 21 * Copyright (C) 1999, 2004 Silicon Graphics, Inc. 22 * Copyright (C) Vijay Chander(vijay@engr.sgi.com) 23 * 24 * 03/04/15 D. Mosberger Added INIT backtrace support. 25 * 02/03/25 M. Domsch GUID cleanups 26 * 27 * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU 28 * error flag, set SAL default return values, changed 29 * error record structure to linked list, added init call 30 * to sal_get_state_info_size(). 31 * 32 * 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected 33 * platform errors, completed code for logging of 34 * corrected & uncorrected machine check errors, and 35 * updated for conformance with Nov. 2000 revision of the 36 * SAL 3.0 spec. 37 * 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues, 38 * added min save state dump, added INIT handler. 39 * 40 * 2003-12-08 Keith Owens <kaos@sgi.com> 41 * smp_call_function() must not be called from interrupt context (can 42 * deadlock on tasklist_lock). Use keventd to call smp_call_function(). 43 * 44 * 2004-02-01 Keith Owens <kaos@sgi.com> 45 * Avoid deadlock when using printk() for MCA and INIT records. 46 * Delete all record printing code, moved to salinfo_decode in user space. 47 * Mark variables and functions static where possible. 48 * Delete dead variables and functions. 49 * Reorder to remove the need for forward declarations and to consolidate 50 * related code. 51 * 52 * 2005-08-12 Keith Owens <kaos@sgi.com> 53 * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. 54 * 55 * 2005-10-07 Keith Owens <kaos@sgi.com> 56 * Add notify_die() hooks. 57 * 58 * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> 59 * Add printing support for MCA/INIT. 60 * 61 * 2007-04-27 Russ Anderson <rja@sgi.com> 62 * Support multiple cpus going through OS_MCA in the same event. 63 */ 64 #include <linux/types.h> 65 #include <linux/init.h> 66 #include <linux/sched.h> 67 #include <linux/interrupt.h> 68 #include <linux/irq.h> 69 #include <linux/bootmem.h> 70 #include <linux/acpi.h> 71 #include <linux/timer.h> 72 #include <linux/module.h> 73 #include <linux/kernel.h> 74 #include <linux/smp.h> 75 #include <linux/workqueue.h> 76 #include <linux/cpumask.h> 77 #include <linux/kdebug.h> 78 79 #include <asm/delay.h> 80 #include <asm/machvec.h> 81 #include <asm/meminit.h> 82 #include <asm/page.h> 83 #include <asm/ptrace.h> 84 #include <asm/system.h> 85 #include <asm/sal.h> 86 #include <asm/mca.h> 87 #include <asm/kexec.h> 88 89 #include <asm/irq.h> 90 #include <asm/hw_irq.h> 91 92 #include "mca_drv.h" 93 #include "entry.h" 94 95 #if defined(IA64_MCA_DEBUG_INFO) 96 # define IA64_MCA_DEBUG(fmt...) printk(fmt) 97 #else 98 # define IA64_MCA_DEBUG(fmt...) 99 #endif 100 101 /* Used by mca_asm.S */ 102 DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ 103 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ 104 DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ 105 DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ 106 107 unsigned long __per_cpu_mca[NR_CPUS]; 108 109 /* In mca_asm.S */ 110 extern void ia64_os_init_dispatch_monarch (void); 111 extern void ia64_os_init_dispatch_slave (void); 112 113 static int monarch_cpu = -1; 114 115 static ia64_mc_info_t ia64_mc_info; 116 117 #define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */ 118 #define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */ 119 #define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */ 120 #define CPE_HISTORY_LENGTH 5 121 #define CMC_HISTORY_LENGTH 5 122 123 #ifdef CONFIG_ACPI 124 static struct timer_list cpe_poll_timer; 125 #endif 126 static struct timer_list cmc_poll_timer; 127 /* 128 * This variable tells whether we are currently in polling mode. 129 * Start with this in the wrong state so we won't play w/ timers 130 * before the system is ready. 131 */ 132 static int cmc_polling_enabled = 1; 133 134 /* 135 * Clearing this variable prevents CPE polling from getting activated 136 * in mca_late_init. Use it if your system doesn't provide a CPEI, 137 * but encounters problems retrieving CPE logs. This should only be 138 * necessary for debugging. 139 */ 140 static int cpe_poll_enabled = 1; 141 142 extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); 143 144 static int mca_init __initdata; 145 146 /* 147 * limited & delayed printing support for MCA/INIT handler 148 */ 149 150 #define mprintk(fmt...) ia64_mca_printk(fmt) 151 152 #define MLOGBUF_SIZE (512+256*NR_CPUS) 153 #define MLOGBUF_MSGMAX 256 154 static char mlogbuf[MLOGBUF_SIZE]; 155 static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ 156 static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ 157 static unsigned long mlogbuf_start; 158 static unsigned long mlogbuf_end; 159 static unsigned int mlogbuf_finished = 0; 160 static unsigned long mlogbuf_timestamp = 0; 161 162 static int loglevel_save = -1; 163 #define BREAK_LOGLEVEL(__console_loglevel) \ 164 oops_in_progress = 1; \ 165 if (loglevel_save < 0) \ 166 loglevel_save = __console_loglevel; \ 167 __console_loglevel = 15; 168 169 #define RESTORE_LOGLEVEL(__console_loglevel) \ 170 if (loglevel_save >= 0) { \ 171 __console_loglevel = loglevel_save; \ 172 loglevel_save = -1; \ 173 } \ 174 mlogbuf_finished = 0; \ 175 oops_in_progress = 0; 176 177 /* 178 * Push messages into buffer, print them later if not urgent. 179 */ 180 void ia64_mca_printk(const char *fmt, ...) 181 { 182 va_list args; 183 int printed_len; 184 char temp_buf[MLOGBUF_MSGMAX]; 185 char *p; 186 187 va_start(args, fmt); 188 printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); 189 va_end(args); 190 191 /* Copy the output into mlogbuf */ 192 if (oops_in_progress) { 193 /* mlogbuf was abandoned, use printk directly instead. */ 194 printk(temp_buf); 195 } else { 196 spin_lock(&mlogbuf_wlock); 197 for (p = temp_buf; *p; p++) { 198 unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; 199 if (next != mlogbuf_start) { 200 mlogbuf[mlogbuf_end] = *p; 201 mlogbuf_end = next; 202 } else { 203 /* buffer full */ 204 break; 205 } 206 } 207 mlogbuf[mlogbuf_end] = '\0'; 208 spin_unlock(&mlogbuf_wlock); 209 } 210 } 211 EXPORT_SYMBOL(ia64_mca_printk); 212 213 /* 214 * Print buffered messages. 215 * NOTE: call this after returning normal context. (ex. from salinfod) 216 */ 217 void ia64_mlogbuf_dump(void) 218 { 219 char temp_buf[MLOGBUF_MSGMAX]; 220 char *p; 221 unsigned long index; 222 unsigned long flags; 223 unsigned int printed_len; 224 225 /* Get output from mlogbuf */ 226 while (mlogbuf_start != mlogbuf_end) { 227 temp_buf[0] = '\0'; 228 p = temp_buf; 229 printed_len = 0; 230 231 spin_lock_irqsave(&mlogbuf_rlock, flags); 232 233 index = mlogbuf_start; 234 while (index != mlogbuf_end) { 235 *p = mlogbuf[index]; 236 index = (index + 1) % MLOGBUF_SIZE; 237 if (!*p) 238 break; 239 p++; 240 if (++printed_len >= MLOGBUF_MSGMAX - 1) 241 break; 242 } 243 *p = '\0'; 244 if (temp_buf[0]) 245 printk(temp_buf); 246 mlogbuf_start = index; 247 248 mlogbuf_timestamp = 0; 249 spin_unlock_irqrestore(&mlogbuf_rlock, flags); 250 } 251 } 252 EXPORT_SYMBOL(ia64_mlogbuf_dump); 253 254 /* 255 * Call this if system is going to down or if immediate flushing messages to 256 * console is required. (ex. recovery was failed, crash dump is going to be 257 * invoked, long-wait rendezvous etc.) 258 * NOTE: this should be called from monarch. 259 */ 260 static void ia64_mlogbuf_finish(int wait) 261 { 262 BREAK_LOGLEVEL(console_loglevel); 263 264 spin_lock_init(&mlogbuf_rlock); 265 ia64_mlogbuf_dump(); 266 printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " 267 "MCA/INIT might be dodgy or fail.\n"); 268 269 if (!wait) 270 return; 271 272 /* wait for console */ 273 printk("Delaying for 5 seconds...\n"); 274 udelay(5*1000000); 275 276 mlogbuf_finished = 1; 277 } 278 279 /* 280 * Print buffered messages from INIT context. 281 */ 282 static void ia64_mlogbuf_dump_from_init(void) 283 { 284 if (mlogbuf_finished) 285 return; 286 287 if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { 288 printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " 289 " and the system seems to be messed up.\n"); 290 ia64_mlogbuf_finish(0); 291 return; 292 } 293 294 if (!spin_trylock(&mlogbuf_rlock)) { 295 printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " 296 "Generated messages other than stack dump will be " 297 "buffered to mlogbuf and will be printed later.\n"); 298 printk(KERN_ERR "INIT: If messages would not printed after " 299 "this INIT, wait 30sec and assert INIT again.\n"); 300 if (!mlogbuf_timestamp) 301 mlogbuf_timestamp = jiffies; 302 return; 303 } 304 spin_unlock(&mlogbuf_rlock); 305 ia64_mlogbuf_dump(); 306 } 307 308 static void inline 309 ia64_mca_spin(const char *func) 310 { 311 if (monarch_cpu == smp_processor_id()) 312 ia64_mlogbuf_finish(0); 313 mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); 314 while (1) 315 cpu_relax(); 316 } 317 /* 318 * IA64_MCA log support 319 */ 320 #define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ 321 #define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ 322 323 typedef struct ia64_state_log_s 324 { 325 spinlock_t isl_lock; 326 int isl_index; 327 unsigned long isl_count; 328 ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ 329 } ia64_state_log_t; 330 331 static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; 332 333 #define IA64_LOG_ALLOCATE(it, size) \ 334 {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \ 335 (ia64_err_rec_t *)alloc_bootmem(size); \ 336 ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \ 337 (ia64_err_rec_t *)alloc_bootmem(size);} 338 #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) 339 #define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) 340 #define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) 341 #define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index 342 #define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index 343 #define IA64_LOG_INDEX_INC(it) \ 344 {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ 345 ia64_state_log[it].isl_count++;} 346 #define IA64_LOG_INDEX_DEC(it) \ 347 ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index 348 #define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) 349 #define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) 350 #define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count 351 352 /* 353 * ia64_log_init 354 * Reset the OS ia64 log buffer 355 * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) 356 * Outputs : None 357 */ 358 static void __init 359 ia64_log_init(int sal_info_type) 360 { 361 u64 max_size = 0; 362 363 IA64_LOG_NEXT_INDEX(sal_info_type) = 0; 364 IA64_LOG_LOCK_INIT(sal_info_type); 365 366 // SAL will tell us the maximum size of any error record of this type 367 max_size = ia64_sal_get_state_info_size(sal_info_type); 368 if (!max_size) 369 /* alloc_bootmem() doesn't like zero-sized allocations! */ 370 return; 371 372 // set up OS data structures to hold error info 373 IA64_LOG_ALLOCATE(sal_info_type, max_size); 374 memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size); 375 memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size); 376 } 377 378 /* 379 * ia64_log_get 380 * 381 * Get the current MCA log from SAL and copy it into the OS log buffer. 382 * 383 * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) 384 * irq_safe whether you can use printk at this point 385 * Outputs : size (total record length) 386 * *buffer (ptr to error record) 387 * 388 */ 389 static u64 390 ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) 391 { 392 sal_log_record_header_t *log_buffer; 393 u64 total_len = 0; 394 unsigned long s; 395 396 IA64_LOG_LOCK(sal_info_type); 397 398 /* Get the process state information */ 399 log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); 400 401 total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); 402 403 if (total_len) { 404 IA64_LOG_INDEX_INC(sal_info_type); 405 IA64_LOG_UNLOCK(sal_info_type); 406 if (irq_safe) { 407 IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. " 408 "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len); 409 } 410 *buffer = (u8 *) log_buffer; 411 return total_len; 412 } else { 413 IA64_LOG_UNLOCK(sal_info_type); 414 return 0; 415 } 416 } 417 418 /* 419 * ia64_mca_log_sal_error_record 420 * 421 * This function retrieves a specified error record type from SAL 422 * and wakes up any processes waiting for error records. 423 * 424 * Inputs : sal_info_type (Type of error record MCA/CMC/CPE) 425 * FIXME: remove MCA and irq_safe. 426 */ 427 static void 428 ia64_mca_log_sal_error_record(int sal_info_type) 429 { 430 u8 *buffer; 431 sal_log_record_header_t *rh; 432 u64 size; 433 int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA; 434 #ifdef IA64_MCA_DEBUG_INFO 435 static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; 436 #endif 437 438 size = ia64_log_get(sal_info_type, &buffer, irq_safe); 439 if (!size) 440 return; 441 442 salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe); 443 444 if (irq_safe) 445 IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n", 446 smp_processor_id(), 447 sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN"); 448 449 /* Clear logs from corrected errors in case there's no user-level logger */ 450 rh = (sal_log_record_header_t *)buffer; 451 if (rh->severity == sal_log_severity_corrected) 452 ia64_sal_clear_state_info(sal_info_type); 453 } 454 455 /* 456 * search_mca_table 457 * See if the MCA surfaced in an instruction range 458 * that has been tagged as recoverable. 459 * 460 * Inputs 461 * first First address range to check 462 * last Last address range to check 463 * ip Instruction pointer, address we are looking for 464 * 465 * Return value: 466 * 1 on Success (in the table)/ 0 on Failure (not in the table) 467 */ 468 int 469 search_mca_table (const struct mca_table_entry *first, 470 const struct mca_table_entry *last, 471 unsigned long ip) 472 { 473 const struct mca_table_entry *curr; 474 u64 curr_start, curr_end; 475 476 curr = first; 477 while (curr <= last) { 478 curr_start = (u64) &curr->start_addr + curr->start_addr; 479 curr_end = (u64) &curr->end_addr + curr->end_addr; 480 481 if ((ip >= curr_start) && (ip <= curr_end)) { 482 return 1; 483 } 484 curr++; 485 } 486 return 0; 487 } 488 489 /* Given an address, look for it in the mca tables. */ 490 int mca_recover_range(unsigned long addr) 491 { 492 extern struct mca_table_entry __start___mca_table[]; 493 extern struct mca_table_entry __stop___mca_table[]; 494 495 return search_mca_table(__start___mca_table, __stop___mca_table-1, addr); 496 } 497 EXPORT_SYMBOL_GPL(mca_recover_range); 498 499 #ifdef CONFIG_ACPI 500 501 int cpe_vector = -1; 502 int ia64_cpe_irq = -1; 503 504 static irqreturn_t 505 ia64_mca_cpe_int_handler (int cpe_irq, void *arg) 506 { 507 static unsigned long cpe_history[CPE_HISTORY_LENGTH]; 508 static int index; 509 static DEFINE_SPINLOCK(cpe_history_lock); 510 511 IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", 512 __FUNCTION__, cpe_irq, smp_processor_id()); 513 514 /* SAL spec states this should run w/ interrupts enabled */ 515 local_irq_enable(); 516 517 spin_lock(&cpe_history_lock); 518 if (!cpe_poll_enabled && cpe_vector >= 0) { 519 520 int i, count = 1; /* we know 1 happened now */ 521 unsigned long now = jiffies; 522 523 for (i = 0; i < CPE_HISTORY_LENGTH; i++) { 524 if (now - cpe_history[i] <= HZ) 525 count++; 526 } 527 528 IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH); 529 if (count >= CPE_HISTORY_LENGTH) { 530 531 cpe_poll_enabled = 1; 532 spin_unlock(&cpe_history_lock); 533 disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR)); 534 535 /* 536 * Corrected errors will still be corrected, but 537 * make sure there's a log somewhere that indicates 538 * something is generating more than we can handle. 539 */ 540 printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n"); 541 542 mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); 543 544 /* lock already released, get out now */ 545 goto out; 546 } else { 547 cpe_history[index++] = now; 548 if (index == CPE_HISTORY_LENGTH) 549 index = 0; 550 } 551 } 552 spin_unlock(&cpe_history_lock); 553 out: 554 /* Get the CPE error record and log it */ 555 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); 556 557 return IRQ_HANDLED; 558 } 559 560 #endif /* CONFIG_ACPI */ 561 562 #ifdef CONFIG_ACPI 563 /* 564 * ia64_mca_register_cpev 565 * 566 * Register the corrected platform error vector with SAL. 567 * 568 * Inputs 569 * cpev Corrected Platform Error Vector number 570 * 571 * Outputs 572 * None 573 */ 574 static void __init 575 ia64_mca_register_cpev (int cpev) 576 { 577 /* Register the CPE interrupt vector with SAL */ 578 struct ia64_sal_retval isrv; 579 580 isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0); 581 if (isrv.status) { 582 printk(KERN_ERR "Failed to register Corrected Platform " 583 "Error interrupt vector with SAL (status %ld)\n", isrv.status); 584 return; 585 } 586 587 IA64_MCA_DEBUG("%s: corrected platform error " 588 "vector %#x registered\n", __FUNCTION__, cpev); 589 } 590 #endif /* CONFIG_ACPI */ 591 592 /* 593 * ia64_mca_cmc_vector_setup 594 * 595 * Setup the corrected machine check vector register in the processor. 596 * (The interrupt is masked on boot. ia64_mca_late_init unmask this.) 597 * This function is invoked on a per-processor basis. 598 * 599 * Inputs 600 * None 601 * 602 * Outputs 603 * None 604 */ 605 void __cpuinit 606 ia64_mca_cmc_vector_setup (void) 607 { 608 cmcv_reg_t cmcv; 609 610 cmcv.cmcv_regval = 0; 611 cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */ 612 cmcv.cmcv_vector = IA64_CMC_VECTOR; 613 ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); 614 615 IA64_MCA_DEBUG("%s: CPU %d corrected " 616 "machine check vector %#x registered.\n", 617 __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR); 618 619 IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n", 620 __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV)); 621 } 622 623 /* 624 * ia64_mca_cmc_vector_disable 625 * 626 * Mask the corrected machine check vector register in the processor. 627 * This function is invoked on a per-processor basis. 628 * 629 * Inputs 630 * dummy(unused) 631 * 632 * Outputs 633 * None 634 */ 635 static void 636 ia64_mca_cmc_vector_disable (void *dummy) 637 { 638 cmcv_reg_t cmcv; 639 640 cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); 641 642 cmcv.cmcv_mask = 1; /* Mask/disable interrupt */ 643 ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); 644 645 IA64_MCA_DEBUG("%s: CPU %d corrected " 646 "machine check vector %#x disabled.\n", 647 __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); 648 } 649 650 /* 651 * ia64_mca_cmc_vector_enable 652 * 653 * Unmask the corrected machine check vector register in the processor. 654 * This function is invoked on a per-processor basis. 655 * 656 * Inputs 657 * dummy(unused) 658 * 659 * Outputs 660 * None 661 */ 662 static void 663 ia64_mca_cmc_vector_enable (void *dummy) 664 { 665 cmcv_reg_t cmcv; 666 667 cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); 668 669 cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ 670 ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); 671 672 IA64_MCA_DEBUG("%s: CPU %d corrected " 673 "machine check vector %#x enabled.\n", 674 __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); 675 } 676 677 /* 678 * ia64_mca_cmc_vector_disable_keventd 679 * 680 * Called via keventd (smp_call_function() is not safe in interrupt context) to 681 * disable the cmc interrupt vector. 682 */ 683 static void 684 ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) 685 { 686 on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); 687 } 688 689 /* 690 * ia64_mca_cmc_vector_enable_keventd 691 * 692 * Called via keventd (smp_call_function() is not safe in interrupt context) to 693 * enable the cmc interrupt vector. 694 */ 695 static void 696 ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) 697 { 698 on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); 699 } 700 701 /* 702 * ia64_mca_wakeup 703 * 704 * Send an inter-cpu interrupt to wake-up a particular cpu. 705 * 706 * Inputs : cpuid 707 * Outputs : None 708 */ 709 static void 710 ia64_mca_wakeup(int cpu) 711 { 712 platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); 713 } 714 715 /* 716 * ia64_mca_wakeup_all 717 * 718 * Wakeup all the slave cpus which have rendez'ed previously. 719 * 720 * Inputs : None 721 * Outputs : None 722 */ 723 static void 724 ia64_mca_wakeup_all(void) 725 { 726 int cpu; 727 728 /* Clear the Rendez checkin flag for all cpus */ 729 for_each_online_cpu(cpu) { 730 if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) 731 ia64_mca_wakeup(cpu); 732 } 733 734 } 735 736 /* 737 * ia64_mca_rendez_interrupt_handler 738 * 739 * This is handler used to put slave processors into spinloop 740 * while the monarch processor does the mca handling and later 741 * wake each slave up once the monarch is done. The state 742 * IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed 743 * in SAL. The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates 744 * the cpu has come out of OS rendezvous. 745 * 746 * Inputs : None 747 * Outputs : None 748 */ 749 static irqreturn_t 750 ia64_mca_rendez_int_handler(int rendez_irq, void *arg) 751 { 752 unsigned long flags; 753 int cpu = smp_processor_id(); 754 struct ia64_mca_notify_die nd = 755 { .sos = NULL, .monarch_cpu = &monarch_cpu }; 756 757 /* Mask all interrupts */ 758 local_irq_save(flags); 759 if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", get_irq_regs(), 760 (long)&nd, 0, 0) == NOTIFY_STOP) 761 ia64_mca_spin(__FUNCTION__); 762 763 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; 764 /* Register with the SAL monarch that the slave has 765 * reached SAL 766 */ 767 ia64_sal_mc_rendez(); 768 769 if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", get_irq_regs(), 770 (long)&nd, 0, 0) == NOTIFY_STOP) 771 ia64_mca_spin(__FUNCTION__); 772 773 /* Wait for the monarch cpu to exit. */ 774 while (monarch_cpu != -1) 775 cpu_relax(); /* spin until monarch leaves */ 776 777 if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", get_irq_regs(), 778 (long)&nd, 0, 0) == NOTIFY_STOP) 779 ia64_mca_spin(__FUNCTION__); 780 781 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 782 /* Enable all interrupts */ 783 local_irq_restore(flags); 784 return IRQ_HANDLED; 785 } 786 787 /* 788 * ia64_mca_wakeup_int_handler 789 * 790 * The interrupt handler for processing the inter-cpu interrupt to the 791 * slave cpu which was spinning in the rendez loop. 792 * Since this spinning is done by turning off the interrupts and 793 * polling on the wakeup-interrupt bit in the IRR, there is 794 * nothing useful to be done in the handler. 795 * 796 * Inputs : wakeup_irq (Wakeup-interrupt bit) 797 * arg (Interrupt handler specific argument) 798 * Outputs : None 799 * 800 */ 801 static irqreturn_t 802 ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg) 803 { 804 return IRQ_HANDLED; 805 } 806 807 /* Function pointer for extra MCA recovery */ 808 int (*ia64_mca_ucmc_extension) 809 (void*,struct ia64_sal_os_state*) 810 = NULL; 811 812 int 813 ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)) 814 { 815 if (ia64_mca_ucmc_extension) 816 return 1; 817 818 ia64_mca_ucmc_extension = fn; 819 return 0; 820 } 821 822 void 823 ia64_unreg_MCA_extension(void) 824 { 825 if (ia64_mca_ucmc_extension) 826 ia64_mca_ucmc_extension = NULL; 827 } 828 829 EXPORT_SYMBOL(ia64_reg_MCA_extension); 830 EXPORT_SYMBOL(ia64_unreg_MCA_extension); 831 832 833 static inline void 834 copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) 835 { 836 u64 fslot, tslot, nat; 837 *tr = *fr; 838 fslot = ((unsigned long)fr >> 3) & 63; 839 tslot = ((unsigned long)tr >> 3) & 63; 840 *tnat &= ~(1UL << tslot); 841 nat = (fnat >> fslot) & 1; 842 *tnat |= (nat << tslot); 843 } 844 845 /* Change the comm field on the MCA/INT task to include the pid that 846 * was interrupted, it makes for easier debugging. If that pid was 0 847 * (swapper or nested MCA/INIT) then use the start of the previous comm 848 * field suffixed with its cpu. 849 */ 850 851 static void 852 ia64_mca_modify_comm(const struct task_struct *previous_current) 853 { 854 char *p, comm[sizeof(current->comm)]; 855 if (previous_current->pid) 856 snprintf(comm, sizeof(comm), "%s %d", 857 current->comm, previous_current->pid); 858 else { 859 int l; 860 if ((p = strchr(previous_current->comm, ' '))) 861 l = p - previous_current->comm; 862 else 863 l = strlen(previous_current->comm); 864 snprintf(comm, sizeof(comm), "%s %*s %d", 865 current->comm, l, previous_current->comm, 866 task_thread_info(previous_current)->cpu); 867 } 868 memcpy(current->comm, comm, sizeof(current->comm)); 869 } 870 871 /* On entry to this routine, we are running on the per cpu stack, see 872 * mca_asm.h. The original stack has not been touched by this event. Some of 873 * the original stack's registers will be in the RBS on this stack. This stack 874 * also contains a partial pt_regs and switch_stack, the rest of the data is in 875 * PAL minstate. 876 * 877 * The first thing to do is modify the original stack to look like a blocked 878 * task so we can run backtrace on the original task. Also mark the per cpu 879 * stack as current to ensure that we use the correct task state, it also means 880 * that we can do backtrace on the MCA/INIT handler code itself. 881 */ 882 883 static struct task_struct * 884 ia64_mca_modify_original_stack(struct pt_regs *regs, 885 const struct switch_stack *sw, 886 struct ia64_sal_os_state *sos, 887 const char *type) 888 { 889 char *p; 890 ia64_va va; 891 extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ 892 const pal_min_state_area_t *ms = sos->pal_min_state; 893 struct task_struct *previous_current; 894 struct pt_regs *old_regs; 895 struct switch_stack *old_sw; 896 unsigned size = sizeof(struct pt_regs) + 897 sizeof(struct switch_stack) + 16; 898 u64 *old_bspstore, *old_bsp; 899 u64 *new_bspstore, *new_bsp; 900 u64 old_unat, old_rnat, new_rnat, nat; 901 u64 slots, loadrs = regs->loadrs; 902 u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1]; 903 u64 ar_bspstore = regs->ar_bspstore; 904 u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16); 905 const u64 *bank; 906 const char *msg; 907 int cpu = smp_processor_id(); 908 909 previous_current = curr_task(cpu); 910 set_curr_task(cpu, current); 911 if ((p = strchr(current->comm, ' '))) 912 *p = '\0'; 913 914 /* Best effort attempt to cope with MCA/INIT delivered while in 915 * physical mode. 916 */ 917 regs->cr_ipsr = ms->pmsa_ipsr; 918 if (ia64_psr(regs)->dt == 0) { 919 va.l = r12; 920 if (va.f.reg == 0) { 921 va.f.reg = 7; 922 r12 = va.l; 923 } 924 va.l = r13; 925 if (va.f.reg == 0) { 926 va.f.reg = 7; 927 r13 = va.l; 928 } 929 } 930 if (ia64_psr(regs)->rt == 0) { 931 va.l = ar_bspstore; 932 if (va.f.reg == 0) { 933 va.f.reg = 7; 934 ar_bspstore = va.l; 935 } 936 va.l = ar_bsp; 937 if (va.f.reg == 0) { 938 va.f.reg = 7; 939 ar_bsp = va.l; 940 } 941 } 942 943 /* mca_asm.S ia64_old_stack() cannot assume that the dirty registers 944 * have been copied to the old stack, the old stack may fail the 945 * validation tests below. So ia64_old_stack() must restore the dirty 946 * registers from the new stack. The old and new bspstore probably 947 * have different alignments, so loadrs calculated on the old bsp 948 * cannot be used to restore from the new bsp. Calculate a suitable 949 * loadrs for the new stack and save it in the new pt_regs, where 950 * ia64_old_stack() can get it. 951 */ 952 old_bspstore = (u64 *)ar_bspstore; 953 old_bsp = (u64 *)ar_bsp; 954 slots = ia64_rse_num_regs(old_bspstore, old_bsp); 955 new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET); 956 new_bsp = ia64_rse_skip_regs(new_bspstore, slots); 957 regs->loadrs = (new_bsp - new_bspstore) * 8 << 16; 958 959 /* Verify the previous stack state before we change it */ 960 if (user_mode(regs)) { 961 msg = "occurred in user space"; 962 /* previous_current is guaranteed to be valid when the task was 963 * in user space, so ... 964 */ 965 ia64_mca_modify_comm(previous_current); 966 goto no_mod; 967 } 968 969 if (r13 != sos->prev_IA64_KR_CURRENT) { 970 msg = "inconsistent previous current and r13"; 971 goto no_mod; 972 } 973 974 if (!mca_recover_range(ms->pmsa_iip)) { 975 if ((r12 - r13) >= KERNEL_STACK_SIZE) { 976 msg = "inconsistent r12 and r13"; 977 goto no_mod; 978 } 979 if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { 980 msg = "inconsistent ar.bspstore and r13"; 981 goto no_mod; 982 } 983 va.p = old_bspstore; 984 if (va.f.reg < 5) { 985 msg = "old_bspstore is in the wrong region"; 986 goto no_mod; 987 } 988 if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { 989 msg = "inconsistent ar.bsp and r13"; 990 goto no_mod; 991 } 992 size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; 993 if (ar_bspstore + size > r12) { 994 msg = "no room for blocked state"; 995 goto no_mod; 996 } 997 } 998 999 ia64_mca_modify_comm(previous_current); 1000 1001 /* Make the original task look blocked. First stack a struct pt_regs, 1002 * describing the state at the time of interrupt. mca_asm.S built a 1003 * partial pt_regs, copy it and fill in the blanks using minstate. 1004 */ 1005 p = (char *)r12 - sizeof(*regs); 1006 old_regs = (struct pt_regs *)p; 1007 memcpy(old_regs, regs, sizeof(*regs)); 1008 /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use 1009 * pmsa_{xip,xpsr,xfs} 1010 */ 1011 if (ia64_psr(regs)->ic) { 1012 old_regs->cr_iip = ms->pmsa_iip; 1013 old_regs->cr_ipsr = ms->pmsa_ipsr; 1014 old_regs->cr_ifs = ms->pmsa_ifs; 1015 } else { 1016 old_regs->cr_iip = ms->pmsa_xip; 1017 old_regs->cr_ipsr = ms->pmsa_xpsr; 1018 old_regs->cr_ifs = ms->pmsa_xfs; 1019 } 1020 old_regs->pr = ms->pmsa_pr; 1021 old_regs->b0 = ms->pmsa_br0; 1022 old_regs->loadrs = loadrs; 1023 old_regs->ar_rsc = ms->pmsa_rsc; 1024 old_unat = old_regs->ar_unat; 1025 copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat); 1026 copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat); 1027 copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat); 1028 copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat); 1029 copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat); 1030 copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat); 1031 copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat); 1032 copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat); 1033 copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat); 1034 copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat); 1035 copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat); 1036 if (ia64_psr(old_regs)->bn) 1037 bank = ms->pmsa_bank1_gr; 1038 else 1039 bank = ms->pmsa_bank0_gr; 1040 copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat); 1041 copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat); 1042 copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat); 1043 copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat); 1044 copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat); 1045 copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat); 1046 copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat); 1047 copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat); 1048 copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat); 1049 copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat); 1050 copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat); 1051 copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat); 1052 copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat); 1053 copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat); 1054 copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat); 1055 copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat); 1056 1057 /* Next stack a struct switch_stack. mca_asm.S built a partial 1058 * switch_stack, copy it and fill in the blanks using pt_regs and 1059 * minstate. 1060 * 1061 * In the synthesized switch_stack, b0 points to ia64_leave_kernel, 1062 * ar.pfs is set to 0. 1063 * 1064 * unwind.c::unw_unwind() does special processing for interrupt frames. 1065 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate 1066 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs. Not 1067 * that this is documented, of course. Set PRED_NON_SYSCALL in the 1068 * switch_stack on the original stack so it will unwind correctly when 1069 * unwind.c reads pt_regs. 1070 * 1071 * thread.ksp is updated to point to the synthesized switch_stack. 1072 */ 1073 p -= sizeof(struct switch_stack); 1074 old_sw = (struct switch_stack *)p; 1075 memcpy(old_sw, sw, sizeof(*sw)); 1076 old_sw->caller_unat = old_unat; 1077 old_sw->ar_fpsr = old_regs->ar_fpsr; 1078 copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat); 1079 copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat); 1080 copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat); 1081 copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat); 1082 old_sw->b0 = (u64)ia64_leave_kernel; 1083 old_sw->b1 = ms->pmsa_br1; 1084 old_sw->ar_pfs = 0; 1085 old_sw->ar_unat = old_unat; 1086 old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL); 1087 previous_current->thread.ksp = (u64)p - 16; 1088 1089 /* Finally copy the original stack's registers back to its RBS. 1090 * Registers from ar.bspstore through ar.bsp at the time of the event 1091 * are in the current RBS, copy them back to the original stack. The 1092 * copy must be done register by register because the original bspstore 1093 * and the current one have different alignments, so the saved RNAT 1094 * data occurs at different places. 1095 * 1096 * mca_asm does cover, so the old_bsp already includes all registers at 1097 * the time of MCA/INIT. It also does flushrs, so all registers before 1098 * this function have been written to backing store on the MCA/INIT 1099 * stack. 1100 */ 1101 new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore)); 1102 old_rnat = regs->ar_rnat; 1103 while (slots--) { 1104 if (ia64_rse_is_rnat_slot(new_bspstore)) { 1105 new_rnat = ia64_get_rnat(new_bspstore++); 1106 } 1107 if (ia64_rse_is_rnat_slot(old_bspstore)) { 1108 *old_bspstore++ = old_rnat; 1109 old_rnat = 0; 1110 } 1111 nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL; 1112 old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore)); 1113 old_rnat |= (nat << ia64_rse_slot_num(old_bspstore)); 1114 *old_bspstore++ = *new_bspstore++; 1115 } 1116 old_sw->ar_bspstore = (unsigned long)old_bspstore; 1117 old_sw->ar_rnat = old_rnat; 1118 1119 sos->prev_task = previous_current; 1120 return previous_current; 1121 1122 no_mod: 1123 printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n", 1124 smp_processor_id(), type, msg); 1125 return previous_current; 1126 } 1127 1128 /* The monarch/slave interaction is based on monarch_cpu and requires that all 1129 * slaves have entered rendezvous before the monarch leaves. If any cpu has 1130 * not entered rendezvous yet then wait a bit. The assumption is that any 1131 * slave that has not rendezvoused after a reasonable time is never going to do 1132 * so. In this context, slave includes cpus that respond to the MCA rendezvous 1133 * interrupt, as well as cpus that receive the INIT slave event. 1134 */ 1135 1136 static void 1137 ia64_wait_for_slaves(int monarch, const char *type) 1138 { 1139 int c, i , wait; 1140 1141 /* 1142 * wait 5 seconds total for slaves (arbitrary) 1143 */ 1144 for (i = 0; i < 5000; i++) { 1145 wait = 0; 1146 for_each_online_cpu(c) { 1147 if (c == monarch) 1148 continue; 1149 if (ia64_mc_info.imi_rendez_checkin[c] 1150 == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { 1151 udelay(1000); /* short wait */ 1152 wait = 1; 1153 break; 1154 } 1155 } 1156 if (!wait) 1157 goto all_in; 1158 } 1159 1160 /* 1161 * Maybe slave(s) dead. Print buffered messages immediately. 1162 */ 1163 ia64_mlogbuf_finish(0); 1164 mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); 1165 for_each_online_cpu(c) { 1166 if (c == monarch) 1167 continue; 1168 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) 1169 mprintk(" %d", c); 1170 } 1171 mprintk("\n"); 1172 return; 1173 1174 all_in: 1175 mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); 1176 return; 1177 } 1178 1179 /* 1180 * ia64_mca_handler 1181 * 1182 * This is uncorrectable machine check handler called from OS_MCA 1183 * dispatch code which is in turn called from SAL_CHECK(). 1184 * This is the place where the core of OS MCA handling is done. 1185 * Right now the logs are extracted and displayed in a well-defined 1186 * format. This handler code is supposed to be run only on the 1187 * monarch processor. Once the monarch is done with MCA handling 1188 * further MCA logging is enabled by clearing logs. 1189 * Monarch also has the duty of sending wakeup-IPIs to pull the 1190 * slave processors out of rendezvous spinloop. 1191 * 1192 * If multiple processors call into OS_MCA, the first will become 1193 * the monarch. Subsequent cpus will be recorded in the mca_cpu 1194 * bitmask. After the first monarch has processed its MCA, it 1195 * will wake up the next cpu in the mca_cpu bitmask and then go 1196 * into the rendezvous loop. When all processors have serviced 1197 * their MCA, the last monarch frees up the rest of the processors. 1198 */ 1199 void 1200 ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, 1201 struct ia64_sal_os_state *sos) 1202 { 1203 int recover, cpu = smp_processor_id(); 1204 struct task_struct *previous_current; 1205 struct ia64_mca_notify_die nd = 1206 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1207 static atomic_t mca_count; 1208 static cpumask_t mca_cpu; 1209 1210 if (atomic_add_return(1, &mca_count) == 1) { 1211 monarch_cpu = cpu; 1212 sos->monarch = 1; 1213 } else { 1214 cpu_set(cpu, mca_cpu); 1215 sos->monarch = 0; 1216 } 1217 mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " 1218 "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); 1219 1220 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); 1221 1222 if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0) 1223 == NOTIFY_STOP) 1224 ia64_mca_spin(__FUNCTION__); 1225 1226 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; 1227 if (sos->monarch) { 1228 ia64_wait_for_slaves(cpu, "MCA"); 1229 1230 /* Wakeup all the processors which are spinning in the 1231 * rendezvous loop. They will leave SAL, then spin in the OS 1232 * with interrupts disabled until this monarch cpu leaves the 1233 * MCA handler. That gets control back to the OS so we can 1234 * backtrace the other cpus, backtrace when spinning in SAL 1235 * does not work. 1236 */ 1237 ia64_mca_wakeup_all(); 1238 if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, (long)&nd, 0, 0) 1239 == NOTIFY_STOP) 1240 ia64_mca_spin(__FUNCTION__); 1241 } else { 1242 while (cpu_isset(cpu, mca_cpu)) 1243 cpu_relax(); /* spin until monarch wakes us */ 1244 } 1245 1246 /* Get the MCA error record and log it */ 1247 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); 1248 1249 /* MCA error recovery */ 1250 recover = (ia64_mca_ucmc_extension 1251 && ia64_mca_ucmc_extension( 1252 IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), 1253 sos)); 1254 1255 if (recover) { 1256 sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA); 1257 rh->severity = sal_log_severity_corrected; 1258 ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); 1259 sos->os_status = IA64_MCA_CORRECTED; 1260 } else { 1261 /* Dump buffered message to console */ 1262 ia64_mlogbuf_finish(1); 1263 #ifdef CONFIG_KEXEC 1264 atomic_set(&kdump_in_progress, 1); 1265 monarch_cpu = -1; 1266 #endif 1267 } 1268 if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) 1269 == NOTIFY_STOP) 1270 ia64_mca_spin(__FUNCTION__); 1271 1272 1273 if (atomic_dec_return(&mca_count) > 0) { 1274 int i; 1275 1276 /* wake up the next monarch cpu, 1277 * and put this cpu in the rendez loop. 1278 */ 1279 for_each_online_cpu(i) { 1280 if (cpu_isset(i, mca_cpu)) { 1281 monarch_cpu = i; 1282 cpu_clear(i, mca_cpu); /* wake next cpu */ 1283 while (monarch_cpu != -1) 1284 cpu_relax(); /* spin until last cpu leaves */ 1285 set_curr_task(cpu, previous_current); 1286 ia64_mc_info.imi_rendez_checkin[cpu] 1287 = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 1288 return; 1289 } 1290 } 1291 } 1292 set_curr_task(cpu, previous_current); 1293 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 1294 monarch_cpu = -1; /* This frees the slaves and previous monarchs */ 1295 } 1296 1297 static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd); 1298 static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd); 1299 1300 /* 1301 * ia64_mca_cmc_int_handler 1302 * 1303 * This is corrected machine check interrupt handler. 1304 * Right now the logs are extracted and displayed in a well-defined 1305 * format. 1306 * 1307 * Inputs 1308 * interrupt number 1309 * client data arg ptr 1310 * 1311 * Outputs 1312 * None 1313 */ 1314 static irqreturn_t 1315 ia64_mca_cmc_int_handler(int cmc_irq, void *arg) 1316 { 1317 static unsigned long cmc_history[CMC_HISTORY_LENGTH]; 1318 static int index; 1319 static DEFINE_SPINLOCK(cmc_history_lock); 1320 1321 IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", 1322 __FUNCTION__, cmc_irq, smp_processor_id()); 1323 1324 /* SAL spec states this should run w/ interrupts enabled */ 1325 local_irq_enable(); 1326 1327 spin_lock(&cmc_history_lock); 1328 if (!cmc_polling_enabled) { 1329 int i, count = 1; /* we know 1 happened now */ 1330 unsigned long now = jiffies; 1331 1332 for (i = 0; i < CMC_HISTORY_LENGTH; i++) { 1333 if (now - cmc_history[i] <= HZ) 1334 count++; 1335 } 1336 1337 IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH); 1338 if (count >= CMC_HISTORY_LENGTH) { 1339 1340 cmc_polling_enabled = 1; 1341 spin_unlock(&cmc_history_lock); 1342 /* If we're being hit with CMC interrupts, we won't 1343 * ever execute the schedule_work() below. Need to 1344 * disable CMC interrupts on this processor now. 1345 */ 1346 ia64_mca_cmc_vector_disable(NULL); 1347 schedule_work(&cmc_disable_work); 1348 1349 /* 1350 * Corrected errors will still be corrected, but 1351 * make sure there's a log somewhere that indicates 1352 * something is generating more than we can handle. 1353 */ 1354 printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n"); 1355 1356 mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); 1357 1358 /* lock already released, get out now */ 1359 goto out; 1360 } else { 1361 cmc_history[index++] = now; 1362 if (index == CMC_HISTORY_LENGTH) 1363 index = 0; 1364 } 1365 } 1366 spin_unlock(&cmc_history_lock); 1367 out: 1368 /* Get the CMC error record and log it */ 1369 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); 1370 1371 return IRQ_HANDLED; 1372 } 1373 1374 /* 1375 * ia64_mca_cmc_int_caller 1376 * 1377 * Triggered by sw interrupt from CMC polling routine. Calls 1378 * real interrupt handler and either triggers a sw interrupt 1379 * on the next cpu or does cleanup at the end. 1380 * 1381 * Inputs 1382 * interrupt number 1383 * client data arg ptr 1384 * Outputs 1385 * handled 1386 */ 1387 static irqreturn_t 1388 ia64_mca_cmc_int_caller(int cmc_irq, void *arg) 1389 { 1390 static int start_count = -1; 1391 unsigned int cpuid; 1392 1393 cpuid = smp_processor_id(); 1394 1395 /* If first cpu, update count */ 1396 if (start_count == -1) 1397 start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC); 1398 1399 ia64_mca_cmc_int_handler(cmc_irq, arg); 1400 1401 for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); 1402 1403 if (cpuid < NR_CPUS) { 1404 platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); 1405 } else { 1406 /* If no log record, switch out of polling mode */ 1407 if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) { 1408 1409 printk(KERN_WARNING "Returning to interrupt driven CMC handler\n"); 1410 schedule_work(&cmc_enable_work); 1411 cmc_polling_enabled = 0; 1412 1413 } else { 1414 1415 mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); 1416 } 1417 1418 start_count = -1; 1419 } 1420 1421 return IRQ_HANDLED; 1422 } 1423 1424 /* 1425 * ia64_mca_cmc_poll 1426 * 1427 * Poll for Corrected Machine Checks (CMCs) 1428 * 1429 * Inputs : dummy(unused) 1430 * Outputs : None 1431 * 1432 */ 1433 static void 1434 ia64_mca_cmc_poll (unsigned long dummy) 1435 { 1436 /* Trigger a CMC interrupt cascade */ 1437 platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); 1438 } 1439 1440 /* 1441 * ia64_mca_cpe_int_caller 1442 * 1443 * Triggered by sw interrupt from CPE polling routine. Calls 1444 * real interrupt handler and either triggers a sw interrupt 1445 * on the next cpu or does cleanup at the end. 1446 * 1447 * Inputs 1448 * interrupt number 1449 * client data arg ptr 1450 * Outputs 1451 * handled 1452 */ 1453 #ifdef CONFIG_ACPI 1454 1455 static irqreturn_t 1456 ia64_mca_cpe_int_caller(int cpe_irq, void *arg) 1457 { 1458 static int start_count = -1; 1459 static int poll_time = MIN_CPE_POLL_INTERVAL; 1460 unsigned int cpuid; 1461 1462 cpuid = smp_processor_id(); 1463 1464 /* If first cpu, update count */ 1465 if (start_count == -1) 1466 start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE); 1467 1468 ia64_mca_cpe_int_handler(cpe_irq, arg); 1469 1470 for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); 1471 1472 if (cpuid < NR_CPUS) { 1473 platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); 1474 } else { 1475 /* 1476 * If a log was recorded, increase our polling frequency, 1477 * otherwise, backoff or return to interrupt mode. 1478 */ 1479 if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) { 1480 poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2); 1481 } else if (cpe_vector < 0) { 1482 poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2); 1483 } else { 1484 poll_time = MIN_CPE_POLL_INTERVAL; 1485 1486 printk(KERN_WARNING "Returning to interrupt driven CPE handler\n"); 1487 enable_irq(local_vector_to_irq(IA64_CPE_VECTOR)); 1488 cpe_poll_enabled = 0; 1489 } 1490 1491 if (cpe_poll_enabled) 1492 mod_timer(&cpe_poll_timer, jiffies + poll_time); 1493 start_count = -1; 1494 } 1495 1496 return IRQ_HANDLED; 1497 } 1498 1499 /* 1500 * ia64_mca_cpe_poll 1501 * 1502 * Poll for Corrected Platform Errors (CPEs), trigger interrupt 1503 * on first cpu, from there it will trickle through all the cpus. 1504 * 1505 * Inputs : dummy(unused) 1506 * Outputs : None 1507 * 1508 */ 1509 static void 1510 ia64_mca_cpe_poll (unsigned long dummy) 1511 { 1512 /* Trigger a CPE interrupt cascade */ 1513 platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); 1514 } 1515 1516 #endif /* CONFIG_ACPI */ 1517 1518 static int 1519 default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data) 1520 { 1521 int c; 1522 struct task_struct *g, *t; 1523 if (val != DIE_INIT_MONARCH_PROCESS) 1524 return NOTIFY_DONE; 1525 #ifdef CONFIG_KEXEC 1526 if (atomic_read(&kdump_in_progress)) 1527 return NOTIFY_DONE; 1528 #endif 1529 1530 /* 1531 * FIXME: mlogbuf will brim over with INIT stack dumps. 1532 * To enable show_stack from INIT, we use oops_in_progress which should 1533 * be used in real oops. This would cause something wrong after INIT. 1534 */ 1535 BREAK_LOGLEVEL(console_loglevel); 1536 ia64_mlogbuf_dump_from_init(); 1537 1538 printk(KERN_ERR "Processes interrupted by INIT -"); 1539 for_each_online_cpu(c) { 1540 struct ia64_sal_os_state *s; 1541 t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); 1542 s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); 1543 g = s->prev_task; 1544 if (g) { 1545 if (g->pid) 1546 printk(" %d", g->pid); 1547 else 1548 printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); 1549 } 1550 } 1551 printk("\n\n"); 1552 if (read_trylock(&tasklist_lock)) { 1553 do_each_thread (g, t) { 1554 printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); 1555 show_stack(t, NULL); 1556 } while_each_thread (g, t); 1557 read_unlock(&tasklist_lock); 1558 } 1559 /* FIXME: This will not restore zapped printk locks. */ 1560 RESTORE_LOGLEVEL(console_loglevel); 1561 return NOTIFY_DONE; 1562 } 1563 1564 /* 1565 * C portion of the OS INIT handler 1566 * 1567 * Called from ia64_os_init_dispatch 1568 * 1569 * Inputs: pointer to pt_regs where processor info was saved. SAL/OS state for 1570 * this event. This code is used for both monarch and slave INIT events, see 1571 * sos->monarch. 1572 * 1573 * All INIT events switch to the INIT stack and change the previous process to 1574 * blocked status. If one of the INIT events is the monarch then we are 1575 * probably processing the nmi button/command. Use the monarch cpu to dump all 1576 * the processes. The slave INIT events all spin until the monarch cpu 1577 * returns. We can also get INIT slave events for MCA, in which case the MCA 1578 * process is the monarch. 1579 */ 1580 1581 void 1582 ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, 1583 struct ia64_sal_os_state *sos) 1584 { 1585 static atomic_t slaves; 1586 static atomic_t monarchs; 1587 struct task_struct *previous_current; 1588 int cpu = smp_processor_id(); 1589 struct ia64_mca_notify_die nd = 1590 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1591 1592 (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); 1593 1594 mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", 1595 sos->proc_state_param, cpu, sos->monarch); 1596 salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); 1597 1598 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT"); 1599 sos->os_status = IA64_INIT_RESUME; 1600 1601 /* FIXME: Workaround for broken proms that drive all INIT events as 1602 * slaves. The last slave that enters is promoted to be a monarch. 1603 * Remove this code in September 2006, that gives platforms a year to 1604 * fix their proms and get their customers updated. 1605 */ 1606 if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { 1607 mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", 1608 __FUNCTION__, cpu); 1609 atomic_dec(&slaves); 1610 sos->monarch = 1; 1611 } 1612 1613 /* FIXME: Workaround for broken proms that drive all INIT events as 1614 * monarchs. Second and subsequent monarchs are demoted to slaves. 1615 * Remove this code in September 2006, that gives platforms a year to 1616 * fix their proms and get their customers updated. 1617 */ 1618 if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { 1619 mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", 1620 __FUNCTION__, cpu); 1621 atomic_dec(&monarchs); 1622 sos->monarch = 0; 1623 } 1624 1625 if (!sos->monarch) { 1626 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; 1627 while (monarch_cpu == -1) 1628 cpu_relax(); /* spin until monarch enters */ 1629 if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, (long)&nd, 0, 0) 1630 == NOTIFY_STOP) 1631 ia64_mca_spin(__FUNCTION__); 1632 if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, (long)&nd, 0, 0) 1633 == NOTIFY_STOP) 1634 ia64_mca_spin(__FUNCTION__); 1635 while (monarch_cpu != -1) 1636 cpu_relax(); /* spin until monarch leaves */ 1637 if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) 1638 == NOTIFY_STOP) 1639 ia64_mca_spin(__FUNCTION__); 1640 mprintk("Slave on cpu %d returning to normal service.\n", cpu); 1641 set_curr_task(cpu, previous_current); 1642 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 1643 atomic_dec(&slaves); 1644 return; 1645 } 1646 1647 monarch_cpu = cpu; 1648 if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, (long)&nd, 0, 0) 1649 == NOTIFY_STOP) 1650 ia64_mca_spin(__FUNCTION__); 1651 1652 /* 1653 * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be 1654 * generated via the BMC's command-line interface, but since the console is on the 1655 * same serial line, the user will need some time to switch out of the BMC before 1656 * the dump begins. 1657 */ 1658 mprintk("Delaying for 5 seconds...\n"); 1659 udelay(5*1000000); 1660 ia64_wait_for_slaves(cpu, "INIT"); 1661 /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through 1662 * to default_monarch_init_process() above and just print all the 1663 * tasks. 1664 */ 1665 if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, (long)&nd, 0, 0) 1666 == NOTIFY_STOP) 1667 ia64_mca_spin(__FUNCTION__); 1668 if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) 1669 == NOTIFY_STOP) 1670 ia64_mca_spin(__FUNCTION__); 1671 mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); 1672 atomic_dec(&monarchs); 1673 set_curr_task(cpu, previous_current); 1674 monarch_cpu = -1; 1675 return; 1676 } 1677 1678 static int __init 1679 ia64_mca_disable_cpe_polling(char *str) 1680 { 1681 cpe_poll_enabled = 0; 1682 return 1; 1683 } 1684 1685 __setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); 1686 1687 static struct irqaction cmci_irqaction = { 1688 .handler = ia64_mca_cmc_int_handler, 1689 .flags = IRQF_DISABLED, 1690 .name = "cmc_hndlr" 1691 }; 1692 1693 static struct irqaction cmcp_irqaction = { 1694 .handler = ia64_mca_cmc_int_caller, 1695 .flags = IRQF_DISABLED, 1696 .name = "cmc_poll" 1697 }; 1698 1699 static struct irqaction mca_rdzv_irqaction = { 1700 .handler = ia64_mca_rendez_int_handler, 1701 .flags = IRQF_DISABLED, 1702 .name = "mca_rdzv" 1703 }; 1704 1705 static struct irqaction mca_wkup_irqaction = { 1706 .handler = ia64_mca_wakeup_int_handler, 1707 .flags = IRQF_DISABLED, 1708 .name = "mca_wkup" 1709 }; 1710 1711 #ifdef CONFIG_ACPI 1712 static struct irqaction mca_cpe_irqaction = { 1713 .handler = ia64_mca_cpe_int_handler, 1714 .flags = IRQF_DISABLED, 1715 .name = "cpe_hndlr" 1716 }; 1717 1718 static struct irqaction mca_cpep_irqaction = { 1719 .handler = ia64_mca_cpe_int_caller, 1720 .flags = IRQF_DISABLED, 1721 .name = "cpe_poll" 1722 }; 1723 #endif /* CONFIG_ACPI */ 1724 1725 /* Minimal format of the MCA/INIT stacks. The pseudo processes that run on 1726 * these stacks can never sleep, they cannot return from the kernel to user 1727 * space, they do not appear in a normal ps listing. So there is no need to 1728 * format most of the fields. 1729 */ 1730 1731 static void __cpuinit 1732 format_mca_init_stack(void *mca_data, unsigned long offset, 1733 const char *type, int cpu) 1734 { 1735 struct task_struct *p = (struct task_struct *)((char *)mca_data + offset); 1736 struct thread_info *ti; 1737 memset(p, 0, KERNEL_STACK_SIZE); 1738 ti = task_thread_info(p); 1739 ti->flags = _TIF_MCA_INIT; 1740 ti->preempt_count = 1; 1741 ti->task = p; 1742 ti->cpu = cpu; 1743 p->stack = ti; 1744 p->state = TASK_UNINTERRUPTIBLE; 1745 cpu_set(cpu, p->cpus_allowed); 1746 INIT_LIST_HEAD(&p->tasks); 1747 p->parent = p->real_parent = p->group_leader = p; 1748 INIT_LIST_HEAD(&p->children); 1749 INIT_LIST_HEAD(&p->sibling); 1750 strncpy(p->comm, type, sizeof(p->comm)-1); 1751 } 1752 1753 /* Caller prevents this from being called after init */ 1754 static void * __init_refok mca_bootmem(void) 1755 { 1756 void *p; 1757 1758 p = alloc_bootmem(sizeof(struct ia64_mca_cpu) * NR_CPUS + 1759 KERNEL_STACK_SIZE); 1760 return (void *)ALIGN((unsigned long)p, KERNEL_STACK_SIZE); 1761 } 1762 1763 /* Do per-CPU MCA-related initialization. */ 1764 void __cpuinit 1765 ia64_mca_cpu_init(void *cpu_data) 1766 { 1767 void *pal_vaddr; 1768 static int first_time = 1; 1769 1770 if (first_time) { 1771 void *mca_data; 1772 int cpu; 1773 1774 first_time = 0; 1775 mca_data = mca_bootmem(); 1776 for (cpu = 0; cpu < NR_CPUS; cpu++) { 1777 format_mca_init_stack(mca_data, 1778 offsetof(struct ia64_mca_cpu, mca_stack), 1779 "MCA", cpu); 1780 format_mca_init_stack(mca_data, 1781 offsetof(struct ia64_mca_cpu, init_stack), 1782 "INIT", cpu); 1783 __per_cpu_mca[cpu] = __pa(mca_data); 1784 mca_data += sizeof(struct ia64_mca_cpu); 1785 } 1786 } 1787 1788 /* 1789 * The MCA info structure was allocated earlier and its 1790 * physical address saved in __per_cpu_mca[cpu]. Copy that 1791 * address * to ia64_mca_data so we can access it as a per-CPU 1792 * variable. 1793 */ 1794 __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()]; 1795 1796 /* 1797 * Stash away a copy of the PTE needed to map the per-CPU page. 1798 * We may need it during MCA recovery. 1799 */ 1800 __get_cpu_var(ia64_mca_per_cpu_pte) = 1801 pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)); 1802 1803 /* 1804 * Also, stash away a copy of the PAL address and the PTE 1805 * needed to map it. 1806 */ 1807 pal_vaddr = efi_get_pal_addr(); 1808 if (!pal_vaddr) 1809 return; 1810 __get_cpu_var(ia64_mca_pal_base) = 1811 GRANULEROUNDDOWN((unsigned long) pal_vaddr); 1812 __get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr), 1813 PAGE_KERNEL)); 1814 } 1815 1816 /* 1817 * ia64_mca_init 1818 * 1819 * Do all the system level mca specific initialization. 1820 * 1821 * 1. Register spinloop and wakeup request interrupt vectors 1822 * 1823 * 2. Register OS_MCA handler entry point 1824 * 1825 * 3. Register OS_INIT handler entry point 1826 * 1827 * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS. 1828 * 1829 * Note that this initialization is done very early before some kernel 1830 * services are available. 1831 * 1832 * Inputs : None 1833 * 1834 * Outputs : None 1835 */ 1836 void __init 1837 ia64_mca_init(void) 1838 { 1839 ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch; 1840 ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave; 1841 ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; 1842 int i; 1843 s64 rc; 1844 struct ia64_sal_retval isrv; 1845 u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ 1846 static struct notifier_block default_init_monarch_nb = { 1847 .notifier_call = default_monarch_init_process, 1848 .priority = 0/* we need to notified last */ 1849 }; 1850 1851 IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); 1852 1853 /* Clear the Rendez checkin flag for all cpus */ 1854 for(i = 0 ; i < NR_CPUS; i++) 1855 ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 1856 1857 /* 1858 * Register the rendezvous spinloop and wakeup mechanism with SAL 1859 */ 1860 1861 /* Register the rendezvous interrupt vector with SAL */ 1862 while (1) { 1863 isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, 1864 SAL_MC_PARAM_MECHANISM_INT, 1865 IA64_MCA_RENDEZ_VECTOR, 1866 timeout, 1867 SAL_MC_PARAM_RZ_ALWAYS); 1868 rc = isrv.status; 1869 if (rc == 0) 1870 break; 1871 if (rc == -2) { 1872 printk(KERN_INFO "Increasing MCA rendezvous timeout from " 1873 "%ld to %ld milliseconds\n", timeout, isrv.v0); 1874 timeout = isrv.v0; 1875 (void) notify_die(DIE_MCA_NEW_TIMEOUT, "MCA", NULL, timeout, 0, 0); 1876 continue; 1877 } 1878 printk(KERN_ERR "Failed to register rendezvous interrupt " 1879 "with SAL (status %ld)\n", rc); 1880 return; 1881 } 1882 1883 /* Register the wakeup interrupt vector with SAL */ 1884 isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, 1885 SAL_MC_PARAM_MECHANISM_INT, 1886 IA64_MCA_WAKEUP_VECTOR, 1887 0, 0); 1888 rc = isrv.status; 1889 if (rc) { 1890 printk(KERN_ERR "Failed to register wakeup interrupt with SAL " 1891 "(status %ld)\n", rc); 1892 return; 1893 } 1894 1895 IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__); 1896 1897 ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp); 1898 /* 1899 * XXX - disable SAL checksum by setting size to 0; should be 1900 * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch); 1901 */ 1902 ia64_mc_info.imi_mca_handler_size = 0; 1903 1904 /* Register the os mca handler with SAL */ 1905 if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 1906 ia64_mc_info.imi_mca_handler, 1907 ia64_tpa(mca_hldlr_ptr->gp), 1908 ia64_mc_info.imi_mca_handler_size, 1909 0, 0, 0))) 1910 { 1911 printk(KERN_ERR "Failed to register OS MCA handler with SAL " 1912 "(status %ld)\n", rc); 1913 return; 1914 } 1915 1916 IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__, 1917 ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp)); 1918 1919 /* 1920 * XXX - disable SAL checksum by setting size to 0, should be 1921 * size of the actual init handler in mca_asm.S. 1922 */ 1923 ia64_mc_info.imi_monarch_init_handler = ia64_tpa(init_hldlr_ptr_monarch->fp); 1924 ia64_mc_info.imi_monarch_init_handler_size = 0; 1925 ia64_mc_info.imi_slave_init_handler = ia64_tpa(init_hldlr_ptr_slave->fp); 1926 ia64_mc_info.imi_slave_init_handler_size = 0; 1927 1928 IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__, 1929 ia64_mc_info.imi_monarch_init_handler); 1930 1931 /* Register the os init handler with SAL */ 1932 if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 1933 ia64_mc_info.imi_monarch_init_handler, 1934 ia64_tpa(ia64_getreg(_IA64_REG_GP)), 1935 ia64_mc_info.imi_monarch_init_handler_size, 1936 ia64_mc_info.imi_slave_init_handler, 1937 ia64_tpa(ia64_getreg(_IA64_REG_GP)), 1938 ia64_mc_info.imi_slave_init_handler_size))) 1939 { 1940 printk(KERN_ERR "Failed to register m/s INIT handlers with SAL " 1941 "(status %ld)\n", rc); 1942 return; 1943 } 1944 if (register_die_notifier(&default_init_monarch_nb)) { 1945 printk(KERN_ERR "Failed to register default monarch INIT process\n"); 1946 return; 1947 } 1948 1949 IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); 1950 1951 /* 1952 * Configure the CMCI/P vector and handler. Interrupts for CMC are 1953 * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). 1954 */ 1955 register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction); 1956 register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction); 1957 ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */ 1958 1959 /* Setup the MCA rendezvous interrupt vector */ 1960 register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction); 1961 1962 /* Setup the MCA wakeup interrupt vector */ 1963 register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); 1964 1965 #ifdef CONFIG_ACPI 1966 /* Setup the CPEI/P handler */ 1967 register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); 1968 #endif 1969 1970 /* Initialize the areas set aside by the OS to buffer the 1971 * platform/processor error states for MCA/INIT/CMC 1972 * handling. 1973 */ 1974 ia64_log_init(SAL_INFO_TYPE_MCA); 1975 ia64_log_init(SAL_INFO_TYPE_INIT); 1976 ia64_log_init(SAL_INFO_TYPE_CMC); 1977 ia64_log_init(SAL_INFO_TYPE_CPE); 1978 1979 mca_init = 1; 1980 printk(KERN_INFO "MCA related initialization done\n"); 1981 } 1982 1983 /* 1984 * ia64_mca_late_init 1985 * 1986 * Opportunity to setup things that require initialization later 1987 * than ia64_mca_init. Setup a timer to poll for CPEs if the 1988 * platform doesn't support an interrupt driven mechanism. 1989 * 1990 * Inputs : None 1991 * Outputs : Status 1992 */ 1993 static int __init 1994 ia64_mca_late_init(void) 1995 { 1996 if (!mca_init) 1997 return 0; 1998 1999 /* Setup the CMCI/P vector and handler */ 2000 init_timer(&cmc_poll_timer); 2001 cmc_poll_timer.function = ia64_mca_cmc_poll; 2002 2003 /* Unmask/enable the vector */ 2004 cmc_polling_enabled = 0; 2005 schedule_work(&cmc_enable_work); 2006 2007 IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__); 2008 2009 #ifdef CONFIG_ACPI 2010 /* Setup the CPEI/P vector and handler */ 2011 cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); 2012 init_timer(&cpe_poll_timer); 2013 cpe_poll_timer.function = ia64_mca_cpe_poll; 2014 2015 { 2016 irq_desc_t *desc; 2017 unsigned int irq; 2018 2019 if (cpe_vector >= 0) { 2020 /* If platform supports CPEI, enable the irq. */ 2021 irq = local_vector_to_irq(cpe_vector); 2022 if (irq > 0) { 2023 cpe_poll_enabled = 0; 2024 desc = irq_desc + irq; 2025 desc->status |= IRQ_PER_CPU; 2026 setup_irq(irq, &mca_cpe_irqaction); 2027 ia64_cpe_irq = irq; 2028 ia64_mca_register_cpev(cpe_vector); 2029 IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", 2030 __FUNCTION__); 2031 return 0; 2032 } 2033 printk(KERN_ERR "%s: Failed to find irq for CPE " 2034 "interrupt handler, vector %d\n", 2035 __FUNCTION__, cpe_vector); 2036 } 2037 /* If platform doesn't support CPEI, get the timer going. */ 2038 if (cpe_poll_enabled) { 2039 ia64_mca_cpe_poll(0UL); 2040 IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__); 2041 } 2042 } 2043 #endif 2044 2045 return 0; 2046 } 2047 2048 device_initcall(ia64_mca_late_init); 2049