xref: /openbmc/linux/arch/ia64/kernel/mca.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * File:	mca.c
4   * Purpose:	Generic MCA handling layer
5   *
6   * Copyright (C) 2003 Hewlett-Packard Co
7   *	David Mosberger-Tang <davidm@hpl.hp.com>
8   *
9   * Copyright (C) 2002 Dell Inc.
10   * Copyright (C) Matt Domsch <Matt_Domsch@dell.com>
11   *
12   * Copyright (C) 2002 Intel
13   * Copyright (C) Jenna Hall <jenna.s.hall@intel.com>
14   *
15   * Copyright (C) 2001 Intel
16   * Copyright (C) Fred Lewis <frederick.v.lewis@intel.com>
17   *
18   * Copyright (C) 2000 Intel
19   * Copyright (C) Chuck Fleckenstein <cfleck@co.intel.com>
20   *
21   * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc.
22   * Copyright (C) Vijay Chander <vijay@engr.sgi.com>
23   *
24   * Copyright (C) 2006 FUJITSU LIMITED
25   * Copyright (C) Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
26   *
27   * 2000-03-29 Chuck Fleckenstein <cfleck@co.intel.com>
28   *	      Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
29   *	      added min save state dump, added INIT handler.
30   *
31   * 2001-01-03 Fred Lewis <frederick.v.lewis@intel.com>
32   *	      Added setup of CMCI and CPEI IRQs, logging of corrected platform
33   *	      errors, completed code for logging of corrected & uncorrected
34   *	      machine check errors, and updated for conformance with Nov. 2000
35   *	      revision of the SAL 3.0 spec.
36   *
37   * 2002-01-04 Jenna Hall <jenna.s.hall@intel.com>
38   *	      Aligned MCA stack to 16 bytes, added platform vs. CPU error flag,
39   *	      set SAL default return values, changed error record structure to
40   *	      linked list, added init call to sal_get_state_info_size().
41   *
42   * 2002-03-25 Matt Domsch <Matt_Domsch@dell.com>
43   *	      GUID cleanups.
44   *
45   * 2003-04-15 David Mosberger-Tang <davidm@hpl.hp.com>
46   *	      Added INIT backtrace support.
47   *
48   * 2003-12-08 Keith Owens <kaos@sgi.com>
49   *	      smp_call_function() must not be called from interrupt context
50   *	      (can deadlock on tasklist_lock).
51   *	      Use keventd to call smp_call_function().
52   *
53   * 2004-02-01 Keith Owens <kaos@sgi.com>
54   *	      Avoid deadlock when using printk() for MCA and INIT records.
55   *	      Delete all record printing code, moved to salinfo_decode in user
56   *	      space.  Mark variables and functions static where possible.
57   *	      Delete dead variables and functions.  Reorder to remove the need
58   *	      for forward declarations and to consolidate related code.
59   *
60   * 2005-08-12 Keith Owens <kaos@sgi.com>
61   *	      Convert MCA/INIT handlers to use per event stacks and SAL/OS
62   *	      state.
63   *
64   * 2005-10-07 Keith Owens <kaos@sgi.com>
65   *	      Add notify_die() hooks.
66   *
67   * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
68   *	      Add printing support for MCA/INIT.
69   *
70   * 2007-04-27 Russ Anderson <rja@sgi.com>
71   *	      Support multiple cpus going through OS_MCA in the same event.
72   */
73  #include <linux/jiffies.h>
74  #include <linux/types.h>
75  #include <linux/init.h>
76  #include <linux/sched/signal.h>
77  #include <linux/sched/debug.h>
78  #include <linux/sched/task.h>
79  #include <linux/interrupt.h>
80  #include <linux/irq.h>
81  #include <linux/memblock.h>
82  #include <linux/acpi.h>
83  #include <linux/timer.h>
84  #include <linux/module.h>
85  #include <linux/kernel.h>
86  #include <linux/smp.h>
87  #include <linux/workqueue.h>
88  #include <linux/cpumask.h>
89  #include <linux/kdebug.h>
90  #include <linux/cpu.h>
91  #include <linux/gfp.h>
92  
93  #include <asm/delay.h>
94  #include <asm/efi.h>
95  #include <asm/meminit.h>
96  #include <asm/page.h>
97  #include <asm/ptrace.h>
98  #include <asm/sal.h>
99  #include <asm/mca.h>
100  #include <asm/mca_asm.h>
101  #include <asm/kexec.h>
102  
103  #include <asm/irq.h>
104  #include <asm/hw_irq.h>
105  #include <asm/tlb.h>
106  
107  #include "mca_drv.h"
108  #include "entry.h"
109  #include "irq.h"
110  
111  #if defined(IA64_MCA_DEBUG_INFO)
112  # define IA64_MCA_DEBUG(fmt...) printk(fmt)
113  #else
114  # define IA64_MCA_DEBUG(fmt...) do {} while (0)
115  #endif
116  
117  #define NOTIFY_INIT(event, regs, arg, spin)				\
118  do {									\
119  	if ((notify_die((event), "INIT", (regs), (arg), 0, 0)		\
120  			== NOTIFY_STOP) && ((spin) == 1))		\
121  		ia64_mca_spin(__func__);				\
122  } while (0)
123  
124  #define NOTIFY_MCA(event, regs, arg, spin)				\
125  do {									\
126  	if ((notify_die((event), "MCA", (regs), (arg), 0, 0)		\
127  			== NOTIFY_STOP) && ((spin) == 1))		\
128  		ia64_mca_spin(__func__);				\
129  } while (0)
130  
131  /* Used by mca_asm.S */
132  DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
133  DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
134  DEFINE_PER_CPU(u64, ia64_mca_pal_pte);	    /* PTE to map PAL code */
135  DEFINE_PER_CPU(u64, ia64_mca_pal_base);    /* vaddr PAL code granule */
136  DEFINE_PER_CPU(u64, ia64_mca_tr_reload);   /* Flag for TR reload */
137  
138  unsigned long __per_cpu_mca[NR_CPUS];
139  
140  /* In mca_asm.S */
141  extern void			ia64_os_init_dispatch_monarch (void);
142  extern void			ia64_os_init_dispatch_slave (void);
143  
144  static int monarch_cpu = -1;
145  
146  static ia64_mc_info_t		ia64_mc_info;
147  
148  #define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
149  #define MIN_CPE_POLL_INTERVAL (2*60*HZ)  /* 2 minutes */
150  #define CMC_POLL_INTERVAL     (1*60*HZ)  /* 1 minute */
151  #define CPE_HISTORY_LENGTH    5
152  #define CMC_HISTORY_LENGTH    5
153  
154  static struct timer_list cpe_poll_timer;
155  static struct timer_list cmc_poll_timer;
156  /*
157   * This variable tells whether we are currently in polling mode.
158   * Start with this in the wrong state so we won't play w/ timers
159   * before the system is ready.
160   */
161  static int cmc_polling_enabled = 1;
162  
163  /*
164   * Clearing this variable prevents CPE polling from getting activated
165   * in mca_late_init.  Use it if your system doesn't provide a CPEI,
166   * but encounters problems retrieving CPE logs.  This should only be
167   * necessary for debugging.
168   */
169  static int cpe_poll_enabled = 1;
170  
171  extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
172  
173  static int mca_init __initdata;
174  
175  /*
176   * limited & delayed printing support for MCA/INIT handler
177   */
178  
179  #define mprintk(fmt...) ia64_mca_printk(fmt)
180  
181  #define MLOGBUF_SIZE (512+256*NR_CPUS)
182  #define MLOGBUF_MSGMAX 256
183  static char mlogbuf[MLOGBUF_SIZE];
184  static DEFINE_SPINLOCK(mlogbuf_wlock);	/* mca context only */
185  static DEFINE_SPINLOCK(mlogbuf_rlock);	/* normal context only */
186  static unsigned long mlogbuf_start;
187  static unsigned long mlogbuf_end;
188  static unsigned int mlogbuf_finished = 0;
189  static unsigned long mlogbuf_timestamp = 0;
190  
191  static int loglevel_save = -1;
192  #define BREAK_LOGLEVEL(__console_loglevel)		\
193  	oops_in_progress = 1;				\
194  	if (loglevel_save < 0)				\
195  		loglevel_save = __console_loglevel;	\
196  	__console_loglevel = 15;
197  
198  #define RESTORE_LOGLEVEL(__console_loglevel)		\
199  	if (loglevel_save >= 0) {			\
200  		__console_loglevel = loglevel_save;	\
201  		loglevel_save = -1;			\
202  	}						\
203  	mlogbuf_finished = 0;				\
204  	oops_in_progress = 0;
205  
206  /*
207   * Push messages into buffer, print them later if not urgent.
208   */
ia64_mca_printk(const char * fmt,...)209  void ia64_mca_printk(const char *fmt, ...)
210  {
211  	va_list args;
212  	int printed_len;
213  	char temp_buf[MLOGBUF_MSGMAX];
214  	char *p;
215  
216  	va_start(args, fmt);
217  	printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);
218  	va_end(args);
219  
220  	/* Copy the output into mlogbuf */
221  	if (oops_in_progress) {
222  		/* mlogbuf was abandoned, use printk directly instead. */
223  		printk("%s", temp_buf);
224  	} else {
225  		spin_lock(&mlogbuf_wlock);
226  		for (p = temp_buf; *p; p++) {
227  			unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
228  			if (next != mlogbuf_start) {
229  				mlogbuf[mlogbuf_end] = *p;
230  				mlogbuf_end = next;
231  			} else {
232  				/* buffer full */
233  				break;
234  			}
235  		}
236  		mlogbuf[mlogbuf_end] = '\0';
237  		spin_unlock(&mlogbuf_wlock);
238  	}
239  }
240  EXPORT_SYMBOL(ia64_mca_printk);
241  
242  /*
243   * Print buffered messages.
244   *  NOTE: call this after returning normal context. (ex. from salinfod)
245   */
ia64_mlogbuf_dump(void)246  void ia64_mlogbuf_dump(void)
247  {
248  	char temp_buf[MLOGBUF_MSGMAX];
249  	char *p;
250  	unsigned long index;
251  	unsigned long flags;
252  	unsigned int printed_len;
253  
254  	/* Get output from mlogbuf */
255  	while (mlogbuf_start != mlogbuf_end) {
256  		temp_buf[0] = '\0';
257  		p = temp_buf;
258  		printed_len = 0;
259  
260  		spin_lock_irqsave(&mlogbuf_rlock, flags);
261  
262  		index = mlogbuf_start;
263  		while (index != mlogbuf_end) {
264  			*p = mlogbuf[index];
265  			index = (index + 1) % MLOGBUF_SIZE;
266  			if (!*p)
267  				break;
268  			p++;
269  			if (++printed_len >= MLOGBUF_MSGMAX - 1)
270  				break;
271  		}
272  		*p = '\0';
273  		if (temp_buf[0])
274  			printk("%s", temp_buf);
275  		mlogbuf_start = index;
276  
277  		mlogbuf_timestamp = 0;
278  		spin_unlock_irqrestore(&mlogbuf_rlock, flags);
279  	}
280  }
281  EXPORT_SYMBOL(ia64_mlogbuf_dump);
282  
283  /*
284   * Call this if system is going to down or if immediate flushing messages to
285   * console is required. (ex. recovery was failed, crash dump is going to be
286   * invoked, long-wait rendezvous etc.)
287   *  NOTE: this should be called from monarch.
288   */
ia64_mlogbuf_finish(int wait)289  static void ia64_mlogbuf_finish(int wait)
290  {
291  	BREAK_LOGLEVEL(console_loglevel);
292  
293  	ia64_mlogbuf_dump();
294  	printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
295  		"MCA/INIT might be dodgy or fail.\n");
296  
297  	if (!wait)
298  		return;
299  
300  	/* wait for console */
301  	printk("Delaying for 5 seconds...\n");
302  	udelay(5*1000000);
303  
304  	mlogbuf_finished = 1;
305  }
306  
307  /*
308   * Print buffered messages from INIT context.
309   */
ia64_mlogbuf_dump_from_init(void)310  static void ia64_mlogbuf_dump_from_init(void)
311  {
312  	if (mlogbuf_finished)
313  		return;
314  
315  	if (mlogbuf_timestamp &&
316  			time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
317  		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
318  			" and the system seems to be messed up.\n");
319  		ia64_mlogbuf_finish(0);
320  		return;
321  	}
322  
323  	if (!spin_trylock(&mlogbuf_rlock)) {
324  		printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
325  			"Generated messages other than stack dump will be "
326  			"buffered to mlogbuf and will be printed later.\n");
327  		printk(KERN_ERR "INIT: If messages would not printed after "
328  			"this INIT, wait 30sec and assert INIT again.\n");
329  		if (!mlogbuf_timestamp)
330  			mlogbuf_timestamp = jiffies;
331  		return;
332  	}
333  	spin_unlock(&mlogbuf_rlock);
334  	ia64_mlogbuf_dump();
335  }
336  
337  static inline void
ia64_mca_spin(const char * func)338  ia64_mca_spin(const char *func)
339  {
340  	if (monarch_cpu == smp_processor_id())
341  		ia64_mlogbuf_finish(0);
342  	mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
343  	while (1)
344  		cpu_relax();
345  }
346  /*
347   * IA64_MCA log support
348   */
349  #define IA64_MAX_LOGS		2	/* Double-buffering for nested MCAs */
350  #define IA64_MAX_LOG_TYPES      4   /* MCA, INIT, CMC, CPE */
351  
352  typedef struct ia64_state_log_s
353  {
354  	spinlock_t	isl_lock;
355  	int		isl_index;
356  	unsigned long	isl_count;
357  	ia64_err_rec_t  *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
358  } ia64_state_log_t;
359  
360  static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
361  
362  #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
363  #define IA64_LOG_LOCK(it)      spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
364  #define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
365  #define IA64_LOG_NEXT_INDEX(it)    ia64_state_log[it].isl_index
366  #define IA64_LOG_CURR_INDEX(it)    1 - ia64_state_log[it].isl_index
367  #define IA64_LOG_INDEX_INC(it) \
368      {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
369      ia64_state_log[it].isl_count++;}
370  #define IA64_LOG_INDEX_DEC(it) \
371      ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
372  #define IA64_LOG_NEXT_BUFFER(it)   (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
373  #define IA64_LOG_CURR_BUFFER(it)   (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
374  #define IA64_LOG_COUNT(it)         ia64_state_log[it].isl_count
375  
ia64_log_allocate(int it,u64 size)376  static inline void ia64_log_allocate(int it, u64 size)
377  {
378  	ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] =
379  		(ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);
380  	if (!ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])
381  		panic("%s: Failed to allocate %llu bytes\n", __func__, size);
382  
383  	ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] =
384  		(ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);
385  	if (!ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])
386  		panic("%s: Failed to allocate %llu bytes\n", __func__, size);
387  }
388  
389  /*
390   * ia64_log_init
391   *	Reset the OS ia64 log buffer
392   * Inputs   :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
393   * Outputs	:	None
394   */
395  static void __init
ia64_log_init(int sal_info_type)396  ia64_log_init(int sal_info_type)
397  {
398  	u64	max_size = 0;
399  
400  	IA64_LOG_NEXT_INDEX(sal_info_type) = 0;
401  	IA64_LOG_LOCK_INIT(sal_info_type);
402  
403  	// SAL will tell us the maximum size of any error record of this type
404  	max_size = ia64_sal_get_state_info_size(sal_info_type);
405  	if (!max_size)
406  		/* alloc_bootmem() doesn't like zero-sized allocations! */
407  		return;
408  
409  	// set up OS data structures to hold error info
410  	ia64_log_allocate(sal_info_type, max_size);
411  }
412  
413  /*
414   * ia64_log_get
415   *
416   *	Get the current MCA log from SAL and copy it into the OS log buffer.
417   *
418   *  Inputs  :   info_type   (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
419   *              irq_safe    whether you can use printk at this point
420   *  Outputs :   size        (total record length)
421   *              *buffer     (ptr to error record)
422   *
423   */
424  static u64
ia64_log_get(int sal_info_type,u8 ** buffer,int irq_safe)425  ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
426  {
427  	sal_log_record_header_t     *log_buffer;
428  	u64                         total_len = 0;
429  	unsigned long               s;
430  
431  	IA64_LOG_LOCK(sal_info_type);
432  
433  	/* Get the process state information */
434  	log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type);
435  
436  	total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer);
437  
438  	if (total_len) {
439  		IA64_LOG_INDEX_INC(sal_info_type);
440  		IA64_LOG_UNLOCK(sal_info_type);
441  		if (irq_safe) {
442  			IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. Record length = %ld\n",
443  				       __func__, sal_info_type, total_len);
444  		}
445  		*buffer = (u8 *) log_buffer;
446  		return total_len;
447  	} else {
448  		IA64_LOG_UNLOCK(sal_info_type);
449  		return 0;
450  	}
451  }
452  
453  /*
454   *  ia64_mca_log_sal_error_record
455   *
456   *  This function retrieves a specified error record type from SAL
457   *  and wakes up any processes waiting for error records.
458   *
459   *  Inputs  :   sal_info_type   (Type of error record MCA/CMC/CPE)
460   *              FIXME: remove MCA and irq_safe.
461   */
462  static void
ia64_mca_log_sal_error_record(int sal_info_type)463  ia64_mca_log_sal_error_record(int sal_info_type)
464  {
465  	u8 *buffer;
466  	sal_log_record_header_t *rh;
467  	u64 size;
468  	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA;
469  #ifdef IA64_MCA_DEBUG_INFO
470  	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
471  #endif
472  
473  	size = ia64_log_get(sal_info_type, &buffer, irq_safe);
474  	if (!size)
475  		return;
476  
477  	salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe);
478  
479  	if (irq_safe)
480  		IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n",
481  			smp_processor_id(),
482  			sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN");
483  
484  	/* Clear logs from corrected errors in case there's no user-level logger */
485  	rh = (sal_log_record_header_t *)buffer;
486  	if (rh->severity == sal_log_severity_corrected)
487  		ia64_sal_clear_state_info(sal_info_type);
488  }
489  
490  /*
491   * search_mca_table
492   *  See if the MCA surfaced in an instruction range
493   *  that has been tagged as recoverable.
494   *
495   *  Inputs
496   *	first	First address range to check
497   *	last	Last address range to check
498   *	ip	Instruction pointer, address we are looking for
499   *
500   * Return value:
501   *      1 on Success (in the table)/ 0 on Failure (not in the  table)
502   */
503  int
search_mca_table(const struct mca_table_entry * first,const struct mca_table_entry * last,unsigned long ip)504  search_mca_table (const struct mca_table_entry *first,
505                  const struct mca_table_entry *last,
506                  unsigned long ip)
507  {
508          const struct mca_table_entry *curr;
509          u64 curr_start, curr_end;
510  
511          curr = first;
512          while (curr <= last) {
513                  curr_start = (u64) &curr->start_addr + curr->start_addr;
514                  curr_end = (u64) &curr->end_addr + curr->end_addr;
515  
516                  if ((ip >= curr_start) && (ip <= curr_end)) {
517                          return 1;
518                  }
519                  curr++;
520          }
521          return 0;
522  }
523  
524  /* Given an address, look for it in the mca tables. */
mca_recover_range(unsigned long addr)525  int mca_recover_range(unsigned long addr)
526  {
527  	extern struct mca_table_entry __start___mca_table[];
528  	extern struct mca_table_entry __stop___mca_table[];
529  
530  	return search_mca_table(__start___mca_table, __stop___mca_table-1, addr);
531  }
532  EXPORT_SYMBOL_GPL(mca_recover_range);
533  
534  int cpe_vector = -1;
535  int ia64_cpe_irq = -1;
536  
537  static irqreturn_t
ia64_mca_cpe_int_handler(int cpe_irq,void * arg)538  ia64_mca_cpe_int_handler (int cpe_irq, void *arg)
539  {
540  	static unsigned long	cpe_history[CPE_HISTORY_LENGTH];
541  	static int		index;
542  	static DEFINE_SPINLOCK(cpe_history_lock);
543  
544  	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
545  		       __func__, cpe_irq, smp_processor_id());
546  
547  	/* SAL spec states this should run w/ interrupts enabled */
548  	local_irq_enable();
549  
550  	spin_lock(&cpe_history_lock);
551  	if (!cpe_poll_enabled && cpe_vector >= 0) {
552  
553  		int i, count = 1; /* we know 1 happened now */
554  		unsigned long now = jiffies;
555  
556  		for (i = 0; i < CPE_HISTORY_LENGTH; i++) {
557  			if (now - cpe_history[i] <= HZ)
558  				count++;
559  		}
560  
561  		IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH);
562  		if (count >= CPE_HISTORY_LENGTH) {
563  
564  			cpe_poll_enabled = 1;
565  			spin_unlock(&cpe_history_lock);
566  			disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR));
567  
568  			/*
569  			 * Corrected errors will still be corrected, but
570  			 * make sure there's a log somewhere that indicates
571  			 * something is generating more than we can handle.
572  			 */
573  			printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");
574  
575  			mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
576  
577  			/* lock already released, get out now */
578  			goto out;
579  		} else {
580  			cpe_history[index++] = now;
581  			if (index == CPE_HISTORY_LENGTH)
582  				index = 0;
583  		}
584  	}
585  	spin_unlock(&cpe_history_lock);
586  out:
587  	/* Get the CPE error record and log it */
588  	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
589  
590  	local_irq_disable();
591  
592  	return IRQ_HANDLED;
593  }
594  
595  /*
596   * ia64_mca_register_cpev
597   *
598   *  Register the corrected platform error vector with SAL.
599   *
600   *  Inputs
601   *      cpev        Corrected Platform Error Vector number
602   *
603   *  Outputs
604   *      None
605   */
606  void
ia64_mca_register_cpev(int cpev)607  ia64_mca_register_cpev (int cpev)
608  {
609  	/* Register the CPE interrupt vector with SAL */
610  	struct ia64_sal_retval isrv;
611  
612  	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0);
613  	if (isrv.status) {
614  		printk(KERN_ERR "Failed to register Corrected Platform "
615  		       "Error interrupt vector with SAL (status %ld)\n", isrv.status);
616  		return;
617  	}
618  
619  	IA64_MCA_DEBUG("%s: corrected platform error "
620  		       "vector %#x registered\n", __func__, cpev);
621  }
622  
623  /*
624   * ia64_mca_cmc_vector_setup
625   *
626   *  Setup the corrected machine check vector register in the processor.
627   *  (The interrupt is masked on boot. ia64_mca_late_init unmask this.)
628   *  This function is invoked on a per-processor basis.
629   *
630   * Inputs
631   *      None
632   *
633   * Outputs
634   *	None
635   */
636  void
ia64_mca_cmc_vector_setup(void)637  ia64_mca_cmc_vector_setup (void)
638  {
639  	cmcv_reg_t	cmcv;
640  
641  	cmcv.cmcv_regval	= 0;
642  	cmcv.cmcv_mask		= 1;        /* Mask/disable interrupt at first */
643  	cmcv.cmcv_vector	= IA64_CMC_VECTOR;
644  	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
645  
646  	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n",
647  		       __func__, smp_processor_id(), IA64_CMC_VECTOR);
648  
649  	IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
650  		       __func__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
651  }
652  
653  /*
654   * ia64_mca_cmc_vector_disable
655   *
656   *  Mask the corrected machine check vector register in the processor.
657   *  This function is invoked on a per-processor basis.
658   *
659   * Inputs
660   *      dummy(unused)
661   *
662   * Outputs
663   *	None
664   */
665  static void
ia64_mca_cmc_vector_disable(void * dummy)666  ia64_mca_cmc_vector_disable (void *dummy)
667  {
668  	cmcv_reg_t	cmcv;
669  
670  	cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
671  
672  	cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
673  	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
674  
675  	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n",
676  		       __func__, smp_processor_id(), cmcv.cmcv_vector);
677  }
678  
679  /*
680   * ia64_mca_cmc_vector_enable
681   *
682   *  Unmask the corrected machine check vector register in the processor.
683   *  This function is invoked on a per-processor basis.
684   *
685   * Inputs
686   *      dummy(unused)
687   *
688   * Outputs
689   *	None
690   */
691  static void
ia64_mca_cmc_vector_enable(void * dummy)692  ia64_mca_cmc_vector_enable (void *dummy)
693  {
694  	cmcv_reg_t	cmcv;
695  
696  	cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
697  
698  	cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
699  	ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
700  
701  	IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n",
702  		       __func__, smp_processor_id(), cmcv.cmcv_vector);
703  }
704  
705  /*
706   * ia64_mca_cmc_vector_disable_keventd
707   *
708   * Called via keventd (smp_call_function() is not safe in interrupt context) to
709   * disable the cmc interrupt vector.
710   */
711  static void
ia64_mca_cmc_vector_disable_keventd(struct work_struct * unused)712  ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused)
713  {
714  	on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0);
715  }
716  
717  /*
718   * ia64_mca_cmc_vector_enable_keventd
719   *
720   * Called via keventd (smp_call_function() is not safe in interrupt context) to
721   * enable the cmc interrupt vector.
722   */
723  static void
ia64_mca_cmc_vector_enable_keventd(struct work_struct * unused)724  ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused)
725  {
726  	on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0);
727  }
728  
729  /*
730   * ia64_mca_wakeup
731   *
732   *	Send an inter-cpu interrupt to wake-up a particular cpu.
733   *
734   *  Inputs  :   cpuid
735   *  Outputs :   None
736   */
737  static void
ia64_mca_wakeup(int cpu)738  ia64_mca_wakeup(int cpu)
739  {
740  	ia64_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0);
741  }
742  
743  /*
744   * ia64_mca_wakeup_all
745   *
746   *	Wakeup all the slave cpus which have rendez'ed previously.
747   *
748   *  Inputs  :   None
749   *  Outputs :   None
750   */
751  static void
ia64_mca_wakeup_all(void)752  ia64_mca_wakeup_all(void)
753  {
754  	int cpu;
755  
756  	/* Clear the Rendez checkin flag for all cpus */
757  	for_each_online_cpu(cpu) {
758  		if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
759  			ia64_mca_wakeup(cpu);
760  	}
761  
762  }
763  
764  /*
765   * ia64_mca_rendez_interrupt_handler
766   *
767   *	This is handler used to put slave processors into spinloop
768   *	while the monarch processor does the mca handling and later
769   *	wake each slave up once the monarch is done.  The state
770   *	IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed
771   *	in SAL.  The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates
772   *	the cpu has come out of OS rendezvous.
773   *
774   *  Inputs  :   None
775   *  Outputs :   None
776   */
777  static irqreturn_t
ia64_mca_rendez_int_handler(int rendez_irq,void * arg)778  ia64_mca_rendez_int_handler(int rendez_irq, void *arg)
779  {
780  	unsigned long flags;
781  	int cpu = smp_processor_id();
782  	struct ia64_mca_notify_die nd =
783  		{ .sos = NULL, .monarch_cpu = &monarch_cpu };
784  
785  	/* Mask all interrupts */
786  	local_irq_save(flags);
787  
788  	NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1);
789  
790  	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
791  	/* Register with the SAL monarch that the slave has
792  	 * reached SAL
793  	 */
794  	ia64_sal_mc_rendez();
795  
796  	NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1);
797  
798  	/* Wait for the monarch cpu to exit. */
799  	while (monarch_cpu != -1)
800  	       cpu_relax();	/* spin until monarch leaves */
801  
802  	NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1);
803  
804  	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
805  	/* Enable all interrupts */
806  	local_irq_restore(flags);
807  	return IRQ_HANDLED;
808  }
809  
810  /*
811   * ia64_mca_wakeup_int_handler
812   *
813   *	The interrupt handler for processing the inter-cpu interrupt to the
814   *	slave cpu which was spinning in the rendez loop.
815   *	Since this spinning is done by turning off the interrupts and
816   *	polling on the wakeup-interrupt bit in the IRR, there is
817   *	nothing useful to be done in the handler.
818   *
819   *  Inputs  :   wakeup_irq  (Wakeup-interrupt bit)
820   *	arg		(Interrupt handler specific argument)
821   *  Outputs :   None
822   *
823   */
824  static irqreturn_t
ia64_mca_wakeup_int_handler(int wakeup_irq,void * arg)825  ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg)
826  {
827  	return IRQ_HANDLED;
828  }
829  
830  /* Function pointer for extra MCA recovery */
831  int (*ia64_mca_ucmc_extension)
832  	(void*,struct ia64_sal_os_state*)
833  	= NULL;
834  
835  int
ia64_reg_MCA_extension(int (* fn)(void *,struct ia64_sal_os_state *))836  ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *))
837  {
838  	if (ia64_mca_ucmc_extension)
839  		return 1;
840  
841  	ia64_mca_ucmc_extension = fn;
842  	return 0;
843  }
844  
845  void
ia64_unreg_MCA_extension(void)846  ia64_unreg_MCA_extension(void)
847  {
848  	if (ia64_mca_ucmc_extension)
849  		ia64_mca_ucmc_extension = NULL;
850  }
851  
852  EXPORT_SYMBOL(ia64_reg_MCA_extension);
853  EXPORT_SYMBOL(ia64_unreg_MCA_extension);
854  
855  
856  static inline void
copy_reg(const u64 * fr,u64 fnat,unsigned long * tr,unsigned long * tnat)857  copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat)
858  {
859  	u64 fslot, tslot, nat;
860  	*tr = *fr;
861  	fslot = ((unsigned long)fr >> 3) & 63;
862  	tslot = ((unsigned long)tr >> 3) & 63;
863  	*tnat &= ~(1UL << tslot);
864  	nat = (fnat >> fslot) & 1;
865  	*tnat |= (nat << tslot);
866  }
867  
868  /* Change the comm field on the MCA/INT task to include the pid that
869   * was interrupted, it makes for easier debugging.  If that pid was 0
870   * (swapper or nested MCA/INIT) then use the start of the previous comm
871   * field suffixed with its cpu.
872   */
873  
874  static void
ia64_mca_modify_comm(const struct task_struct * previous_current)875  ia64_mca_modify_comm(const struct task_struct *previous_current)
876  {
877  	char *p, comm[sizeof(current->comm)];
878  	if (previous_current->pid)
879  		snprintf(comm, sizeof(comm), "%s %d",
880  			current->comm, previous_current->pid);
881  	else {
882  		int l;
883  		if ((p = strchr(previous_current->comm, ' ')))
884  			l = p - previous_current->comm;
885  		else
886  			l = strlen(previous_current->comm);
887  		snprintf(comm, sizeof(comm), "%s %*s %d",
888  			current->comm, l, previous_current->comm,
889  			task_thread_info(previous_current)->cpu);
890  	}
891  	memcpy(current->comm, comm, sizeof(current->comm));
892  }
893  
894  static void
finish_pt_regs(struct pt_regs * regs,struct ia64_sal_os_state * sos,unsigned long * nat)895  finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos,
896  		unsigned long *nat)
897  {
898  	const struct pal_min_state_area *ms = sos->pal_min_state;
899  	const u64 *bank;
900  
901  	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
902  	 * pmsa_{xip,xpsr,xfs}
903  	 */
904  	if (ia64_psr(regs)->ic) {
905  		regs->cr_iip = ms->pmsa_iip;
906  		regs->cr_ipsr = ms->pmsa_ipsr;
907  		regs->cr_ifs = ms->pmsa_ifs;
908  	} else {
909  		regs->cr_iip = ms->pmsa_xip;
910  		regs->cr_ipsr = ms->pmsa_xpsr;
911  		regs->cr_ifs = ms->pmsa_xfs;
912  
913  		sos->iip = ms->pmsa_iip;
914  		sos->ipsr = ms->pmsa_ipsr;
915  		sos->ifs = ms->pmsa_ifs;
916  	}
917  	regs->pr = ms->pmsa_pr;
918  	regs->b0 = ms->pmsa_br0;
919  	regs->ar_rsc = ms->pmsa_rsc;
920  	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &regs->r1, nat);
921  	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &regs->r2, nat);
922  	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &regs->r3, nat);
923  	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &regs->r8, nat);
924  	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &regs->r9, nat);
925  	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &regs->r10, nat);
926  	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &regs->r11, nat);
927  	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &regs->r12, nat);
928  	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &regs->r13, nat);
929  	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &regs->r14, nat);
930  	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &regs->r15, nat);
931  	if (ia64_psr(regs)->bn)
932  		bank = ms->pmsa_bank1_gr;
933  	else
934  		bank = ms->pmsa_bank0_gr;
935  	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &regs->r16, nat);
936  	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &regs->r17, nat);
937  	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &regs->r18, nat);
938  	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &regs->r19, nat);
939  	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &regs->r20, nat);
940  	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &regs->r21, nat);
941  	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &regs->r22, nat);
942  	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &regs->r23, nat);
943  	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &regs->r24, nat);
944  	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &regs->r25, nat);
945  	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &regs->r26, nat);
946  	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &regs->r27, nat);
947  	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &regs->r28, nat);
948  	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &regs->r29, nat);
949  	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &regs->r30, nat);
950  	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &regs->r31, nat);
951  }
952  
953  /* On entry to this routine, we are running on the per cpu stack, see
954   * mca_asm.h.  The original stack has not been touched by this event.  Some of
955   * the original stack's registers will be in the RBS on this stack.  This stack
956   * also contains a partial pt_regs and switch_stack, the rest of the data is in
957   * PAL minstate.
958   *
959   * The first thing to do is modify the original stack to look like a blocked
960   * task so we can run backtrace on the original task.  Also mark the per cpu
961   * stack as current to ensure that we use the correct task state, it also means
962   * that we can do backtrace on the MCA/INIT handler code itself.
963   */
964  
965  static struct task_struct *
ia64_mca_modify_original_stack(struct pt_regs * regs,const struct switch_stack * sw,struct ia64_sal_os_state * sos,const char * type)966  ia64_mca_modify_original_stack(struct pt_regs *regs,
967  		const struct switch_stack *sw,
968  		struct ia64_sal_os_state *sos,
969  		const char *type)
970  {
971  	char *p;
972  	ia64_va va;
973  	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
974  	const struct pal_min_state_area *ms = sos->pal_min_state;
975  	struct task_struct *previous_current;
976  	struct pt_regs *old_regs;
977  	struct switch_stack *old_sw;
978  	unsigned size = sizeof(struct pt_regs) +
979  			sizeof(struct switch_stack) + 16;
980  	unsigned long *old_bspstore, *old_bsp;
981  	unsigned long *new_bspstore, *new_bsp;
982  	unsigned long old_unat, old_rnat, new_rnat, nat;
983  	u64 slots, loadrs = regs->loadrs;
984  	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
985  	u64 ar_bspstore = regs->ar_bspstore;
986  	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
987  	const char *msg;
988  	int cpu = smp_processor_id();
989  
990  	previous_current = curr_task(cpu);
991  	ia64_set_curr_task(cpu, current);
992  	if ((p = strchr(current->comm, ' ')))
993  		*p = '\0';
994  
995  	/* Best effort attempt to cope with MCA/INIT delivered while in
996  	 * physical mode.
997  	 */
998  	regs->cr_ipsr = ms->pmsa_ipsr;
999  	if (ia64_psr(regs)->dt == 0) {
1000  		va.l = r12;
1001  		if (va.f.reg == 0) {
1002  			va.f.reg = 7;
1003  			r12 = va.l;
1004  		}
1005  		va.l = r13;
1006  		if (va.f.reg == 0) {
1007  			va.f.reg = 7;
1008  			r13 = va.l;
1009  		}
1010  	}
1011  	if (ia64_psr(regs)->rt == 0) {
1012  		va.l = ar_bspstore;
1013  		if (va.f.reg == 0) {
1014  			va.f.reg = 7;
1015  			ar_bspstore = va.l;
1016  		}
1017  		va.l = ar_bsp;
1018  		if (va.f.reg == 0) {
1019  			va.f.reg = 7;
1020  			ar_bsp = va.l;
1021  		}
1022  	}
1023  
1024  	/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
1025  	 * have been copied to the old stack, the old stack may fail the
1026  	 * validation tests below.  So ia64_old_stack() must restore the dirty
1027  	 * registers from the new stack.  The old and new bspstore probably
1028  	 * have different alignments, so loadrs calculated on the old bsp
1029  	 * cannot be used to restore from the new bsp.  Calculate a suitable
1030  	 * loadrs for the new stack and save it in the new pt_regs, where
1031  	 * ia64_old_stack() can get it.
1032  	 */
1033  	old_bspstore = (unsigned long *)ar_bspstore;
1034  	old_bsp = (unsigned long *)ar_bsp;
1035  	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
1036  	new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET);
1037  	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
1038  	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
1039  
1040  	/* Verify the previous stack state before we change it */
1041  	if (user_mode(regs)) {
1042  		msg = "occurred in user space";
1043  		/* previous_current is guaranteed to be valid when the task was
1044  		 * in user space, so ...
1045  		 */
1046  		ia64_mca_modify_comm(previous_current);
1047  		goto no_mod;
1048  	}
1049  
1050  	if (r13 != sos->prev_IA64_KR_CURRENT) {
1051  		msg = "inconsistent previous current and r13";
1052  		goto no_mod;
1053  	}
1054  
1055  	if (!mca_recover_range(ms->pmsa_iip)) {
1056  		if ((r12 - r13) >= KERNEL_STACK_SIZE) {
1057  			msg = "inconsistent r12 and r13";
1058  			goto no_mod;
1059  		}
1060  		if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
1061  			msg = "inconsistent ar.bspstore and r13";
1062  			goto no_mod;
1063  		}
1064  		va.p = old_bspstore;
1065  		if (va.f.reg < 5) {
1066  			msg = "old_bspstore is in the wrong region";
1067  			goto no_mod;
1068  		}
1069  		if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
1070  			msg = "inconsistent ar.bsp and r13";
1071  			goto no_mod;
1072  		}
1073  		size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
1074  		if (ar_bspstore + size > r12) {
1075  			msg = "no room for blocked state";
1076  			goto no_mod;
1077  		}
1078  	}
1079  
1080  	ia64_mca_modify_comm(previous_current);
1081  
1082  	/* Make the original task look blocked.  First stack a struct pt_regs,
1083  	 * describing the state at the time of interrupt.  mca_asm.S built a
1084  	 * partial pt_regs, copy it and fill in the blanks using minstate.
1085  	 */
1086  	p = (char *)r12 - sizeof(*regs);
1087  	old_regs = (struct pt_regs *)p;
1088  	memcpy(old_regs, regs, sizeof(*regs));
1089  	old_regs->loadrs = loadrs;
1090  	old_unat = old_regs->ar_unat;
1091  	finish_pt_regs(old_regs, sos, &old_unat);
1092  
1093  	/* Next stack a struct switch_stack.  mca_asm.S built a partial
1094  	 * switch_stack, copy it and fill in the blanks using pt_regs and
1095  	 * minstate.
1096  	 *
1097  	 * In the synthesized switch_stack, b0 points to ia64_leave_kernel,
1098  	 * ar.pfs is set to 0.
1099  	 *
1100  	 * unwind.c::unw_unwind() does special processing for interrupt frames.
1101  	 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate
1102  	 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs.  Not
1103  	 * that this is documented, of course.  Set PRED_NON_SYSCALL in the
1104  	 * switch_stack on the original stack so it will unwind correctly when
1105  	 * unwind.c reads pt_regs.
1106  	 *
1107  	 * thread.ksp is updated to point to the synthesized switch_stack.
1108  	 */
1109  	p -= sizeof(struct switch_stack);
1110  	old_sw = (struct switch_stack *)p;
1111  	memcpy(old_sw, sw, sizeof(*sw));
1112  	old_sw->caller_unat = old_unat;
1113  	old_sw->ar_fpsr = old_regs->ar_fpsr;
1114  	copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat);
1115  	copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat);
1116  	copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat);
1117  	copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat);
1118  	old_sw->b0 = (u64)ia64_leave_kernel;
1119  	old_sw->b1 = ms->pmsa_br1;
1120  	old_sw->ar_pfs = 0;
1121  	old_sw->ar_unat = old_unat;
1122  	old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL);
1123  	previous_current->thread.ksp = (u64)p - 16;
1124  
1125  	/* Finally copy the original stack's registers back to its RBS.
1126  	 * Registers from ar.bspstore through ar.bsp at the time of the event
1127  	 * are in the current RBS, copy them back to the original stack.  The
1128  	 * copy must be done register by register because the original bspstore
1129  	 * and the current one have different alignments, so the saved RNAT
1130  	 * data occurs at different places.
1131  	 *
1132  	 * mca_asm does cover, so the old_bsp already includes all registers at
1133  	 * the time of MCA/INIT.  It also does flushrs, so all registers before
1134  	 * this function have been written to backing store on the MCA/INIT
1135  	 * stack.
1136  	 */
1137  	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
1138  	old_rnat = regs->ar_rnat;
1139  	while (slots--) {
1140  		if (ia64_rse_is_rnat_slot(new_bspstore)) {
1141  			new_rnat = ia64_get_rnat(new_bspstore++);
1142  		}
1143  		if (ia64_rse_is_rnat_slot(old_bspstore)) {
1144  			*old_bspstore++ = old_rnat;
1145  			old_rnat = 0;
1146  		}
1147  		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
1148  		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
1149  		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
1150  		*old_bspstore++ = *new_bspstore++;
1151  	}
1152  	old_sw->ar_bspstore = (unsigned long)old_bspstore;
1153  	old_sw->ar_rnat = old_rnat;
1154  
1155  	sos->prev_task = previous_current;
1156  	return previous_current;
1157  
1158  no_mod:
1159  	mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
1160  			smp_processor_id(), type, msg);
1161  	old_unat = regs->ar_unat;
1162  	finish_pt_regs(regs, sos, &old_unat);
1163  	return previous_current;
1164  }
1165  
1166  /* The monarch/slave interaction is based on monarch_cpu and requires that all
1167   * slaves have entered rendezvous before the monarch leaves.  If any cpu has
1168   * not entered rendezvous yet then wait a bit.  The assumption is that any
1169   * slave that has not rendezvoused after a reasonable time is never going to do
1170   * so.  In this context, slave includes cpus that respond to the MCA rendezvous
1171   * interrupt, as well as cpus that receive the INIT slave event.
1172   */
1173  
1174  static void
ia64_wait_for_slaves(int monarch,const char * type)1175  ia64_wait_for_slaves(int monarch, const char *type)
1176  {
1177  	int c, i , wait;
1178  
1179  	/*
1180  	 * wait 5 seconds total for slaves (arbitrary)
1181  	 */
1182  	for (i = 0; i < 5000; i++) {
1183  		wait = 0;
1184  		for_each_online_cpu(c) {
1185  			if (c == monarch)
1186  				continue;
1187  			if (ia64_mc_info.imi_rendez_checkin[c]
1188  					== IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
1189  				udelay(1000);		/* short wait */
1190  				wait = 1;
1191  				break;
1192  			}
1193  		}
1194  		if (!wait)
1195  			goto all_in;
1196  	}
1197  
1198  	/*
1199  	 * Maybe slave(s) dead. Print buffered messages immediately.
1200  	 */
1201  	ia64_mlogbuf_finish(0);
1202  	mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
1203  	for_each_online_cpu(c) {
1204  		if (c == monarch)
1205  			continue;
1206  		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
1207  			mprintk(" %d", c);
1208  	}
1209  	mprintk("\n");
1210  	return;
1211  
1212  all_in:
1213  	mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
1214  	return;
1215  }
1216  
1217  /*  mca_insert_tr
1218   *
1219   *  Switch rid when TR reload and needed!
1220   *  iord: 1: itr, 2: itr;
1221   *
1222  */
mca_insert_tr(u64 iord)1223  static void mca_insert_tr(u64 iord)
1224  {
1225  
1226  	int i;
1227  	u64 old_rr;
1228  	struct ia64_tr_entry *p;
1229  	unsigned long psr;
1230  	int cpu = smp_processor_id();
1231  
1232  	if (!ia64_idtrs[cpu])
1233  		return;
1234  
1235  	psr = ia64_clear_ic();
1236  	for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
1237  		p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX;
1238  		if (p->pte & 0x1) {
1239  			old_rr = ia64_get_rr(p->ifa);
1240  			if (old_rr != p->rr) {
1241  				ia64_set_rr(p->ifa, p->rr);
1242  				ia64_srlz_d();
1243  			}
1244  			ia64_ptr(iord, p->ifa, p->itir >> 2);
1245  			ia64_srlz_i();
1246  			if (iord & 0x1) {
1247  				ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
1248  				ia64_srlz_i();
1249  			}
1250  			if (iord & 0x2) {
1251  				ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
1252  				ia64_srlz_i();
1253  			}
1254  			if (old_rr != p->rr) {
1255  				ia64_set_rr(p->ifa, old_rr);
1256  				ia64_srlz_d();
1257  			}
1258  		}
1259  	}
1260  	ia64_set_psr(psr);
1261  }
1262  
1263  /*
1264   * ia64_mca_handler
1265   *
1266   *	This is uncorrectable machine check handler called from OS_MCA
1267   *	dispatch code which is in turn called from SAL_CHECK().
1268   *	This is the place where the core of OS MCA handling is done.
1269   *	Right now the logs are extracted and displayed in a well-defined
1270   *	format. This handler code is supposed to be run only on the
1271   *	monarch processor. Once the monarch is done with MCA handling
1272   *	further MCA logging is enabled by clearing logs.
1273   *	Monarch also has the duty of sending wakeup-IPIs to pull the
1274   *	slave processors out of rendezvous spinloop.
1275   *
1276   *	If multiple processors call into OS_MCA, the first will become
1277   *	the monarch.  Subsequent cpus will be recorded in the mca_cpu
1278   *	bitmask.  After the first monarch has processed its MCA, it
1279   *	will wake up the next cpu in the mca_cpu bitmask and then go
1280   *	into the rendezvous loop.  When all processors have serviced
1281   *	their MCA, the last monarch frees up the rest of the processors.
1282   */
1283  void
ia64_mca_handler(struct pt_regs * regs,struct switch_stack * sw,struct ia64_sal_os_state * sos)1284  ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1285  		 struct ia64_sal_os_state *sos)
1286  {
1287  	int recover, cpu = smp_processor_id();
1288  	struct task_struct *previous_current;
1289  	struct ia64_mca_notify_die nd =
1290  		{ .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover };
1291  	static atomic_t mca_count;
1292  	static cpumask_t mca_cpu;
1293  
1294  	if (atomic_add_return(1, &mca_count) == 1) {
1295  		monarch_cpu = cpu;
1296  		sos->monarch = 1;
1297  	} else {
1298  		cpumask_set_cpu(cpu, &mca_cpu);
1299  		sos->monarch = 0;
1300  	}
1301  	mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
1302  		"monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
1303  
1304  	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
1305  
1306  	NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
1307  
1308  	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
1309  	if (sos->monarch) {
1310  		ia64_wait_for_slaves(cpu, "MCA");
1311  
1312  		/* Wakeup all the processors which are spinning in the
1313  		 * rendezvous loop.  They will leave SAL, then spin in the OS
1314  		 * with interrupts disabled until this monarch cpu leaves the
1315  		 * MCA handler.  That gets control back to the OS so we can
1316  		 * backtrace the other cpus, backtrace when spinning in SAL
1317  		 * does not work.
1318  		 */
1319  		ia64_mca_wakeup_all();
1320  	} else {
1321  		while (cpumask_test_cpu(cpu, &mca_cpu))
1322  			cpu_relax();	/* spin until monarch wakes us */
1323  	}
1324  
1325  	NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1);
1326  
1327  	/* Get the MCA error record and log it */
1328  	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
1329  
1330  	/* MCA error recovery */
1331  	recover = (ia64_mca_ucmc_extension
1332  		&& ia64_mca_ucmc_extension(
1333  			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
1334  			sos));
1335  
1336  	if (recover) {
1337  		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
1338  		rh->severity = sal_log_severity_corrected;
1339  		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
1340  		sos->os_status = IA64_MCA_CORRECTED;
1341  	} else {
1342  		/* Dump buffered message to console */
1343  		ia64_mlogbuf_finish(1);
1344  	}
1345  
1346  	if (__this_cpu_read(ia64_mca_tr_reload)) {
1347  		mca_insert_tr(0x1); /*Reload dynamic itrs*/
1348  		mca_insert_tr(0x2); /*Reload dynamic itrs*/
1349  	}
1350  
1351  	NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1);
1352  
1353  	if (atomic_dec_return(&mca_count) > 0) {
1354  		int i;
1355  
1356  		/* wake up the next monarch cpu,
1357  		 * and put this cpu in the rendez loop.
1358  		 */
1359  		for_each_online_cpu(i) {
1360  			if (cpumask_test_cpu(i, &mca_cpu)) {
1361  				monarch_cpu = i;
1362  				cpumask_clear_cpu(i, &mca_cpu);	/* wake next cpu */
1363  				while (monarch_cpu != -1)
1364  					cpu_relax();	/* spin until last cpu leaves */
1365  				ia64_set_curr_task(cpu, previous_current);
1366  				ia64_mc_info.imi_rendez_checkin[cpu]
1367  						= IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1368  				return;
1369  			}
1370  		}
1371  	}
1372  	ia64_set_curr_task(cpu, previous_current);
1373  	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1374  	monarch_cpu = -1;	/* This frees the slaves and previous monarchs */
1375  }
1376  
1377  static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd);
1378  static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd);
1379  
1380  /*
1381   * ia64_mca_cmc_int_handler
1382   *
1383   *  This is corrected machine check interrupt handler.
1384   *	Right now the logs are extracted and displayed in a well-defined
1385   *	format.
1386   *
1387   * Inputs
1388   *      interrupt number
1389   *      client data arg ptr
1390   *
1391   * Outputs
1392   *	None
1393   */
1394  static irqreturn_t
ia64_mca_cmc_int_handler(int cmc_irq,void * arg)1395  ia64_mca_cmc_int_handler(int cmc_irq, void *arg)
1396  {
1397  	static unsigned long	cmc_history[CMC_HISTORY_LENGTH];
1398  	static int		index;
1399  	static DEFINE_SPINLOCK(cmc_history_lock);
1400  
1401  	IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
1402  		       __func__, cmc_irq, smp_processor_id());
1403  
1404  	/* SAL spec states this should run w/ interrupts enabled */
1405  	local_irq_enable();
1406  
1407  	spin_lock(&cmc_history_lock);
1408  	if (!cmc_polling_enabled) {
1409  		int i, count = 1; /* we know 1 happened now */
1410  		unsigned long now = jiffies;
1411  
1412  		for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
1413  			if (now - cmc_history[i] <= HZ)
1414  				count++;
1415  		}
1416  
1417  		IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
1418  		if (count >= CMC_HISTORY_LENGTH) {
1419  
1420  			cmc_polling_enabled = 1;
1421  			spin_unlock(&cmc_history_lock);
1422  			/* If we're being hit with CMC interrupts, we won't
1423  			 * ever execute the schedule_work() below.  Need to
1424  			 * disable CMC interrupts on this processor now.
1425  			 */
1426  			ia64_mca_cmc_vector_disable(NULL);
1427  			schedule_work(&cmc_disable_work);
1428  
1429  			/*
1430  			 * Corrected errors will still be corrected, but
1431  			 * make sure there's a log somewhere that indicates
1432  			 * something is generating more than we can handle.
1433  			 */
1434  			printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n");
1435  
1436  			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
1437  
1438  			/* lock already released, get out now */
1439  			goto out;
1440  		} else {
1441  			cmc_history[index++] = now;
1442  			if (index == CMC_HISTORY_LENGTH)
1443  				index = 0;
1444  		}
1445  	}
1446  	spin_unlock(&cmc_history_lock);
1447  out:
1448  	/* Get the CMC error record and log it */
1449  	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
1450  
1451  	local_irq_disable();
1452  
1453  	return IRQ_HANDLED;
1454  }
1455  
1456  /*
1457   *  ia64_mca_cmc_int_caller
1458   *
1459   * 	Triggered by sw interrupt from CMC polling routine.  Calls
1460   * 	real interrupt handler and either triggers a sw interrupt
1461   * 	on the next cpu or does cleanup at the end.
1462   *
1463   * Inputs
1464   *	interrupt number
1465   *	client data arg ptr
1466   * Outputs
1467   * 	handled
1468   */
1469  static irqreturn_t
ia64_mca_cmc_int_caller(int cmc_irq,void * arg)1470  ia64_mca_cmc_int_caller(int cmc_irq, void *arg)
1471  {
1472  	static int start_count = -1;
1473  	unsigned int cpuid;
1474  
1475  	cpuid = smp_processor_id();
1476  
1477  	/* If first cpu, update count */
1478  	if (start_count == -1)
1479  		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
1480  
1481  	ia64_mca_cmc_int_handler(cmc_irq, arg);
1482  
1483  	cpuid = cpumask_next(cpuid+1, cpu_online_mask);
1484  
1485  	if (cpuid < nr_cpu_ids) {
1486  		ia64_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
1487  	} else {
1488  		/* If no log record, switch out of polling mode */
1489  		if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
1490  
1491  			printk(KERN_WARNING "Returning to interrupt driven CMC handler\n");
1492  			schedule_work(&cmc_enable_work);
1493  			cmc_polling_enabled = 0;
1494  
1495  		} else {
1496  
1497  			mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
1498  		}
1499  
1500  		start_count = -1;
1501  	}
1502  
1503  	return IRQ_HANDLED;
1504  }
1505  
1506  /*
1507   *  ia64_mca_cmc_poll
1508   *
1509   *	Poll for Corrected Machine Checks (CMCs)
1510   *
1511   * Inputs   :   dummy(unused)
1512   * Outputs  :   None
1513   *
1514   */
1515  static void
ia64_mca_cmc_poll(struct timer_list * unused)1516  ia64_mca_cmc_poll (struct timer_list *unused)
1517  {
1518  	/* Trigger a CMC interrupt cascade  */
1519  	ia64_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR,
1520  							IA64_IPI_DM_INT, 0);
1521  }
1522  
1523  /*
1524   *  ia64_mca_cpe_int_caller
1525   *
1526   * 	Triggered by sw interrupt from CPE polling routine.  Calls
1527   * 	real interrupt handler and either triggers a sw interrupt
1528   * 	on the next cpu or does cleanup at the end.
1529   *
1530   * Inputs
1531   *	interrupt number
1532   *	client data arg ptr
1533   * Outputs
1534   * 	handled
1535   */
1536  static irqreturn_t
ia64_mca_cpe_int_caller(int cpe_irq,void * arg)1537  ia64_mca_cpe_int_caller(int cpe_irq, void *arg)
1538  {
1539  	static int start_count = -1;
1540  	static int poll_time = MIN_CPE_POLL_INTERVAL;
1541  	unsigned int cpuid;
1542  
1543  	cpuid = smp_processor_id();
1544  
1545  	/* If first cpu, update count */
1546  	if (start_count == -1)
1547  		start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
1548  
1549  	ia64_mca_cpe_int_handler(cpe_irq, arg);
1550  
1551  	cpuid = cpumask_next(cpuid+1, cpu_online_mask);
1552  
1553  	if (cpuid < NR_CPUS) {
1554  		ia64_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
1555  	} else {
1556  		/*
1557  		 * If a log was recorded, increase our polling frequency,
1558  		 * otherwise, backoff or return to interrupt mode.
1559  		 */
1560  		if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
1561  			poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2);
1562  		} else if (cpe_vector < 0) {
1563  			poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
1564  		} else {
1565  			poll_time = MIN_CPE_POLL_INTERVAL;
1566  
1567  			printk(KERN_WARNING "Returning to interrupt driven CPE handler\n");
1568  			enable_irq(local_vector_to_irq(IA64_CPE_VECTOR));
1569  			cpe_poll_enabled = 0;
1570  		}
1571  
1572  		if (cpe_poll_enabled)
1573  			mod_timer(&cpe_poll_timer, jiffies + poll_time);
1574  		start_count = -1;
1575  	}
1576  
1577  	return IRQ_HANDLED;
1578  }
1579  
1580  /*
1581   *  ia64_mca_cpe_poll
1582   *
1583   *	Poll for Corrected Platform Errors (CPEs), trigger interrupt
1584   *	on first cpu, from there it will trickle through all the cpus.
1585   *
1586   * Inputs   :   dummy(unused)
1587   * Outputs  :   None
1588   *
1589   */
1590  static void
ia64_mca_cpe_poll(struct timer_list * unused)1591  ia64_mca_cpe_poll (struct timer_list *unused)
1592  {
1593  	/* Trigger a CPE interrupt cascade  */
1594  	ia64_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR,
1595  							IA64_IPI_DM_INT, 0);
1596  }
1597  
1598  static int
default_monarch_init_process(struct notifier_block * self,unsigned long val,void * data)1599  default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data)
1600  {
1601  	int c;
1602  	struct task_struct *g, *t;
1603  	if (val != DIE_INIT_MONARCH_PROCESS)
1604  		return NOTIFY_DONE;
1605  #ifdef CONFIG_KEXEC
1606  	if (atomic_read(&kdump_in_progress))
1607  		return NOTIFY_DONE;
1608  #endif
1609  
1610  	/*
1611  	 * FIXME: mlogbuf will brim over with INIT stack dumps.
1612  	 * To enable show_stack from INIT, we use oops_in_progress which should
1613  	 * be used in real oops. This would cause something wrong after INIT.
1614  	 */
1615  	BREAK_LOGLEVEL(console_loglevel);
1616  	ia64_mlogbuf_dump_from_init();
1617  
1618  	printk(KERN_ERR "Processes interrupted by INIT -");
1619  	for_each_online_cpu(c) {
1620  		struct ia64_sal_os_state *s;
1621  		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
1622  		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
1623  		g = s->prev_task;
1624  		if (g) {
1625  			if (g->pid)
1626  				printk(" %d", g->pid);
1627  			else
1628  				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
1629  		}
1630  	}
1631  	printk("\n\n");
1632  	if (read_trylock(&tasklist_lock)) {
1633  		for_each_process_thread(g, t) {
1634  			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
1635  			show_stack(t, NULL, KERN_DEFAULT);
1636  		}
1637  		read_unlock(&tasklist_lock);
1638  	}
1639  	/* FIXME: This will not restore zapped printk locks. */
1640  	RESTORE_LOGLEVEL(console_loglevel);
1641  	return NOTIFY_DONE;
1642  }
1643  
1644  /*
1645   * C portion of the OS INIT handler
1646   *
1647   * Called from ia64_os_init_dispatch
1648   *
1649   * Inputs: pointer to pt_regs where processor info was saved.  SAL/OS state for
1650   * this event.  This code is used for both monarch and slave INIT events, see
1651   * sos->monarch.
1652   *
1653   * All INIT events switch to the INIT stack and change the previous process to
1654   * blocked status.  If one of the INIT events is the monarch then we are
1655   * probably processing the nmi button/command.  Use the monarch cpu to dump all
1656   * the processes.  The slave INIT events all spin until the monarch cpu
1657   * returns.  We can also get INIT slave events for MCA, in which case the MCA
1658   * process is the monarch.
1659   */
1660  
1661  void
ia64_init_handler(struct pt_regs * regs,struct switch_stack * sw,struct ia64_sal_os_state * sos)1662  ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1663  		  struct ia64_sal_os_state *sos)
1664  {
1665  	static atomic_t slaves;
1666  	static atomic_t monarchs;
1667  	struct task_struct *previous_current;
1668  	int cpu = smp_processor_id();
1669  	struct ia64_mca_notify_die nd =
1670  		{ .sos = sos, .monarch_cpu = &monarch_cpu };
1671  
1672  	NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0);
1673  
1674  	mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
1675  		sos->proc_state_param, cpu, sos->monarch);
1676  	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
1677  
1678  	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
1679  	sos->os_status = IA64_INIT_RESUME;
1680  
1681  	/* FIXME: Workaround for broken proms that drive all INIT events as
1682  	 * slaves.  The last slave that enters is promoted to be a monarch.
1683  	 * Remove this code in September 2006, that gives platforms a year to
1684  	 * fix their proms and get their customers updated.
1685  	 */
1686  	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
1687  		mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
1688  		        __func__, cpu);
1689  		atomic_dec(&slaves);
1690  		sos->monarch = 1;
1691  	}
1692  
1693  	/* FIXME: Workaround for broken proms that drive all INIT events as
1694  	 * monarchs.  Second and subsequent monarchs are demoted to slaves.
1695  	 * Remove this code in September 2006, that gives platforms a year to
1696  	 * fix their proms and get their customers updated.
1697  	 */
1698  	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
1699  		mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
1700  			       __func__, cpu);
1701  		atomic_dec(&monarchs);
1702  		sos->monarch = 0;
1703  	}
1704  
1705  	if (!sos->monarch) {
1706  		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
1707  
1708  #ifdef CONFIG_KEXEC
1709  		while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress))
1710  			udelay(1000);
1711  #else
1712  		while (monarch_cpu == -1)
1713  			cpu_relax();	/* spin until monarch enters */
1714  #endif
1715  
1716  		NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1);
1717  		NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1);
1718  
1719  #ifdef CONFIG_KEXEC
1720  		while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress))
1721  			udelay(1000);
1722  #else
1723  		while (monarch_cpu != -1)
1724  			cpu_relax();	/* spin until monarch leaves */
1725  #endif
1726  
1727  		NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
1728  
1729  		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
1730  		ia64_set_curr_task(cpu, previous_current);
1731  		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1732  		atomic_dec(&slaves);
1733  		return;
1734  	}
1735  
1736  	monarch_cpu = cpu;
1737  	NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1);
1738  
1739  	/*
1740  	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
1741  	 * generated via the BMC's command-line interface, but since the console is on the
1742  	 * same serial line, the user will need some time to switch out of the BMC before
1743  	 * the dump begins.
1744  	 */
1745  	mprintk("Delaying for 5 seconds...\n");
1746  	udelay(5*1000000);
1747  	ia64_wait_for_slaves(cpu, "INIT");
1748  	/* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
1749  	 * to default_monarch_init_process() above and just print all the
1750  	 * tasks.
1751  	 */
1752  	NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1);
1753  	NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1);
1754  
1755  	mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
1756  	atomic_dec(&monarchs);
1757  	ia64_set_curr_task(cpu, previous_current);
1758  	monarch_cpu = -1;
1759  	return;
1760  }
1761  
1762  static int __init
ia64_mca_disable_cpe_polling(char * str)1763  ia64_mca_disable_cpe_polling(char *str)
1764  {
1765  	cpe_poll_enabled = 0;
1766  	return 1;
1767  }
1768  
1769  __setup("disable_cpe_poll", ia64_mca_disable_cpe_polling);
1770  
1771  /* Minimal format of the MCA/INIT stacks.  The pseudo processes that run on
1772   * these stacks can never sleep, they cannot return from the kernel to user
1773   * space, they do not appear in a normal ps listing.  So there is no need to
1774   * format most of the fields.
1775   */
1776  
1777  static void
format_mca_init_stack(void * mca_data,unsigned long offset,const char * type,int cpu)1778  format_mca_init_stack(void *mca_data, unsigned long offset,
1779  		const char *type, int cpu)
1780  {
1781  	struct task_struct *p = (struct task_struct *)((char *)mca_data + offset);
1782  	struct thread_info *ti;
1783  	memset(p, 0, KERNEL_STACK_SIZE);
1784  	ti = task_thread_info(p);
1785  	ti->flags = _TIF_MCA_INIT;
1786  	ti->preempt_count = 1;
1787  	ti->task = p;
1788  	ti->cpu = cpu;
1789  	p->stack = ti;
1790  	p->__state = TASK_UNINTERRUPTIBLE;
1791  	cpumask_set_cpu(cpu, &p->cpus_mask);
1792  	INIT_LIST_HEAD(&p->tasks);
1793  	p->parent = p->real_parent = p->group_leader = p;
1794  	INIT_LIST_HEAD(&p->children);
1795  	INIT_LIST_HEAD(&p->sibling);
1796  	strscpy(p->comm, type, sizeof(p->comm)-1);
1797  }
1798  
1799  /* Caller prevents this from being called after init */
mca_bootmem(void)1800  static void * __ref mca_bootmem(void)
1801  {
1802  	return memblock_alloc(sizeof(struct ia64_mca_cpu), KERNEL_STACK_SIZE);
1803  }
1804  
1805  /* Do per-CPU MCA-related initialization.  */
1806  void
ia64_mca_cpu_init(void * cpu_data)1807  ia64_mca_cpu_init(void *cpu_data)
1808  {
1809  	void *pal_vaddr;
1810  	void *data;
1811  	long sz = sizeof(struct ia64_mca_cpu);
1812  	int cpu = smp_processor_id();
1813  	static int first_time = 1;
1814  
1815  	/*
1816  	 * Structure will already be allocated if cpu has been online,
1817  	 * then offlined.
1818  	 */
1819  	if (__per_cpu_mca[cpu]) {
1820  		data = __va(__per_cpu_mca[cpu]);
1821  	} else {
1822  		if (first_time) {
1823  			data = mca_bootmem();
1824  			first_time = 0;
1825  		} else
1826  			data = (void *)__get_free_pages(GFP_ATOMIC,
1827  							get_order(sz));
1828  		if (!data)
1829  			panic("Could not allocate MCA memory for cpu %d\n",
1830  					cpu);
1831  	}
1832  	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack),
1833  		"MCA", cpu);
1834  	format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack),
1835  		"INIT", cpu);
1836  	__this_cpu_write(ia64_mca_data, (__per_cpu_mca[cpu] = __pa(data)));
1837  
1838  	/*
1839  	 * Stash away a copy of the PTE needed to map the per-CPU page.
1840  	 * We may need it during MCA recovery.
1841  	 */
1842  	__this_cpu_write(ia64_mca_per_cpu_pte,
1843  		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)));
1844  
1845  	/*
1846  	 * Also, stash away a copy of the PAL address and the PTE
1847  	 * needed to map it.
1848  	 */
1849  	pal_vaddr = efi_get_pal_addr();
1850  	if (!pal_vaddr)
1851  		return;
1852  	__this_cpu_write(ia64_mca_pal_base,
1853  		GRANULEROUNDDOWN((unsigned long) pal_vaddr));
1854  	__this_cpu_write(ia64_mca_pal_pte, pte_val(mk_pte_phys(__pa(pal_vaddr),
1855  							      PAGE_KERNEL)));
1856  }
1857  
ia64_mca_cpu_online(unsigned int cpu)1858  static int ia64_mca_cpu_online(unsigned int cpu)
1859  {
1860  	unsigned long flags;
1861  
1862  	local_irq_save(flags);
1863  	if (!cmc_polling_enabled)
1864  		ia64_mca_cmc_vector_enable(NULL);
1865  	local_irq_restore(flags);
1866  	return 0;
1867  }
1868  
1869  /*
1870   * ia64_mca_init
1871   *
1872   *  Do all the system level mca specific initialization.
1873   *
1874   *	1. Register spinloop and wakeup request interrupt vectors
1875   *
1876   *	2. Register OS_MCA handler entry point
1877   *
1878   *	3. Register OS_INIT handler entry point
1879   *
1880   *  4. Initialize MCA/CMC/INIT related log buffers maintained by the OS.
1881   *
1882   *  Note that this initialization is done very early before some kernel
1883   *  services are available.
1884   *
1885   *  Inputs  :   None
1886   *
1887   *  Outputs :   None
1888   */
1889  void __init
ia64_mca_init(void)1890  ia64_mca_init(void)
1891  {
1892  	ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch;
1893  	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
1894  	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
1895  	int i;
1896  	long rc;
1897  	struct ia64_sal_retval isrv;
1898  	unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
1899  	static struct notifier_block default_init_monarch_nb = {
1900  		.notifier_call = default_monarch_init_process,
1901  		.priority = 0/* we need to notified last */
1902  	};
1903  
1904  	IA64_MCA_DEBUG("%s: begin\n", __func__);
1905  
1906  	/* Clear the Rendez checkin flag for all cpus */
1907  	for(i = 0 ; i < NR_CPUS; i++)
1908  		ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1909  
1910  	/*
1911  	 * Register the rendezvous spinloop and wakeup mechanism with SAL
1912  	 */
1913  
1914  	/* Register the rendezvous interrupt vector with SAL */
1915  	while (1) {
1916  		isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
1917  					      SAL_MC_PARAM_MECHANISM_INT,
1918  					      IA64_MCA_RENDEZ_VECTOR,
1919  					      timeout,
1920  					      SAL_MC_PARAM_RZ_ALWAYS);
1921  		rc = isrv.status;
1922  		if (rc == 0)
1923  			break;
1924  		if (rc == -2) {
1925  			printk(KERN_INFO "Increasing MCA rendezvous timeout from "
1926  				"%ld to %ld milliseconds\n", timeout, isrv.v0);
1927  			timeout = isrv.v0;
1928  			NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0);
1929  			continue;
1930  		}
1931  		printk(KERN_ERR "Failed to register rendezvous interrupt "
1932  		       "with SAL (status %ld)\n", rc);
1933  		return;
1934  	}
1935  
1936  	/* Register the wakeup interrupt vector with SAL */
1937  	isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
1938  				      SAL_MC_PARAM_MECHANISM_INT,
1939  				      IA64_MCA_WAKEUP_VECTOR,
1940  				      0, 0);
1941  	rc = isrv.status;
1942  	if (rc) {
1943  		printk(KERN_ERR "Failed to register wakeup interrupt with SAL "
1944  		       "(status %ld)\n", rc);
1945  		return;
1946  	}
1947  
1948  	IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__);
1949  
1950  	ia64_mc_info.imi_mca_handler        = ia64_tpa(mca_hldlr_ptr->fp);
1951  	/*
1952  	 * XXX - disable SAL checksum by setting size to 0; should be
1953  	 *	ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch);
1954  	 */
1955  	ia64_mc_info.imi_mca_handler_size	= 0;
1956  
1957  	/* Register the os mca handler with SAL */
1958  	if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
1959  				       ia64_mc_info.imi_mca_handler,
1960  				       ia64_tpa(mca_hldlr_ptr->gp),
1961  				       ia64_mc_info.imi_mca_handler_size,
1962  				       0, 0, 0)))
1963  	{
1964  		printk(KERN_ERR "Failed to register OS MCA handler with SAL "
1965  		       "(status %ld)\n", rc);
1966  		return;
1967  	}
1968  
1969  	IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__,
1970  		       ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
1971  
1972  	/*
1973  	 * XXX - disable SAL checksum by setting size to 0, should be
1974  	 * size of the actual init handler in mca_asm.S.
1975  	 */
1976  	ia64_mc_info.imi_monarch_init_handler		= ia64_tpa(init_hldlr_ptr_monarch->fp);
1977  	ia64_mc_info.imi_monarch_init_handler_size	= 0;
1978  	ia64_mc_info.imi_slave_init_handler		= ia64_tpa(init_hldlr_ptr_slave->fp);
1979  	ia64_mc_info.imi_slave_init_handler_size	= 0;
1980  
1981  	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __func__,
1982  		       ia64_mc_info.imi_monarch_init_handler);
1983  
1984  	/* Register the os init handler with SAL */
1985  	if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
1986  				       ia64_mc_info.imi_monarch_init_handler,
1987  				       ia64_tpa(ia64_getreg(_IA64_REG_GP)),
1988  				       ia64_mc_info.imi_monarch_init_handler_size,
1989  				       ia64_mc_info.imi_slave_init_handler,
1990  				       ia64_tpa(ia64_getreg(_IA64_REG_GP)),
1991  				       ia64_mc_info.imi_slave_init_handler_size)))
1992  	{
1993  		printk(KERN_ERR "Failed to register m/s INIT handlers with SAL "
1994  		       "(status %ld)\n", rc);
1995  		return;
1996  	}
1997  	if (register_die_notifier(&default_init_monarch_nb)) {
1998  		printk(KERN_ERR "Failed to register default monarch INIT process\n");
1999  		return;
2000  	}
2001  
2002  	IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__);
2003  
2004  	/* Initialize the areas set aside by the OS to buffer the
2005  	 * platform/processor error states for MCA/INIT/CMC
2006  	 * handling.
2007  	 */
2008  	ia64_log_init(SAL_INFO_TYPE_MCA);
2009  	ia64_log_init(SAL_INFO_TYPE_INIT);
2010  	ia64_log_init(SAL_INFO_TYPE_CMC);
2011  	ia64_log_init(SAL_INFO_TYPE_CPE);
2012  
2013  	mca_init = 1;
2014  	printk(KERN_INFO "MCA related initialization done\n");
2015  }
2016  
2017  
2018  /*
2019   * These pieces cannot be done in ia64_mca_init() because it is called before
2020   * early_irq_init() which would wipe out our percpu irq registrations. But we
2021   * cannot leave them until ia64_mca_late_init() because by then all the other
2022   * processors have been brought online and have set their own CMC vectors to
2023   * point at a non-existant action. Called from arch_early_irq_init().
2024   */
ia64_mca_irq_init(void)2025  void __init ia64_mca_irq_init(void)
2026  {
2027  	/*
2028  	 *  Configure the CMCI/P vector and handler. Interrupts for CMC are
2029  	 *  per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
2030  	 */
2031  	register_percpu_irq(IA64_CMC_VECTOR, ia64_mca_cmc_int_handler, 0,
2032  			    "cmc_hndlr");
2033  	register_percpu_irq(IA64_CMCP_VECTOR, ia64_mca_cmc_int_caller, 0,
2034  			    "cmc_poll");
2035  	ia64_mca_cmc_vector_setup();       /* Setup vector on BSP */
2036  
2037  	/* Setup the MCA rendezvous interrupt vector */
2038  	register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, ia64_mca_rendez_int_handler,
2039  			    0, "mca_rdzv");
2040  
2041  	/* Setup the MCA wakeup interrupt vector */
2042  	register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, ia64_mca_wakeup_int_handler,
2043  			    0, "mca_wkup");
2044  
2045  	/* Setup the CPEI/P handler */
2046  	register_percpu_irq(IA64_CPEP_VECTOR, ia64_mca_cpe_int_caller, 0,
2047  			    "cpe_poll");
2048  }
2049  
2050  /*
2051   * ia64_mca_late_init
2052   *
2053   *	Opportunity to setup things that require initialization later
2054   *	than ia64_mca_init.  Setup a timer to poll for CPEs if the
2055   *	platform doesn't support an interrupt driven mechanism.
2056   *
2057   *  Inputs  :   None
2058   *  Outputs :   Status
2059   */
2060  static int __init
ia64_mca_late_init(void)2061  ia64_mca_late_init(void)
2062  {
2063  	if (!mca_init)
2064  		return 0;
2065  
2066  	/* Setup the CMCI/P vector and handler */
2067  	timer_setup(&cmc_poll_timer, ia64_mca_cmc_poll, 0);
2068  
2069  	/* Unmask/enable the vector */
2070  	cmc_polling_enabled = 0;
2071  	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/mca:online",
2072  			  ia64_mca_cpu_online, NULL);
2073  	IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
2074  
2075  	/* Setup the CPEI/P vector and handler */
2076  	cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
2077  	timer_setup(&cpe_poll_timer, ia64_mca_cpe_poll, 0);
2078  
2079  	{
2080  		unsigned int irq;
2081  
2082  		if (cpe_vector >= 0) {
2083  			/* If platform supports CPEI, enable the irq. */
2084  			irq = local_vector_to_irq(cpe_vector);
2085  			if (irq > 0) {
2086  				cpe_poll_enabled = 0;
2087  				irq_set_status_flags(irq, IRQ_PER_CPU);
2088  				if (request_irq(irq, ia64_mca_cpe_int_handler,
2089  						0, "cpe_hndlr", NULL))
2090  					pr_err("Failed to register cpe_hndlr interrupt\n");
2091  				ia64_cpe_irq = irq;
2092  				ia64_mca_register_cpev(cpe_vector);
2093  				IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n",
2094  					__func__);
2095  				return 0;
2096  			}
2097  			printk(KERN_ERR "%s: Failed to find irq for CPE "
2098  					"interrupt handler, vector %d\n",
2099  					__func__, cpe_vector);
2100  		}
2101  		/* If platform doesn't support CPEI, get the timer going. */
2102  		if (cpe_poll_enabled) {
2103  			ia64_mca_cpe_poll(0UL);
2104  			IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __func__);
2105  		}
2106  	}
2107  
2108  	return 0;
2109  }
2110  
2111  device_initcall(ia64_mca_late_init);
2112