xref: /openbmc/linux/arch/ia64/kernel/mca_drv.c (revision 03ab8e6297acd1bc0eedaa050e2a1635c576fd11)
109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * File:	mca_drv.c
41da177e4SLinus Torvalds  * Purpose:	Generic MCA handling layer
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * Copyright (C) 2004 FUJITSU LIMITED
7fe77efb8SHidetoshi Seto  * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
87f613c7dSKeith Owens  * Copyright (C) 2005 Silicon Graphics, Inc
97f613c7dSKeith Owens  * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
10d2a28ad9SRuss Anderson  * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
111da177e4SLinus Torvalds  */
121da177e4SLinus Torvalds #include <linux/types.h>
131da177e4SLinus Torvalds #include <linux/init.h>
141da177e4SLinus Torvalds #include <linux/sched.h>
151da177e4SLinus Torvalds #include <linux/interrupt.h>
161da177e4SLinus Torvalds #include <linux/irq.h>
171da177e4SLinus Torvalds #include <linux/kallsyms.h>
1857c8a661SMike Rapoport #include <linux/memblock.h>
191da177e4SLinus Torvalds #include <linux/acpi.h>
201da177e4SLinus Torvalds #include <linux/timer.h>
211da177e4SLinus Torvalds #include <linux/module.h>
221da177e4SLinus Torvalds #include <linux/kernel.h>
231da177e4SLinus Torvalds #include <linux/smp.h>
241da177e4SLinus Torvalds #include <linux/workqueue.h>
251da177e4SLinus Torvalds #include <linux/mm.h>
265a0e3ad6STejun Heo #include <linux/slab.h>
271da177e4SLinus Torvalds 
281da177e4SLinus Torvalds #include <asm/delay.h>
291da177e4SLinus Torvalds #include <asm/page.h>
301da177e4SLinus Torvalds #include <asm/ptrace.h>
311da177e4SLinus Torvalds #include <asm/sal.h>
321da177e4SLinus Torvalds #include <asm/mca.h>
331da177e4SLinus Torvalds 
341da177e4SLinus Torvalds #include <asm/irq.h>
351da177e4SLinus Torvalds #include <asm/hw_irq.h>
361da177e4SLinus Torvalds 
371da177e4SLinus Torvalds #include "mca_drv.h"
381da177e4SLinus Torvalds 
/*
 * Default upper bound assumed for the size of one SAL error record;
 * init_record_index_pools() divides it by a section size to dimension
 * the section index pool.
 */
static int sal_rec_max = 10000;

/*
 * Defined in mca_drv_asm.S.  recover_from_read_error() redirects the
 * interrupted context to this hook.
 */
extern void *mca_handler_bhhook(void);

/* Held by mca_handler_bh() around its call to mca_page_isolate(). */
static DEFINE_SPINLOCK(mca_bh_lock);
461da177e4SLinus Torvalds 
/* Scope of a machine check, as returned by is_mca_global(). */
typedef enum {
	MCA_IS_LOCAL  = 0,
	MCA_IS_GLOBAL = 1
} mca_type_t;

/* Number of slots in the isolated-page table. */
#define MAX_PAGE_ISOLATE 1024

/* Pages registered by mca_page_isolate(), and the count of used slots. */
static struct page *page_isolate[MAX_PAGE_ISOLATE];
static int num_page_isolate;

/* Result of mca_page_isolate(). */
typedef enum {
	ISOLATE_NG   = 0,	/* table full, or page is Slab/Reserved */
	ISOLATE_OK   = 1,	/* page registered now or earlier */
	ISOLATE_NONE = 2	/* address is not a valid physical page */
} isolate_status_t;

/* Outcome reported by the recovery routines. */
typedef enum {
	MCA_NOT_RECOVERED = 0,
	MCA_RECOVERED	  = 1
} recovery_status_t;
6718997961SRuss Anderson 
681da177e4SLinus Torvalds /*
691da177e4SLinus Torvalds  *  This pool keeps pointers to the section part of SAL error record
701da177e4SLinus Torvalds  */
711da177e4SLinus Torvalds static struct {
721da177e4SLinus Torvalds 	slidx_list_t *buffer; /* section pointer list pool */
731da177e4SLinus Torvalds 	int	     cur_idx; /* Current index of section pointer list pool */
741da177e4SLinus Torvalds 	int	     max_idx; /* Maximum index of section pointer list pool */
751da177e4SLinus Torvalds } slidx_pool;
761da177e4SLinus Torvalds 
7718997961SRuss Anderson static int
fatal_mca(const char * fmt,...)7818997961SRuss Anderson fatal_mca(const char *fmt, ...)
7918997961SRuss Anderson {
8018997961SRuss Anderson 	va_list args;
8143ed3bafSHidetoshi Seto 	char buf[256];
8218997961SRuss Anderson 
8318997961SRuss Anderson 	va_start(args, fmt);
8443ed3bafSHidetoshi Seto 	vsnprintf(buf, sizeof(buf), fmt, args);
8518997961SRuss Anderson 	va_end(args);
8643ed3bafSHidetoshi Seto 	ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
8718997961SRuss Anderson 
8818997961SRuss Anderson 	return MCA_NOT_RECOVERED;
8918997961SRuss Anderson }
9018997961SRuss Anderson 
9143ed3bafSHidetoshi Seto static int
mca_recovered(const char * fmt,...)9243ed3bafSHidetoshi Seto mca_recovered(const char *fmt, ...)
9343ed3bafSHidetoshi Seto {
9443ed3bafSHidetoshi Seto 	va_list args;
9543ed3bafSHidetoshi Seto 	char buf[256];
9643ed3bafSHidetoshi Seto 
9743ed3bafSHidetoshi Seto 	va_start(args, fmt);
9843ed3bafSHidetoshi Seto 	vsnprintf(buf, sizeof(buf), fmt, args);
9943ed3bafSHidetoshi Seto 	va_end(args);
10043ed3bafSHidetoshi Seto 	ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
10143ed3bafSHidetoshi Seto 
10243ed3bafSHidetoshi Seto 	return MCA_RECOVERED;
10343ed3bafSHidetoshi Seto }
10443ed3bafSHidetoshi Seto 
1051da177e4SLinus Torvalds /**
1061da177e4SLinus Torvalds  * mca_page_isolate - isolate a poisoned page in order not to use it later
1071da177e4SLinus Torvalds  * @paddr:	poisoned memory location
1081da177e4SLinus Torvalds  *
1091da177e4SLinus Torvalds  * Return value:
1104881e2cdSHidetoshi Seto  *	one of isolate_status_t, ISOLATE_OK/NG/NONE.
1111da177e4SLinus Torvalds  */
1121da177e4SLinus Torvalds 
1131da177e4SLinus Torvalds static isolate_status_t
mca_page_isolate(unsigned long paddr)1141da177e4SLinus Torvalds mca_page_isolate(unsigned long paddr)
1151da177e4SLinus Torvalds {
1161da177e4SLinus Torvalds 	int i;
1171da177e4SLinus Torvalds 	struct page *p;
1181da177e4SLinus Torvalds 
1191da177e4SLinus Torvalds 	/* whether physical address is valid or not */
1201da177e4SLinus Torvalds 	if (!ia64_phys_addr_valid(paddr))
1214881e2cdSHidetoshi Seto 		return ISOLATE_NONE;
1224881e2cdSHidetoshi Seto 
12356f87b82SRuss Anderson 	if (!pfn_valid(paddr >> PAGE_SHIFT))
1244881e2cdSHidetoshi Seto 		return ISOLATE_NONE;
1251da177e4SLinus Torvalds 
1261da177e4SLinus Torvalds 	/* convert physical address to physical page number */
1271da177e4SLinus Torvalds 	p = pfn_to_page(paddr>>PAGE_SHIFT);
1281da177e4SLinus Torvalds 
1291da177e4SLinus Torvalds 	/* check whether a page number have been already registered or not */
1301da177e4SLinus Torvalds 	for (i = 0; i < num_page_isolate; i++)
1311da177e4SLinus Torvalds 		if (page_isolate[i] == p)
1321da177e4SLinus Torvalds 			return ISOLATE_OK; /* already listed */
1331da177e4SLinus Torvalds 
1341da177e4SLinus Torvalds 	/* limitation check */
1351da177e4SLinus Torvalds 	if (num_page_isolate == MAX_PAGE_ISOLATE)
1361da177e4SLinus Torvalds 		return ISOLATE_NG;
1371da177e4SLinus Torvalds 
1381da177e4SLinus Torvalds 	/* kick pages having attribute 'SLAB' or 'Reserved' */
1391da177e4SLinus Torvalds 	if (PageSlab(p) || PageReserved(p))
1401da177e4SLinus Torvalds 		return ISOLATE_NG;
1411da177e4SLinus Torvalds 
1421da177e4SLinus Torvalds 	/* add attribute 'Reserved' and register the page */
143cbb92144SRuss Anderson 	get_page(p);
1441da177e4SLinus Torvalds 	SetPageReserved(p);
1451da177e4SLinus Torvalds 	page_isolate[num_page_isolate++] = p;
1461da177e4SLinus Torvalds 
1471da177e4SLinus Torvalds 	return ISOLATE_OK;
1481da177e4SLinus Torvalds }
1491da177e4SLinus Torvalds 
1501da177e4SLinus Torvalds /**
1511da177e4SLinus Torvalds  * mca_hanlder_bh - Kill the process which occurred memory read error
1521da177e4SLinus Torvalds  * @paddr:	poisoned address received from MCA Handler
1531da177e4SLinus Torvalds  */
1541da177e4SLinus Torvalds 
1551da177e4SLinus Torvalds void
mca_handler_bh(unsigned long paddr,void * iip,unsigned long ipsr)156d2a28ad9SRuss Anderson mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
1571da177e4SLinus Torvalds {
15843ed3bafSHidetoshi Seto 	ia64_mlogbuf_dump();
159d2a28ad9SRuss Anderson 	printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
160d2a28ad9SRuss Anderson 		"iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
1616c1ee033SEric W. Biederman 	       raw_smp_processor_id(), current->pid,
1626c1ee033SEric W. Biederman 		from_kuid(&init_user_ns, current_uid()),
163d2a28ad9SRuss Anderson 		iip, ipsr, paddr, current->comm);
1641da177e4SLinus Torvalds 
1651da177e4SLinus Torvalds 	spin_lock(&mca_bh_lock);
1664881e2cdSHidetoshi Seto 	switch (mca_page_isolate(paddr)) {
1674881e2cdSHidetoshi Seto 	case ISOLATE_OK:
1681da177e4SLinus Torvalds 		printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
1694881e2cdSHidetoshi Seto 		break;
1704881e2cdSHidetoshi Seto 	case ISOLATE_NG:
171ea0e92a6SRuss Anderson 		printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr);
1724881e2cdSHidetoshi Seto 		break;
1734881e2cdSHidetoshi Seto 	default:
1744881e2cdSHidetoshi Seto 		break;
1751da177e4SLinus Torvalds 	}
1761da177e4SLinus Torvalds 	spin_unlock(&mca_bh_lock);
1771da177e4SLinus Torvalds 
1781da177e4SLinus Torvalds 	/* This process is about to be killed itself */
179*0e25498fSEric W. Biederman 	make_task_dead(SIGKILL);
1801da177e4SLinus Torvalds }
1811da177e4SLinus Torvalds 
1821da177e4SLinus Torvalds /**
1831da177e4SLinus Torvalds  * mca_make_peidx - Make index of processor error section
1841da177e4SLinus Torvalds  * @slpi:	pointer to record of processor error section
1851da177e4SLinus Torvalds  * @peidx:	pointer to index of processor error section
1861da177e4SLinus Torvalds  */
1871da177e4SLinus Torvalds 
1881da177e4SLinus Torvalds static void
mca_make_peidx(sal_log_processor_info_t * slpi,peidx_table_t * peidx)1891da177e4SLinus Torvalds mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
1901da177e4SLinus Torvalds {
1911da177e4SLinus Torvalds 	/*
1921da177e4SLinus Torvalds 	 * calculate the start address of
1931da177e4SLinus Torvalds 	 *   "struct cpuid_info" and "sal_processor_static_info_t".
1941da177e4SLinus Torvalds 	 */
1951da177e4SLinus Torvalds 	u64 total_check_num = slpi->valid.num_cache_check
1961da177e4SLinus Torvalds 				+ slpi->valid.num_tlb_check
1971da177e4SLinus Torvalds 				+ slpi->valid.num_bus_check
1981da177e4SLinus Torvalds 				+ slpi->valid.num_reg_file_check
1991da177e4SLinus Torvalds 				+ slpi->valid.num_ms_check;
2001da177e4SLinus Torvalds 	u64 head_size =	sizeof(sal_log_mod_error_info_t) * total_check_num
2011da177e4SLinus Torvalds 			+ sizeof(sal_log_processor_info_t);
2021da177e4SLinus Torvalds 	u64 mid_size  = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
2031da177e4SLinus Torvalds 
2041da177e4SLinus Torvalds 	peidx_head(peidx)   = slpi;
2051da177e4SLinus Torvalds 	peidx_mid(peidx)    = (struct sal_cpuid_info *)
2061da177e4SLinus Torvalds 		(slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
2071da177e4SLinus Torvalds 	peidx_bottom(peidx) = (sal_processor_static_info_t *)
2081da177e4SLinus Torvalds 		(slpi->valid.psi_static_struct ?
2091da177e4SLinus Torvalds 			((char*)slpi + head_size + mid_size) : NULL);
2101da177e4SLinus Torvalds }
2111da177e4SLinus Torvalds 
2121da177e4SLinus Torvalds /**
2131da177e4SLinus Torvalds  * mca_make_slidx -  Make index of SAL error record
2141da177e4SLinus Torvalds  * @buffer:	pointer to SAL error record
2151da177e4SLinus Torvalds  * @slidx:	pointer to index of SAL error record
2161da177e4SLinus Torvalds  *
2171da177e4SLinus Torvalds  * Return value:
2181da177e4SLinus Torvalds  *	1 if record has platform error / 0 if not
2191da177e4SLinus Torvalds  */
2201da177e4SLinus Torvalds #define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
2211da177e4SLinus Torvalds 	{slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
2221da177e4SLinus Torvalds 	hl->hdr = ptr; \
2231da177e4SLinus Torvalds 	list_add(&hl->list, &(sect)); \
2241da177e4SLinus Torvalds 	slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
2251da177e4SLinus Torvalds 
2261da177e4SLinus Torvalds static int
mca_make_slidx(void * buffer,slidx_table_t * slidx)2271da177e4SLinus Torvalds mca_make_slidx(void *buffer, slidx_table_t *slidx)
2281da177e4SLinus Torvalds {
2291da177e4SLinus Torvalds 	int platform_err = 0;
2301da177e4SLinus Torvalds 	int record_len = ((sal_log_record_header_t*)buffer)->len;
2311da177e4SLinus Torvalds 	u32 ercd_pos;
2321da177e4SLinus Torvalds 	int sects;
2331da177e4SLinus Torvalds 	sal_log_section_hdr_t *sp;
2341da177e4SLinus Torvalds 
2351da177e4SLinus Torvalds 	/*
2361da177e4SLinus Torvalds 	 * Initialize index referring current record
2371da177e4SLinus Torvalds 	 */
2381da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->proc_err));
2391da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->mem_dev_err));
2401da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->sel_dev_err));
2411da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->pci_bus_err));
2421da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->smbios_dev_err));
2431da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->pci_comp_err));
2441da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->plat_specific_err));
2451da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->host_ctlr_err));
2461da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->plat_bus_err));
2471da177e4SLinus Torvalds 	INIT_LIST_HEAD(&(slidx->unsupported));
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds 	/*
2501da177e4SLinus Torvalds 	 * Extract a Record Header
2511da177e4SLinus Torvalds 	 */
2521da177e4SLinus Torvalds 	slidx->header = buffer;
2531da177e4SLinus Torvalds 
2541da177e4SLinus Torvalds 	/*
2551da177e4SLinus Torvalds 	 * Extract each section records
2561da177e4SLinus Torvalds 	 * (arranged from "int ia64_log_platform_info_print()")
2571da177e4SLinus Torvalds 	 */
2581da177e4SLinus Torvalds 	for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
2591da177e4SLinus Torvalds 		ercd_pos < record_len; ercd_pos += sp->len, sects++) {
2601da177e4SLinus Torvalds 		sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
2611da177e4SLinus Torvalds 		if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
2621da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
26320305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
26420305e59SHidetoshi Seto 				SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
2651da177e4SLinus Torvalds 			platform_err = 1;
2661da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
26720305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
26820305e59SHidetoshi Seto 				SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
2691da177e4SLinus Torvalds 			platform_err = 1;
2701da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
27120305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
27220305e59SHidetoshi Seto 				SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
2731da177e4SLinus Torvalds 			platform_err = 1;
2741da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
27520305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
27620305e59SHidetoshi Seto 				SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
2771da177e4SLinus Torvalds 			platform_err = 1;
2781da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
27920305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
28020305e59SHidetoshi Seto 				SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
2811da177e4SLinus Torvalds 			platform_err = 1;
2821da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
28320305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
28420305e59SHidetoshi Seto 				SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
2851da177e4SLinus Torvalds 			platform_err = 1;
2861da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
28720305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
28820305e59SHidetoshi Seto 				SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
2891da177e4SLinus Torvalds 			platform_err = 1;
2901da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
29120305e59SHidetoshi Seto 		} else if (!efi_guidcmp(sp->guid,
29220305e59SHidetoshi Seto 				SAL_PLAT_BUS_ERR_SECT_GUID)) {
2931da177e4SLinus Torvalds 			platform_err = 1;
2941da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
2951da177e4SLinus Torvalds 		} else {
2961da177e4SLinus Torvalds 			LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
2971da177e4SLinus Torvalds 		}
2981da177e4SLinus Torvalds 	}
2991da177e4SLinus Torvalds 	slidx->n_sections = sects;
3001da177e4SLinus Torvalds 
3011da177e4SLinus Torvalds 	return platform_err;
3021da177e4SLinus Torvalds }
3031da177e4SLinus Torvalds 
3041da177e4SLinus Torvalds /**
3051da177e4SLinus Torvalds  * init_record_index_pools - Initialize pool of lists for SAL record index
3061da177e4SLinus Torvalds  *
3071da177e4SLinus Torvalds  * Return value:
3081da177e4SLinus Torvalds  *	0 on Success / -ENOMEM on Failure
3091da177e4SLinus Torvalds  */
3101da177e4SLinus Torvalds static int
init_record_index_pools(void)3111da177e4SLinus Torvalds init_record_index_pools(void)
3121da177e4SLinus Torvalds {
3131da177e4SLinus Torvalds 	int i;
3141da177e4SLinus Torvalds 	int rec_max_size;  /* Maximum size of SAL error records */
3151da177e4SLinus Torvalds 	int sect_min_size; /* Minimum size of SAL error sections */
3161da177e4SLinus Torvalds 	/* minimum size table of each section */
3171da177e4SLinus Torvalds 	static int sal_log_sect_min_sizes[] = {
31820305e59SHidetoshi Seto 		sizeof(sal_log_processor_info_t)
31920305e59SHidetoshi Seto 		+ sizeof(sal_processor_static_info_t),
3201da177e4SLinus Torvalds 		sizeof(sal_log_mem_dev_err_info_t),
3211da177e4SLinus Torvalds 		sizeof(sal_log_sel_dev_err_info_t),
3221da177e4SLinus Torvalds 		sizeof(sal_log_pci_bus_err_info_t),
3231da177e4SLinus Torvalds 		sizeof(sal_log_smbios_dev_err_info_t),
3241da177e4SLinus Torvalds 		sizeof(sal_log_pci_comp_err_info_t),
3251da177e4SLinus Torvalds 		sizeof(sal_log_plat_specific_err_info_t),
3261da177e4SLinus Torvalds 		sizeof(sal_log_host_ctlr_err_info_t),
3271da177e4SLinus Torvalds 		sizeof(sal_log_plat_bus_err_info_t),
3281da177e4SLinus Torvalds 	};
3291da177e4SLinus Torvalds 
3301da177e4SLinus Torvalds 	/*
3311da177e4SLinus Torvalds 	 * MCA handler cannot allocate new memory on flight,
3321da177e4SLinus Torvalds 	 * so we preallocate enough memory to handle a SAL record.
3331da177e4SLinus Torvalds 	 *
3341da177e4SLinus Torvalds 	 * Initialize a handling set of slidx_pool:
3351da177e4SLinus Torvalds 	 *   1. Pick up the max size of SAL error records
3361da177e4SLinus Torvalds 	 *   2. Pick up the min size of SAL error sections
3371da177e4SLinus Torvalds 	 *   3. Allocate the pool as enough to 2 SAL records
3381da177e4SLinus Torvalds 	 *     (now we can estimate the maxinum of section in a record.)
3391da177e4SLinus Torvalds 	 */
3401da177e4SLinus Torvalds 
3411da177e4SLinus Torvalds 	/* - 1 - */
3421da177e4SLinus Torvalds 	rec_max_size = sal_rec_max;
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds 	/* - 2 - */
3451da177e4SLinus Torvalds 	sect_min_size = sal_log_sect_min_sizes[0];
346c5f320ffSArnd Bergmann 	for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++)
3471da177e4SLinus Torvalds 		if (sect_min_size > sal_log_sect_min_sizes[i])
3481da177e4SLinus Torvalds 			sect_min_size = sal_log_sect_min_sizes[i];
3491da177e4SLinus Torvalds 
3501da177e4SLinus Torvalds 	/* - 3 - */
3511da177e4SLinus Torvalds 	slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
3527c13e0d1SZhang Yanfei 	slidx_pool.buffer =
3536da2ec56SKees Cook 		kmalloc_array(slidx_pool.max_idx, sizeof(slidx_list_t),
3546da2ec56SKees Cook 			      GFP_KERNEL);
3551da177e4SLinus Torvalds 
3561da177e4SLinus Torvalds 	return slidx_pool.buffer ? 0 : -ENOMEM;
3571da177e4SLinus Torvalds }
3581da177e4SLinus Torvalds 
3591da177e4SLinus Torvalds 
3601da177e4SLinus Torvalds /*****************************************************************************
3611da177e4SLinus Torvalds  * Recovery functions                                                        *
3621da177e4SLinus Torvalds  *****************************************************************************/
3631da177e4SLinus Torvalds 
3641da177e4SLinus Torvalds /**
3651da177e4SLinus Torvalds  * is_mca_global - Check whether this MCA is global or not
3661da177e4SLinus Torvalds  * @peidx:	pointer of index of processor error section
3671da177e4SLinus Torvalds  * @pbci:	pointer to pal_bus_check_info_t
36820305e59SHidetoshi Seto  * @sos:	pointer to hand off struct between SAL and OS
3691da177e4SLinus Torvalds  *
3701da177e4SLinus Torvalds  * Return value:
3711da177e4SLinus Torvalds  *	MCA_IS_LOCAL / MCA_IS_GLOBAL
3721da177e4SLinus Torvalds  */
3731da177e4SLinus Torvalds 
3741da177e4SLinus Torvalds static mca_type_t
is_mca_global(peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)3757f613c7dSKeith Owens is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
3767f613c7dSKeith Owens 	      struct ia64_sal_os_state *sos)
3771da177e4SLinus Torvalds {
37820305e59SHidetoshi Seto 	pal_processor_state_info_t *psp =
37920305e59SHidetoshi Seto 		(pal_processor_state_info_t*)peidx_psp(peidx);
3801da177e4SLinus Torvalds 
3811da177e4SLinus Torvalds 	/*
3821da177e4SLinus Torvalds 	 * PAL can request a rendezvous, if the MCA has a global scope.
3831da177e4SLinus Torvalds 	 * If "rz_always" flag is set, SAL requests MCA rendezvous
3841da177e4SLinus Torvalds 	 * in spite of global MCA.
3851da177e4SLinus Torvalds 	 * Therefore it is local MCA when rendezvous has not been requested.
3861da177e4SLinus Torvalds 	 * Failed to rendezvous, the system must be down.
3871da177e4SLinus Torvalds 	 */
3887f613c7dSKeith Owens 	switch (sos->rv_rc) {
3891da177e4SLinus Torvalds 		case -1: /* SAL rendezvous unsuccessful */
3901da177e4SLinus Torvalds 			return MCA_IS_GLOBAL;
3911da177e4SLinus Torvalds 		case  0: /* SAL rendezvous not required */
3921da177e4SLinus Torvalds 			return MCA_IS_LOCAL;
3931da177e4SLinus Torvalds 		case  1: /* SAL rendezvous successful int */
3941da177e4SLinus Torvalds 		case  2: /* SAL rendezvous successful int with init */
3951da177e4SLinus Torvalds 		default:
3961da177e4SLinus Torvalds 			break;
3971da177e4SLinus Torvalds 	}
3981da177e4SLinus Torvalds 
3991da177e4SLinus Torvalds 	/*
4001da177e4SLinus Torvalds 	 * If One or more Cache/TLB/Reg_File/Uarch_Check is here,
4011da177e4SLinus Torvalds 	 * it would be a local MCA. (i.e. processor internal error)
4021da177e4SLinus Torvalds 	 */
4031da177e4SLinus Torvalds 	if (psp->tc || psp->cc || psp->rc || psp->uc)
4041da177e4SLinus Torvalds 		return MCA_IS_LOCAL;
4051da177e4SLinus Torvalds 
4061da177e4SLinus Torvalds 	/*
4071da177e4SLinus Torvalds 	 * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
4081da177e4SLinus Torvalds 	 * would be a global MCA. (e.g. a system bus address parity error)
4091da177e4SLinus Torvalds 	 */
4101da177e4SLinus Torvalds 	if (!pbci || pbci->ib)
4111da177e4SLinus Torvalds 		return MCA_IS_GLOBAL;
4121da177e4SLinus Torvalds 
4131da177e4SLinus Torvalds 	/*
4141da177e4SLinus Torvalds 	 * Bus_Check structure with Bus_Check.eb (external bus error) flag set
4151da177e4SLinus Torvalds 	 * could be either a local MCA or a global MCA.
4161da177e4SLinus Torvalds 	 *
4171da177e4SLinus Torvalds 	 * Referring Bus_Check.bsi:
4181da177e4SLinus Torvalds 	 *   0: Unknown/unclassified
4191da177e4SLinus Torvalds 	 *   1: BERR#
4201da177e4SLinus Torvalds 	 *   2: BINIT#
4211da177e4SLinus Torvalds 	 *   3: Hard Fail
4221da177e4SLinus Torvalds 	 * (FIXME: Are these SGI specific or generic bsi values?)
4231da177e4SLinus Torvalds 	 */
4241da177e4SLinus Torvalds 	if (pbci->eb)
4251da177e4SLinus Torvalds 		switch (pbci->bsi) {
4261da177e4SLinus Torvalds 			case 0:
4271da177e4SLinus Torvalds 				/* e.g. a load from poisoned memory */
4281da177e4SLinus Torvalds 				return MCA_IS_LOCAL;
4291da177e4SLinus Torvalds 			case 1:
4301da177e4SLinus Torvalds 			case 2:
4311da177e4SLinus Torvalds 			case 3:
4321da177e4SLinus Torvalds 				return MCA_IS_GLOBAL;
4331da177e4SLinus Torvalds 		}
4341da177e4SLinus Torvalds 
4351da177e4SLinus Torvalds 	return MCA_IS_GLOBAL;
4361da177e4SLinus Torvalds }
4371da177e4SLinus Torvalds 
4381da177e4SLinus Torvalds /**
439264b0f99SRuss Anderson  * get_target_identifier - Get the valid Cache or Bus check target identifier.
440264b0f99SRuss Anderson  * @peidx:	pointer of index of processor error section
441264b0f99SRuss Anderson  *
442264b0f99SRuss Anderson  * Return value:
44372fdbdceSSimon Arlott  *	target address on Success / 0 on Failure
444264b0f99SRuss Anderson  */
445264b0f99SRuss Anderson static u64
get_target_identifier(peidx_table_t * peidx)446264b0f99SRuss Anderson get_target_identifier(peidx_table_t *peidx)
447264b0f99SRuss Anderson {
448264b0f99SRuss Anderson 	u64 target_address = 0;
449264b0f99SRuss Anderson 	sal_log_mod_error_info_t *smei;
450264b0f99SRuss Anderson 	pal_cache_check_info_t *pcci;
451264b0f99SRuss Anderson 	int i, level = 9;
452264b0f99SRuss Anderson 
453264b0f99SRuss Anderson 	/*
454264b0f99SRuss Anderson 	 * Look through the cache checks for a valid target identifier
455264b0f99SRuss Anderson 	 * If more than one valid target identifier, return the one
456264b0f99SRuss Anderson 	 * with the lowest cache level.
457264b0f99SRuss Anderson 	 */
458264b0f99SRuss Anderson 	for (i = 0; i < peidx_cache_check_num(peidx); i++) {
459264b0f99SRuss Anderson 		smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
460264b0f99SRuss Anderson 		if (smei->valid.target_identifier && smei->target_identifier) {
461264b0f99SRuss Anderson 			pcci = (pal_cache_check_info_t *)&(smei->check_info);
462264b0f99SRuss Anderson 			if (!target_address || (pcci->level < level)) {
463264b0f99SRuss Anderson 				target_address = smei->target_identifier;
464264b0f99SRuss Anderson 				level = pcci->level;
465264b0f99SRuss Anderson 				continue;
466264b0f99SRuss Anderson 			}
467264b0f99SRuss Anderson 		}
468264b0f99SRuss Anderson 	}
469264b0f99SRuss Anderson 	if (target_address)
470264b0f99SRuss Anderson 		return target_address;
471264b0f99SRuss Anderson 
472264b0f99SRuss Anderson 	/*
473264b0f99SRuss Anderson 	 * Look at the bus check for a valid target identifier
474264b0f99SRuss Anderson 	 */
475264b0f99SRuss Anderson 	smei = peidx_bus_check(peidx, 0);
476264b0f99SRuss Anderson 	if (smei && smei->valid.target_identifier)
477264b0f99SRuss Anderson 		return smei->target_identifier;
478264b0f99SRuss Anderson 
479264b0f99SRuss Anderson 	return 0;
480264b0f99SRuss Anderson }
481264b0f99SRuss Anderson 
482264b0f99SRuss Anderson /**
4831da177e4SLinus Torvalds  * recover_from_read_error - Try to recover the errors which type are "read"s.
4841da177e4SLinus Torvalds  * @slidx:	pointer of index of SAL error record
4851da177e4SLinus Torvalds  * @peidx:	pointer of index of processor error section
4861da177e4SLinus Torvalds  * @pbci:	pointer of pal_bus_check_info
48720305e59SHidetoshi Seto  * @sos:	pointer to hand off struct between SAL and OS
4881da177e4SLinus Torvalds  *
4891da177e4SLinus Torvalds  * Return value:
4901da177e4SLinus Torvalds  *	1 on Success / 0 on Failure
4911da177e4SLinus Torvalds  */
4921da177e4SLinus Torvalds 
4931da177e4SLinus Torvalds static int
recover_from_read_error(slidx_table_t * slidx,peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)49420305e59SHidetoshi Seto recover_from_read_error(slidx_table_t *slidx,
49520305e59SHidetoshi Seto 			peidx_table_t *peidx, pal_bus_check_info_t *pbci,
4967f613c7dSKeith Owens 			struct ia64_sal_os_state *sos)
4971da177e4SLinus Torvalds {
498264b0f99SRuss Anderson 	u64 target_identifier;
4992770ef7cSMasahiro Yamada 	struct pal_min_state_area *pmsa;
5001da177e4SLinus Torvalds 	struct ia64_psr *psr1, *psr2;
5011da177e4SLinus Torvalds 	ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
5021da177e4SLinus Torvalds 
5031da177e4SLinus Torvalds 	/* Is target address valid? */
504264b0f99SRuss Anderson 	target_identifier = get_target_identifier(peidx);
505264b0f99SRuss Anderson 	if (!target_identifier)
50643ed3bafSHidetoshi Seto 		return fatal_mca("target address not valid");
5071da177e4SLinus Torvalds 
5081da177e4SLinus Torvalds 	/*
5091da177e4SLinus Torvalds 	 * cpu read or memory-mapped io read
5101da177e4SLinus Torvalds 	 *
5111da177e4SLinus Torvalds 	 *    offending process  affected process  OS MCA do
5121da177e4SLinus Torvalds 	 *     kernel mode        kernel mode       down system
5131da177e4SLinus Torvalds 	 *     kernel mode        user   mode       kill the process
5141da177e4SLinus Torvalds 	 *     user   mode        kernel mode       down system (*)
5151da177e4SLinus Torvalds 	 *     user   mode        user   mode       kill the process
5161da177e4SLinus Torvalds 	 *
5171da177e4SLinus Torvalds 	 * (*) You could terminate offending user-mode process
5181da177e4SLinus Torvalds 	 *    if (pbci->pv && pbci->pl != 0) *and* if you sure
5191da177e4SLinus Torvalds 	 *    the process not have any locks of kernel.
5201da177e4SLinus Torvalds 	 */
5211da177e4SLinus Torvalds 
522a9474646SHidetoshi Seto 	/* Is minstate valid? */
523a9474646SHidetoshi Seto 	if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
52443ed3bafSHidetoshi Seto 		return fatal_mca("minstate not valid");
5251da177e4SLinus Torvalds 	psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
526d2a28ad9SRuss Anderson 	psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
5271da177e4SLinus Torvalds 
5281da177e4SLinus Torvalds 	/*
5291da177e4SLinus Torvalds 	 *  Check the privilege level of interrupted context.
5301da177e4SLinus Torvalds 	 *   If it is user-mode, then terminate affected process.
5311da177e4SLinus Torvalds 	 */
532d2a28ad9SRuss Anderson 
533d2a28ad9SRuss Anderson 	pmsa = sos->pal_min_state;
534d2a28ad9SRuss Anderson 	if (psr1->cpl != 0 ||
535d2a28ad9SRuss Anderson 	   ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
5361da177e4SLinus Torvalds 		/*
5371da177e4SLinus Torvalds 		 *  setup for resume to bottom half of MCA,
5381da177e4SLinus Torvalds 		 * "mca_handler_bhhook"
5391da177e4SLinus Torvalds 		 */
540d2a28ad9SRuss Anderson 		/* pass to bhhook as argument (gr8, ...) */
541264b0f99SRuss Anderson 		pmsa->pmsa_gr[8-1] = target_identifier;
542d2a28ad9SRuss Anderson 		pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
543d2a28ad9SRuss Anderson 		pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
5441da177e4SLinus Torvalds 		/* set interrupted return address (but no use) */
5451da177e4SLinus Torvalds 		pmsa->pmsa_br0 = pmsa->pmsa_iip;
5461da177e4SLinus Torvalds 		/* change resume address to bottom half */
5471da177e4SLinus Torvalds 		pmsa->pmsa_iip = mca_hdlr_bh->fp;
5481da177e4SLinus Torvalds 		pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
5491da177e4SLinus Torvalds 		/* set cpl with kernel mode */
5501da177e4SLinus Torvalds 		psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
5511da177e4SLinus Torvalds 		psr2->cpl = 0;
5521da177e4SLinus Torvalds 		psr2->ri  = 0;
553d2a28ad9SRuss Anderson 		psr2->bn  = 1;
554b1b901c2SRuss Anderson 		psr2->i  = 0;
5551da177e4SLinus Torvalds 
55643ed3bafSHidetoshi Seto 		return mca_recovered("user memory corruption. "
55743ed3bafSHidetoshi Seto 				"kill affected process - recovered.");
5581da177e4SLinus Torvalds 	}
5591da177e4SLinus Torvalds 
56043ed3bafSHidetoshi Seto 	return fatal_mca("kernel context not recovered, iip 0x%lx\n",
56143ed3bafSHidetoshi Seto 			 pmsa->pmsa_iip);
5621da177e4SLinus Torvalds }
5631da177e4SLinus Torvalds 
5641da177e4SLinus Torvalds /**
5651da177e4SLinus Torvalds  * recover_from_platform_error - Recover from platform error.
5661da177e4SLinus Torvalds  * @slidx:	pointer of index of SAL error record
5671da177e4SLinus Torvalds  * @peidx:	pointer of index of processor error section
5681da177e4SLinus Torvalds  * @pbci:	pointer of pal_bus_check_info
56920305e59SHidetoshi Seto  * @sos:	pointer to hand off struct between SAL and OS
5701da177e4SLinus Torvalds  *
5711da177e4SLinus Torvalds  * Return value:
5721da177e4SLinus Torvalds  *	1 on Success / 0 on Failure
5731da177e4SLinus Torvalds  */
5741da177e4SLinus Torvalds 
5751da177e4SLinus Torvalds static int
recover_from_platform_error(slidx_table_t * slidx,peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)57620305e59SHidetoshi Seto recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
57720305e59SHidetoshi Seto 			    pal_bus_check_info_t *pbci,
5787f613c7dSKeith Owens 			    struct ia64_sal_os_state *sos)
5791da177e4SLinus Torvalds {
5801da177e4SLinus Torvalds 	int status = 0;
58120305e59SHidetoshi Seto 	pal_processor_state_info_t *psp =
58220305e59SHidetoshi Seto 		(pal_processor_state_info_t*)peidx_psp(peidx);
5831da177e4SLinus Torvalds 
5841da177e4SLinus Torvalds 	if (psp->bc && pbci->eb && pbci->bsi == 0) {
5851da177e4SLinus Torvalds 		switch(pbci->type) {
5861da177e4SLinus Torvalds 		case 1: /* partial read */
5871da177e4SLinus Torvalds 		case 3: /* full line(cpu) read */
5881da177e4SLinus Torvalds 		case 9: /* I/O space read */
58920305e59SHidetoshi Seto 			status = recover_from_read_error(slidx, peidx, pbci,
59020305e59SHidetoshi Seto 							 sos);
5911da177e4SLinus Torvalds 			break;
5921da177e4SLinus Torvalds 		case 0: /* unknown */
5931da177e4SLinus Torvalds 		case 2: /* partial write */
5941da177e4SLinus Torvalds 		case 4: /* full line write */
5951da177e4SLinus Torvalds 		case 5: /* implicit or explicit write-back operation */
5961da177e4SLinus Torvalds 		case 6: /* snoop probe */
5971da177e4SLinus Torvalds 		case 7: /* incoming or outgoing ptc.g */
5981da177e4SLinus Torvalds 		case 8: /* write coalescing transactions */
5991da177e4SLinus Torvalds 		case 10: /* I/O space write */
6001da177e4SLinus Torvalds 		case 11: /* inter-processor interrupt message(IPI) */
60120305e59SHidetoshi Seto 		case 12: /* interrupt acknowledge or
60220305e59SHidetoshi Seto 				external task priority cycle */
6031da177e4SLinus Torvalds 		default:
6041da177e4SLinus Torvalds 			break;
6051da177e4SLinus Torvalds 		}
606396e8e76SRuss Anderson 	} else if (psp->cc && !psp->bc) {	/* Cache error */
607396e8e76SRuss Anderson 		status = recover_from_read_error(slidx, peidx, pbci, sos);
6081da177e4SLinus Torvalds 	}
6091da177e4SLinus Torvalds 
6101da177e4SLinus Torvalds 	return status;
6111da177e4SLinus Torvalds }
6121da177e4SLinus Torvalds 
613618b206fSRuss Anderson /*
614618b206fSRuss Anderson  * recover_from_tlb_check
615618b206fSRuss Anderson  * @peidx:	pointer of index of processor error section
616618b206fSRuss Anderson  *
617618b206fSRuss Anderson  * Return value:
618618b206fSRuss Anderson  *	1 on Success / 0 on Failure
619618b206fSRuss Anderson  */
620618b206fSRuss Anderson static int
recover_from_tlb_check(peidx_table_t * peidx)621618b206fSRuss Anderson recover_from_tlb_check(peidx_table_t *peidx)
622618b206fSRuss Anderson {
623618b206fSRuss Anderson 	sal_log_mod_error_info_t *smei;
624618b206fSRuss Anderson 	pal_tlb_check_info_t *ptci;
625618b206fSRuss Anderson 
626618b206fSRuss Anderson 	smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
627618b206fSRuss Anderson 	ptci = (pal_tlb_check_info_t *)&(smei->check_info);
628618b206fSRuss Anderson 
629618b206fSRuss Anderson 	/*
630618b206fSRuss Anderson 	 * Look for signature of a duplicate TLB DTC entry, which is
631618b206fSRuss Anderson 	 * a SW bug and always fatal.
632618b206fSRuss Anderson 	 */
633618b206fSRuss Anderson 	if (ptci->op == PAL_TLB_CHECK_OP_PURGE
634618b206fSRuss Anderson 	    && !(ptci->itr || ptci->dtc || ptci->itc))
635618b206fSRuss Anderson 		return fatal_mca("Duplicate TLB entry");
636618b206fSRuss Anderson 
637618b206fSRuss Anderson 	return mca_recovered("TLB check recovered");
638618b206fSRuss Anderson }
639618b206fSRuss Anderson 
6401da177e4SLinus Torvalds /**
6411da177e4SLinus Torvalds  * recover_from_processor_error
6421da177e4SLinus Torvalds  * @platform:	whether there are some platform error section or not
6431da177e4SLinus Torvalds  * @slidx:	pointer of index of SAL error record
6441da177e4SLinus Torvalds  * @peidx:	pointer of index of processor error section
6451da177e4SLinus Torvalds  * @pbci:	pointer of pal_bus_check_info
64620305e59SHidetoshi Seto  * @sos:	pointer to hand off struct between SAL and OS
6471da177e4SLinus Torvalds  *
6481da177e4SLinus Torvalds  * Return value:
6491da177e4SLinus Torvalds  *	1 on Success / 0 on Failure
6501da177e4SLinus Torvalds  */
6511da177e4SLinus Torvalds 
6521da177e4SLinus Torvalds static int
recover_from_processor_error(int platform,slidx_table_t * slidx,peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)65320305e59SHidetoshi Seto recover_from_processor_error(int platform, slidx_table_t *slidx,
65420305e59SHidetoshi Seto 			     peidx_table_t *peidx, pal_bus_check_info_t *pbci,
6557f613c7dSKeith Owens 			     struct ia64_sal_os_state *sos)
6561da177e4SLinus Torvalds {
65720305e59SHidetoshi Seto 	pal_processor_state_info_t *psp =
65820305e59SHidetoshi Seto 		(pal_processor_state_info_t*)peidx_psp(peidx);
6591da177e4SLinus Torvalds 
6601da177e4SLinus Torvalds 	/*
661a14f25a0SRuss Anderson 	 * Processor recovery status must key off of the PAL recovery
662a14f25a0SRuss Anderson 	 * status in the Processor State Parameter.
6631da177e4SLinus Torvalds 	 */
664a14f25a0SRuss Anderson 
665a14f25a0SRuss Anderson 	/*
666a14f25a0SRuss Anderson 	 * The machine check is corrected.
667a14f25a0SRuss Anderson 	 */
668a14f25a0SRuss Anderson 	if (psp->cm == 1)
66943ed3bafSHidetoshi Seto 		return mca_recovered("machine check is already corrected.");
670a14f25a0SRuss Anderson 
671a14f25a0SRuss Anderson 	/*
672a14f25a0SRuss Anderson 	 * The error was not contained.  Software must be reset.
673a14f25a0SRuss Anderson 	 */
674a14f25a0SRuss Anderson 	if (psp->us || psp->ci == 0)
67543ed3bafSHidetoshi Seto 		return fatal_mca("error not contained");
6761da177e4SLinus Torvalds 
6771da177e4SLinus Torvalds 	/*
678618b206fSRuss Anderson 	 * Look for recoverable TLB check
679618b206fSRuss Anderson 	 */
680618b206fSRuss Anderson 	if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
681618b206fSRuss Anderson 		return recover_from_tlb_check(peidx);
682618b206fSRuss Anderson 
683618b206fSRuss Anderson 	/*
684e1c48554SRuss Anderson 	 * The cache check and bus check bits have four possible states
685e1c48554SRuss Anderson 	 *   cc bc
686e1c48554SRuss Anderson 	 *    1  1	Memory error, attempt recovery
687396e8e76SRuss Anderson 	 *    1  0	Cache error, attempt recovery
688396e8e76SRuss Anderson 	 *    0  1	I/O error, attempt recovery
689396e8e76SRuss Anderson 	 *    0  0	Other error type, not recovered
6901da177e4SLinus Torvalds 	 */
691396e8e76SRuss Anderson 	if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
692396e8e76SRuss Anderson 		return fatal_mca("No cache or bus check");
6931da177e4SLinus Torvalds 
6941da177e4SLinus Torvalds 	/*
695396e8e76SRuss Anderson 	 * Cannot handle more than one bus check.
6961da177e4SLinus Torvalds 	 */
6971da177e4SLinus Torvalds 	if (peidx_bus_check_num(peidx) > 1)
69843ed3bafSHidetoshi Seto 		return fatal_mca("Too many bus checks");
699396e8e76SRuss Anderson 
70018997961SRuss Anderson 	if (pbci->ib)
70143ed3bafSHidetoshi Seto 		return fatal_mca("Internal Bus error");
7021da177e4SLinus Torvalds 	if (pbci->eb && pbci->bsi > 0)
70343ed3bafSHidetoshi Seto 		return fatal_mca("External bus check fatal status");
7041da177e4SLinus Torvalds 
7051da177e4SLinus Torvalds 	/*
70672fdbdceSSimon Arlott 	 * This is a local MCA and estimated as a recoverable error.
7071da177e4SLinus Torvalds 	 */
7081da177e4SLinus Torvalds 	if (platform)
7097f613c7dSKeith Owens 		return recover_from_platform_error(slidx, peidx, pbci, sos);
710396e8e76SRuss Anderson 
7111da177e4SLinus Torvalds 	/*
7121da177e4SLinus Torvalds 	 * On account of strange SAL error record, we cannot recover.
7131da177e4SLinus Torvalds 	 */
71443ed3bafSHidetoshi Seto 	return fatal_mca("Strange SAL record");
7151da177e4SLinus Torvalds }
7161da177e4SLinus Torvalds 
7171da177e4SLinus Torvalds /**
7181da177e4SLinus Torvalds  * mca_try_to_recover - Try to recover from MCA
7191da177e4SLinus Torvalds  * @rec:	pointer to a SAL error record
72020305e59SHidetoshi Seto  * @sos:	pointer to hand off struct between SAL and OS
7211da177e4SLinus Torvalds  *
7221da177e4SLinus Torvalds  * Return value:
7231da177e4SLinus Torvalds  *	1 on Success / 0 on Failure
7241da177e4SLinus Torvalds  */
7251da177e4SLinus Torvalds 
7261da177e4SLinus Torvalds static int
mca_try_to_recover(void * rec,struct ia64_sal_os_state * sos)72720305e59SHidetoshi Seto mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
7281da177e4SLinus Torvalds {
7291da177e4SLinus Torvalds 	int platform_err;
7301da177e4SLinus Torvalds 	int n_proc_err;
7311da177e4SLinus Torvalds 	slidx_table_t slidx;
7321da177e4SLinus Torvalds 	peidx_table_t peidx;
7331da177e4SLinus Torvalds 	pal_bus_check_info_t pbci;
7341da177e4SLinus Torvalds 
7351da177e4SLinus Torvalds 	/* Make index of SAL error record */
7361da177e4SLinus Torvalds 	platform_err = mca_make_slidx(rec, &slidx);
7371da177e4SLinus Torvalds 
7381da177e4SLinus Torvalds 	/* Count processor error sections */
7391da177e4SLinus Torvalds 	n_proc_err = slidx_count(&slidx, proc_err);
7401da177e4SLinus Torvalds 
7411da177e4SLinus Torvalds 	 /* Now, OS can recover when there is one processor error section */
7421da177e4SLinus Torvalds 	if (n_proc_err > 1)
74343ed3bafSHidetoshi Seto 		return fatal_mca("Too Many Errors");
74418997961SRuss Anderson 	else if (n_proc_err == 0)
74543ed3bafSHidetoshi Seto 		/* Weird SAL record ... We can't do anything */
74643ed3bafSHidetoshi Seto 		return fatal_mca("Weird SAL record");
7471da177e4SLinus Torvalds 
7481da177e4SLinus Torvalds 	/* Make index of processor error section */
74920305e59SHidetoshi Seto 	mca_make_peidx((sal_log_processor_info_t*)
75020305e59SHidetoshi Seto 		slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
7511da177e4SLinus Torvalds 
7521da177e4SLinus Torvalds 	/* Extract Processor BUS_CHECK[0] */
7531da177e4SLinus Torvalds 	*((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
7541da177e4SLinus Torvalds 
7551da177e4SLinus Torvalds 	/* Check whether MCA is global or not */
7567f613c7dSKeith Owens 	if (is_mca_global(&peidx, &pbci, sos))
75743ed3bafSHidetoshi Seto 		return fatal_mca("global MCA");
7581da177e4SLinus Torvalds 
7591da177e4SLinus Torvalds 	/* Try to recover a processor error */
76020305e59SHidetoshi Seto 	return recover_from_processor_error(platform_err, &slidx, &peidx,
76120305e59SHidetoshi Seto 					    &pbci, sos);
7621da177e4SLinus Torvalds }
7631da177e4SLinus Torvalds 
7641da177e4SLinus Torvalds /*
7651da177e4SLinus Torvalds  * =============================================================================
7661da177e4SLinus Torvalds  */
7671da177e4SLinus Torvalds 
mca_external_handler_init(void)7681da177e4SLinus Torvalds int __init mca_external_handler_init(void)
7691da177e4SLinus Torvalds {
7701da177e4SLinus Torvalds 	if (init_record_index_pools())
7711da177e4SLinus Torvalds 		return -ENOMEM;
7721da177e4SLinus Torvalds 
7731da177e4SLinus Torvalds 	/* register external mca handlers */
7741da177e4SLinus Torvalds 	if (ia64_reg_MCA_extension(mca_try_to_recover)) {
7751da177e4SLinus Torvalds 		printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
7761da177e4SLinus Torvalds 		kfree(slidx_pool.buffer);
7771da177e4SLinus Torvalds 		return -EFAULT;
7781da177e4SLinus Torvalds 	}
7791da177e4SLinus Torvalds 	return 0;
7801da177e4SLinus Torvalds }
7811da177e4SLinus Torvalds 
mca_external_handler_exit(void)7821da177e4SLinus Torvalds void __exit mca_external_handler_exit(void)
7831da177e4SLinus Torvalds {
7841da177e4SLinus Torvalds 	/* unregister external mca handlers */
7851da177e4SLinus Torvalds 	ia64_unreg_MCA_extension();
7861da177e4SLinus Torvalds 	kfree(slidx_pool.buffer);
7871da177e4SLinus Torvalds }
7881da177e4SLinus Torvalds 
7891da177e4SLinus Torvalds module_init(mca_external_handler_init);
7901da177e4SLinus Torvalds module_exit(mca_external_handler_exit);
7911da177e4SLinus Torvalds 
7921da177e4SLinus Torvalds module_param(sal_rec_max, int, 0644);
7931da177e4SLinus Torvalds MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
7941da177e4SLinus Torvalds 
7951da177e4SLinus Torvalds MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
7961da177e4SLinus Torvalds MODULE_LICENSE("GPL");
797