109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * File: mca_drv.c
41da177e4SLinus Torvalds * Purpose: Generic MCA handling layer
51da177e4SLinus Torvalds *
61da177e4SLinus Torvalds * Copyright (C) 2004 FUJITSU LIMITED
7fe77efb8SHidetoshi Seto * Copyright (C) 2004 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
87f613c7dSKeith Owens * Copyright (C) 2005 Silicon Graphics, Inc
97f613c7dSKeith Owens * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
10d2a28ad9SRuss Anderson * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
111da177e4SLinus Torvalds */
121da177e4SLinus Torvalds #include <linux/types.h>
131da177e4SLinus Torvalds #include <linux/init.h>
141da177e4SLinus Torvalds #include <linux/sched.h>
151da177e4SLinus Torvalds #include <linux/interrupt.h>
161da177e4SLinus Torvalds #include <linux/irq.h>
171da177e4SLinus Torvalds #include <linux/kallsyms.h>
1857c8a661SMike Rapoport #include <linux/memblock.h>
191da177e4SLinus Torvalds #include <linux/acpi.h>
201da177e4SLinus Torvalds #include <linux/timer.h>
211da177e4SLinus Torvalds #include <linux/module.h>
221da177e4SLinus Torvalds #include <linux/kernel.h>
231da177e4SLinus Torvalds #include <linux/smp.h>
241da177e4SLinus Torvalds #include <linux/workqueue.h>
251da177e4SLinus Torvalds #include <linux/mm.h>
265a0e3ad6STejun Heo #include <linux/slab.h>
271da177e4SLinus Torvalds
281da177e4SLinus Torvalds #include <asm/delay.h>
291da177e4SLinus Torvalds #include <asm/page.h>
301da177e4SLinus Torvalds #include <asm/ptrace.h>
311da177e4SLinus Torvalds #include <asm/sal.h>
321da177e4SLinus Torvalds #include <asm/mca.h>
331da177e4SLinus Torvalds
341da177e4SLinus Torvalds #include <asm/irq.h>
351da177e4SLinus Torvalds #include <asm/hw_irq.h>
361da177e4SLinus Torvalds
371da177e4SLinus Torvalds #include "mca_drv.h"
381da177e4SLinus Torvalds
/*
 * Default maximum size assumed for one SAL error record; used by
 * init_record_index_pools() to size the section pointer pool.
 */
static int sal_rec_max = 10000;

/* Bottom-half entry hook, implemented in mca_drv_asm.S */
extern void *mca_handler_bhhook(void);

/* Held by mca_handler_bh() around its call to mca_page_isolate() */
static DEFINE_SPINLOCK(mca_bh_lock);
461da177e4SLinus Torvalds
/* Scope of a machine check, as returned by is_mca_global() */
typedef enum {
	MCA_IS_LOCAL = 0,
	MCA_IS_GLOBAL = 1
} mca_type_t;

/* Number of slots in the isolated-page table */
#define MAX_PAGE_ISOLATE 1024

/* Pages registered by mca_page_isolate() and the count of used slots */
static struct page *page_isolate[MAX_PAGE_ISOLATE];
static int num_page_isolate;

/* Outcome of mca_page_isolate() */
typedef enum {
	ISOLATE_NG = 0,		/* page could not be isolated */
	ISOLATE_OK = 1,		/* page is registered in the table */
	ISOLATE_NONE = 2	/* address does not refer to a valid page */
} isolate_status_t;

/* Value handed back by fatal_mca() / mca_recovered() */
typedef enum {
	MCA_NOT_RECOVERED = 0,
	MCA_RECOVERED = 1
} recovery_status_t;
6718997961SRuss Anderson
681da177e4SLinus Torvalds /*
691da177e4SLinus Torvalds * This pool keeps pointers to the section part of SAL error record
701da177e4SLinus Torvalds */
711da177e4SLinus Torvalds static struct {
721da177e4SLinus Torvalds slidx_list_t *buffer; /* section pointer list pool */
731da177e4SLinus Torvalds int cur_idx; /* Current index of section pointer list pool */
741da177e4SLinus Torvalds int max_idx; /* Maximum index of section pointer list pool */
751da177e4SLinus Torvalds } slidx_pool;
761da177e4SLinus Torvalds
7718997961SRuss Anderson static int
fatal_mca(const char * fmt,...)7818997961SRuss Anderson fatal_mca(const char *fmt, ...)
7918997961SRuss Anderson {
8018997961SRuss Anderson va_list args;
8143ed3bafSHidetoshi Seto char buf[256];
8218997961SRuss Anderson
8318997961SRuss Anderson va_start(args, fmt);
8443ed3bafSHidetoshi Seto vsnprintf(buf, sizeof(buf), fmt, args);
8518997961SRuss Anderson va_end(args);
8643ed3bafSHidetoshi Seto ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf);
8718997961SRuss Anderson
8818997961SRuss Anderson return MCA_NOT_RECOVERED;
8918997961SRuss Anderson }
9018997961SRuss Anderson
9143ed3bafSHidetoshi Seto static int
mca_recovered(const char * fmt,...)9243ed3bafSHidetoshi Seto mca_recovered(const char *fmt, ...)
9343ed3bafSHidetoshi Seto {
9443ed3bafSHidetoshi Seto va_list args;
9543ed3bafSHidetoshi Seto char buf[256];
9643ed3bafSHidetoshi Seto
9743ed3bafSHidetoshi Seto va_start(args, fmt);
9843ed3bafSHidetoshi Seto vsnprintf(buf, sizeof(buf), fmt, args);
9943ed3bafSHidetoshi Seto va_end(args);
10043ed3bafSHidetoshi Seto ia64_mca_printk(KERN_INFO "MCA: %s\n", buf);
10143ed3bafSHidetoshi Seto
10243ed3bafSHidetoshi Seto return MCA_RECOVERED;
10343ed3bafSHidetoshi Seto }
10443ed3bafSHidetoshi Seto
1051da177e4SLinus Torvalds /**
1061da177e4SLinus Torvalds * mca_page_isolate - isolate a poisoned page in order not to use it later
1071da177e4SLinus Torvalds * @paddr: poisoned memory location
1081da177e4SLinus Torvalds *
1091da177e4SLinus Torvalds * Return value:
1104881e2cdSHidetoshi Seto * one of isolate_status_t, ISOLATE_OK/NG/NONE.
1111da177e4SLinus Torvalds */
1121da177e4SLinus Torvalds
1131da177e4SLinus Torvalds static isolate_status_t
mca_page_isolate(unsigned long paddr)1141da177e4SLinus Torvalds mca_page_isolate(unsigned long paddr)
1151da177e4SLinus Torvalds {
1161da177e4SLinus Torvalds int i;
1171da177e4SLinus Torvalds struct page *p;
1181da177e4SLinus Torvalds
1191da177e4SLinus Torvalds /* whether physical address is valid or not */
1201da177e4SLinus Torvalds if (!ia64_phys_addr_valid(paddr))
1214881e2cdSHidetoshi Seto return ISOLATE_NONE;
1224881e2cdSHidetoshi Seto
12356f87b82SRuss Anderson if (!pfn_valid(paddr >> PAGE_SHIFT))
1244881e2cdSHidetoshi Seto return ISOLATE_NONE;
1251da177e4SLinus Torvalds
1261da177e4SLinus Torvalds /* convert physical address to physical page number */
1271da177e4SLinus Torvalds p = pfn_to_page(paddr>>PAGE_SHIFT);
1281da177e4SLinus Torvalds
1291da177e4SLinus Torvalds /* check whether a page number have been already registered or not */
1301da177e4SLinus Torvalds for (i = 0; i < num_page_isolate; i++)
1311da177e4SLinus Torvalds if (page_isolate[i] == p)
1321da177e4SLinus Torvalds return ISOLATE_OK; /* already listed */
1331da177e4SLinus Torvalds
1341da177e4SLinus Torvalds /* limitation check */
1351da177e4SLinus Torvalds if (num_page_isolate == MAX_PAGE_ISOLATE)
1361da177e4SLinus Torvalds return ISOLATE_NG;
1371da177e4SLinus Torvalds
1381da177e4SLinus Torvalds /* kick pages having attribute 'SLAB' or 'Reserved' */
1391da177e4SLinus Torvalds if (PageSlab(p) || PageReserved(p))
1401da177e4SLinus Torvalds return ISOLATE_NG;
1411da177e4SLinus Torvalds
1421da177e4SLinus Torvalds /* add attribute 'Reserved' and register the page */
143cbb92144SRuss Anderson get_page(p);
1441da177e4SLinus Torvalds SetPageReserved(p);
1451da177e4SLinus Torvalds page_isolate[num_page_isolate++] = p;
1461da177e4SLinus Torvalds
1471da177e4SLinus Torvalds return ISOLATE_OK;
1481da177e4SLinus Torvalds }
1491da177e4SLinus Torvalds
1501da177e4SLinus Torvalds /**
1511da177e4SLinus Torvalds * mca_hanlder_bh - Kill the process which occurred memory read error
1521da177e4SLinus Torvalds * @paddr: poisoned address received from MCA Handler
1531da177e4SLinus Torvalds */
1541da177e4SLinus Torvalds
1551da177e4SLinus Torvalds void
mca_handler_bh(unsigned long paddr,void * iip,unsigned long ipsr)156d2a28ad9SRuss Anderson mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
1571da177e4SLinus Torvalds {
15843ed3bafSHidetoshi Seto ia64_mlogbuf_dump();
159d2a28ad9SRuss Anderson printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
160d2a28ad9SRuss Anderson "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
1616c1ee033SEric W. Biederman raw_smp_processor_id(), current->pid,
1626c1ee033SEric W. Biederman from_kuid(&init_user_ns, current_uid()),
163d2a28ad9SRuss Anderson iip, ipsr, paddr, current->comm);
1641da177e4SLinus Torvalds
1651da177e4SLinus Torvalds spin_lock(&mca_bh_lock);
1664881e2cdSHidetoshi Seto switch (mca_page_isolate(paddr)) {
1674881e2cdSHidetoshi Seto case ISOLATE_OK:
1681da177e4SLinus Torvalds printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
1694881e2cdSHidetoshi Seto break;
1704881e2cdSHidetoshi Seto case ISOLATE_NG:
171ea0e92a6SRuss Anderson printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr);
1724881e2cdSHidetoshi Seto break;
1734881e2cdSHidetoshi Seto default:
1744881e2cdSHidetoshi Seto break;
1751da177e4SLinus Torvalds }
1761da177e4SLinus Torvalds spin_unlock(&mca_bh_lock);
1771da177e4SLinus Torvalds
1781da177e4SLinus Torvalds /* This process is about to be killed itself */
179*0e25498fSEric W. Biederman make_task_dead(SIGKILL);
1801da177e4SLinus Torvalds }
1811da177e4SLinus Torvalds
1821da177e4SLinus Torvalds /**
1831da177e4SLinus Torvalds * mca_make_peidx - Make index of processor error section
1841da177e4SLinus Torvalds * @slpi: pointer to record of processor error section
1851da177e4SLinus Torvalds * @peidx: pointer to index of processor error section
1861da177e4SLinus Torvalds */
1871da177e4SLinus Torvalds
1881da177e4SLinus Torvalds static void
mca_make_peidx(sal_log_processor_info_t * slpi,peidx_table_t * peidx)1891da177e4SLinus Torvalds mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
1901da177e4SLinus Torvalds {
1911da177e4SLinus Torvalds /*
1921da177e4SLinus Torvalds * calculate the start address of
1931da177e4SLinus Torvalds * "struct cpuid_info" and "sal_processor_static_info_t".
1941da177e4SLinus Torvalds */
1951da177e4SLinus Torvalds u64 total_check_num = slpi->valid.num_cache_check
1961da177e4SLinus Torvalds + slpi->valid.num_tlb_check
1971da177e4SLinus Torvalds + slpi->valid.num_bus_check
1981da177e4SLinus Torvalds + slpi->valid.num_reg_file_check
1991da177e4SLinus Torvalds + slpi->valid.num_ms_check;
2001da177e4SLinus Torvalds u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num
2011da177e4SLinus Torvalds + sizeof(sal_log_processor_info_t);
2021da177e4SLinus Torvalds u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
2031da177e4SLinus Torvalds
2041da177e4SLinus Torvalds peidx_head(peidx) = slpi;
2051da177e4SLinus Torvalds peidx_mid(peidx) = (struct sal_cpuid_info *)
2061da177e4SLinus Torvalds (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
2071da177e4SLinus Torvalds peidx_bottom(peidx) = (sal_processor_static_info_t *)
2081da177e4SLinus Torvalds (slpi->valid.psi_static_struct ?
2091da177e4SLinus Torvalds ((char*)slpi + head_size + mid_size) : NULL);
2101da177e4SLinus Torvalds }
2111da177e4SLinus Torvalds
2121da177e4SLinus Torvalds /**
2131da177e4SLinus Torvalds * mca_make_slidx - Make index of SAL error record
2141da177e4SLinus Torvalds * @buffer: pointer to SAL error record
2151da177e4SLinus Torvalds * @slidx: pointer to index of SAL error record
2161da177e4SLinus Torvalds *
2171da177e4SLinus Torvalds * Return value:
2181da177e4SLinus Torvalds * 1 if record has platform error / 0 if not
2191da177e4SLinus Torvalds */
2201da177e4SLinus Torvalds #define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
2211da177e4SLinus Torvalds {slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
2221da177e4SLinus Torvalds hl->hdr = ptr; \
2231da177e4SLinus Torvalds list_add(&hl->list, &(sect)); \
2241da177e4SLinus Torvalds slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
2251da177e4SLinus Torvalds
2261da177e4SLinus Torvalds static int
mca_make_slidx(void * buffer,slidx_table_t * slidx)2271da177e4SLinus Torvalds mca_make_slidx(void *buffer, slidx_table_t *slidx)
2281da177e4SLinus Torvalds {
2291da177e4SLinus Torvalds int platform_err = 0;
2301da177e4SLinus Torvalds int record_len = ((sal_log_record_header_t*)buffer)->len;
2311da177e4SLinus Torvalds u32 ercd_pos;
2321da177e4SLinus Torvalds int sects;
2331da177e4SLinus Torvalds sal_log_section_hdr_t *sp;
2341da177e4SLinus Torvalds
2351da177e4SLinus Torvalds /*
2361da177e4SLinus Torvalds * Initialize index referring current record
2371da177e4SLinus Torvalds */
2381da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->proc_err));
2391da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->mem_dev_err));
2401da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->sel_dev_err));
2411da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->pci_bus_err));
2421da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->smbios_dev_err));
2431da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->pci_comp_err));
2441da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->plat_specific_err));
2451da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->host_ctlr_err));
2461da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->plat_bus_err));
2471da177e4SLinus Torvalds INIT_LIST_HEAD(&(slidx->unsupported));
2481da177e4SLinus Torvalds
2491da177e4SLinus Torvalds /*
2501da177e4SLinus Torvalds * Extract a Record Header
2511da177e4SLinus Torvalds */
2521da177e4SLinus Torvalds slidx->header = buffer;
2531da177e4SLinus Torvalds
2541da177e4SLinus Torvalds /*
2551da177e4SLinus Torvalds * Extract each section records
2561da177e4SLinus Torvalds * (arranged from "int ia64_log_platform_info_print()")
2571da177e4SLinus Torvalds */
2581da177e4SLinus Torvalds for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
2591da177e4SLinus Torvalds ercd_pos < record_len; ercd_pos += sp->len, sects++) {
2601da177e4SLinus Torvalds sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
2611da177e4SLinus Torvalds if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
2621da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
26320305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
26420305e59SHidetoshi Seto SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
2651da177e4SLinus Torvalds platform_err = 1;
2661da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
26720305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
26820305e59SHidetoshi Seto SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
2691da177e4SLinus Torvalds platform_err = 1;
2701da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
27120305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
27220305e59SHidetoshi Seto SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
2731da177e4SLinus Torvalds platform_err = 1;
2741da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
27520305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
27620305e59SHidetoshi Seto SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
2771da177e4SLinus Torvalds platform_err = 1;
2781da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
27920305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
28020305e59SHidetoshi Seto SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
2811da177e4SLinus Torvalds platform_err = 1;
2821da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
28320305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
28420305e59SHidetoshi Seto SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
2851da177e4SLinus Torvalds platform_err = 1;
2861da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
28720305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
28820305e59SHidetoshi Seto SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
2891da177e4SLinus Torvalds platform_err = 1;
2901da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
29120305e59SHidetoshi Seto } else if (!efi_guidcmp(sp->guid,
29220305e59SHidetoshi Seto SAL_PLAT_BUS_ERR_SECT_GUID)) {
2931da177e4SLinus Torvalds platform_err = 1;
2941da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
2951da177e4SLinus Torvalds } else {
2961da177e4SLinus Torvalds LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
2971da177e4SLinus Torvalds }
2981da177e4SLinus Torvalds }
2991da177e4SLinus Torvalds slidx->n_sections = sects;
3001da177e4SLinus Torvalds
3011da177e4SLinus Torvalds return platform_err;
3021da177e4SLinus Torvalds }
3031da177e4SLinus Torvalds
3041da177e4SLinus Torvalds /**
3051da177e4SLinus Torvalds * init_record_index_pools - Initialize pool of lists for SAL record index
3061da177e4SLinus Torvalds *
3071da177e4SLinus Torvalds * Return value:
3081da177e4SLinus Torvalds * 0 on Success / -ENOMEM on Failure
3091da177e4SLinus Torvalds */
3101da177e4SLinus Torvalds static int
init_record_index_pools(void)3111da177e4SLinus Torvalds init_record_index_pools(void)
3121da177e4SLinus Torvalds {
3131da177e4SLinus Torvalds int i;
3141da177e4SLinus Torvalds int rec_max_size; /* Maximum size of SAL error records */
3151da177e4SLinus Torvalds int sect_min_size; /* Minimum size of SAL error sections */
3161da177e4SLinus Torvalds /* minimum size table of each section */
3171da177e4SLinus Torvalds static int sal_log_sect_min_sizes[] = {
31820305e59SHidetoshi Seto sizeof(sal_log_processor_info_t)
31920305e59SHidetoshi Seto + sizeof(sal_processor_static_info_t),
3201da177e4SLinus Torvalds sizeof(sal_log_mem_dev_err_info_t),
3211da177e4SLinus Torvalds sizeof(sal_log_sel_dev_err_info_t),
3221da177e4SLinus Torvalds sizeof(sal_log_pci_bus_err_info_t),
3231da177e4SLinus Torvalds sizeof(sal_log_smbios_dev_err_info_t),
3241da177e4SLinus Torvalds sizeof(sal_log_pci_comp_err_info_t),
3251da177e4SLinus Torvalds sizeof(sal_log_plat_specific_err_info_t),
3261da177e4SLinus Torvalds sizeof(sal_log_host_ctlr_err_info_t),
3271da177e4SLinus Torvalds sizeof(sal_log_plat_bus_err_info_t),
3281da177e4SLinus Torvalds };
3291da177e4SLinus Torvalds
3301da177e4SLinus Torvalds /*
3311da177e4SLinus Torvalds * MCA handler cannot allocate new memory on flight,
3321da177e4SLinus Torvalds * so we preallocate enough memory to handle a SAL record.
3331da177e4SLinus Torvalds *
3341da177e4SLinus Torvalds * Initialize a handling set of slidx_pool:
3351da177e4SLinus Torvalds * 1. Pick up the max size of SAL error records
3361da177e4SLinus Torvalds * 2. Pick up the min size of SAL error sections
3371da177e4SLinus Torvalds * 3. Allocate the pool as enough to 2 SAL records
3381da177e4SLinus Torvalds * (now we can estimate the maxinum of section in a record.)
3391da177e4SLinus Torvalds */
3401da177e4SLinus Torvalds
3411da177e4SLinus Torvalds /* - 1 - */
3421da177e4SLinus Torvalds rec_max_size = sal_rec_max;
3431da177e4SLinus Torvalds
3441da177e4SLinus Torvalds /* - 2 - */
3451da177e4SLinus Torvalds sect_min_size = sal_log_sect_min_sizes[0];
346c5f320ffSArnd Bergmann for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++)
3471da177e4SLinus Torvalds if (sect_min_size > sal_log_sect_min_sizes[i])
3481da177e4SLinus Torvalds sect_min_size = sal_log_sect_min_sizes[i];
3491da177e4SLinus Torvalds
3501da177e4SLinus Torvalds /* - 3 - */
3511da177e4SLinus Torvalds slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
3527c13e0d1SZhang Yanfei slidx_pool.buffer =
3536da2ec56SKees Cook kmalloc_array(slidx_pool.max_idx, sizeof(slidx_list_t),
3546da2ec56SKees Cook GFP_KERNEL);
3551da177e4SLinus Torvalds
3561da177e4SLinus Torvalds return slidx_pool.buffer ? 0 : -ENOMEM;
3571da177e4SLinus Torvalds }
3581da177e4SLinus Torvalds
3591da177e4SLinus Torvalds
3601da177e4SLinus Torvalds /*****************************************************************************
3611da177e4SLinus Torvalds * Recovery functions *
3621da177e4SLinus Torvalds *****************************************************************************/
3631da177e4SLinus Torvalds
3641da177e4SLinus Torvalds /**
3651da177e4SLinus Torvalds * is_mca_global - Check whether this MCA is global or not
3661da177e4SLinus Torvalds * @peidx: pointer of index of processor error section
3671da177e4SLinus Torvalds * @pbci: pointer to pal_bus_check_info_t
36820305e59SHidetoshi Seto * @sos: pointer to hand off struct between SAL and OS
3691da177e4SLinus Torvalds *
3701da177e4SLinus Torvalds * Return value:
3711da177e4SLinus Torvalds * MCA_IS_LOCAL / MCA_IS_GLOBAL
3721da177e4SLinus Torvalds */
3731da177e4SLinus Torvalds
3741da177e4SLinus Torvalds static mca_type_t
is_mca_global(peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)3757f613c7dSKeith Owens is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
3767f613c7dSKeith Owens struct ia64_sal_os_state *sos)
3771da177e4SLinus Torvalds {
37820305e59SHidetoshi Seto pal_processor_state_info_t *psp =
37920305e59SHidetoshi Seto (pal_processor_state_info_t*)peidx_psp(peidx);
3801da177e4SLinus Torvalds
3811da177e4SLinus Torvalds /*
3821da177e4SLinus Torvalds * PAL can request a rendezvous, if the MCA has a global scope.
3831da177e4SLinus Torvalds * If "rz_always" flag is set, SAL requests MCA rendezvous
3841da177e4SLinus Torvalds * in spite of global MCA.
3851da177e4SLinus Torvalds * Therefore it is local MCA when rendezvous has not been requested.
3861da177e4SLinus Torvalds * Failed to rendezvous, the system must be down.
3871da177e4SLinus Torvalds */
3887f613c7dSKeith Owens switch (sos->rv_rc) {
3891da177e4SLinus Torvalds case -1: /* SAL rendezvous unsuccessful */
3901da177e4SLinus Torvalds return MCA_IS_GLOBAL;
3911da177e4SLinus Torvalds case 0: /* SAL rendezvous not required */
3921da177e4SLinus Torvalds return MCA_IS_LOCAL;
3931da177e4SLinus Torvalds case 1: /* SAL rendezvous successful int */
3941da177e4SLinus Torvalds case 2: /* SAL rendezvous successful int with init */
3951da177e4SLinus Torvalds default:
3961da177e4SLinus Torvalds break;
3971da177e4SLinus Torvalds }
3981da177e4SLinus Torvalds
3991da177e4SLinus Torvalds /*
4001da177e4SLinus Torvalds * If One or more Cache/TLB/Reg_File/Uarch_Check is here,
4011da177e4SLinus Torvalds * it would be a local MCA. (i.e. processor internal error)
4021da177e4SLinus Torvalds */
4031da177e4SLinus Torvalds if (psp->tc || psp->cc || psp->rc || psp->uc)
4041da177e4SLinus Torvalds return MCA_IS_LOCAL;
4051da177e4SLinus Torvalds
4061da177e4SLinus Torvalds /*
4071da177e4SLinus Torvalds * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
4081da177e4SLinus Torvalds * would be a global MCA. (e.g. a system bus address parity error)
4091da177e4SLinus Torvalds */
4101da177e4SLinus Torvalds if (!pbci || pbci->ib)
4111da177e4SLinus Torvalds return MCA_IS_GLOBAL;
4121da177e4SLinus Torvalds
4131da177e4SLinus Torvalds /*
4141da177e4SLinus Torvalds * Bus_Check structure with Bus_Check.eb (external bus error) flag set
4151da177e4SLinus Torvalds * could be either a local MCA or a global MCA.
4161da177e4SLinus Torvalds *
4171da177e4SLinus Torvalds * Referring Bus_Check.bsi:
4181da177e4SLinus Torvalds * 0: Unknown/unclassified
4191da177e4SLinus Torvalds * 1: BERR#
4201da177e4SLinus Torvalds * 2: BINIT#
4211da177e4SLinus Torvalds * 3: Hard Fail
4221da177e4SLinus Torvalds * (FIXME: Are these SGI specific or generic bsi values?)
4231da177e4SLinus Torvalds */
4241da177e4SLinus Torvalds if (pbci->eb)
4251da177e4SLinus Torvalds switch (pbci->bsi) {
4261da177e4SLinus Torvalds case 0:
4271da177e4SLinus Torvalds /* e.g. a load from poisoned memory */
4281da177e4SLinus Torvalds return MCA_IS_LOCAL;
4291da177e4SLinus Torvalds case 1:
4301da177e4SLinus Torvalds case 2:
4311da177e4SLinus Torvalds case 3:
4321da177e4SLinus Torvalds return MCA_IS_GLOBAL;
4331da177e4SLinus Torvalds }
4341da177e4SLinus Torvalds
4351da177e4SLinus Torvalds return MCA_IS_GLOBAL;
4361da177e4SLinus Torvalds }
4371da177e4SLinus Torvalds
4381da177e4SLinus Torvalds /**
439264b0f99SRuss Anderson * get_target_identifier - Get the valid Cache or Bus check target identifier.
440264b0f99SRuss Anderson * @peidx: pointer of index of processor error section
441264b0f99SRuss Anderson *
442264b0f99SRuss Anderson * Return value:
44372fdbdceSSimon Arlott * target address on Success / 0 on Failure
444264b0f99SRuss Anderson */
445264b0f99SRuss Anderson static u64
get_target_identifier(peidx_table_t * peidx)446264b0f99SRuss Anderson get_target_identifier(peidx_table_t *peidx)
447264b0f99SRuss Anderson {
448264b0f99SRuss Anderson u64 target_address = 0;
449264b0f99SRuss Anderson sal_log_mod_error_info_t *smei;
450264b0f99SRuss Anderson pal_cache_check_info_t *pcci;
451264b0f99SRuss Anderson int i, level = 9;
452264b0f99SRuss Anderson
453264b0f99SRuss Anderson /*
454264b0f99SRuss Anderson * Look through the cache checks for a valid target identifier
455264b0f99SRuss Anderson * If more than one valid target identifier, return the one
456264b0f99SRuss Anderson * with the lowest cache level.
457264b0f99SRuss Anderson */
458264b0f99SRuss Anderson for (i = 0; i < peidx_cache_check_num(peidx); i++) {
459264b0f99SRuss Anderson smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
460264b0f99SRuss Anderson if (smei->valid.target_identifier && smei->target_identifier) {
461264b0f99SRuss Anderson pcci = (pal_cache_check_info_t *)&(smei->check_info);
462264b0f99SRuss Anderson if (!target_address || (pcci->level < level)) {
463264b0f99SRuss Anderson target_address = smei->target_identifier;
464264b0f99SRuss Anderson level = pcci->level;
465264b0f99SRuss Anderson continue;
466264b0f99SRuss Anderson }
467264b0f99SRuss Anderson }
468264b0f99SRuss Anderson }
469264b0f99SRuss Anderson if (target_address)
470264b0f99SRuss Anderson return target_address;
471264b0f99SRuss Anderson
472264b0f99SRuss Anderson /*
473264b0f99SRuss Anderson * Look at the bus check for a valid target identifier
474264b0f99SRuss Anderson */
475264b0f99SRuss Anderson smei = peidx_bus_check(peidx, 0);
476264b0f99SRuss Anderson if (smei && smei->valid.target_identifier)
477264b0f99SRuss Anderson return smei->target_identifier;
478264b0f99SRuss Anderson
479264b0f99SRuss Anderson return 0;
480264b0f99SRuss Anderson }
481264b0f99SRuss Anderson
482264b0f99SRuss Anderson /**
4831da177e4SLinus Torvalds * recover_from_read_error - Try to recover the errors which type are "read"s.
4841da177e4SLinus Torvalds * @slidx: pointer of index of SAL error record
4851da177e4SLinus Torvalds * @peidx: pointer of index of processor error section
4861da177e4SLinus Torvalds * @pbci: pointer of pal_bus_check_info
48720305e59SHidetoshi Seto * @sos: pointer to hand off struct between SAL and OS
4881da177e4SLinus Torvalds *
4891da177e4SLinus Torvalds * Return value:
4901da177e4SLinus Torvalds * 1 on Success / 0 on Failure
4911da177e4SLinus Torvalds */
4921da177e4SLinus Torvalds
4931da177e4SLinus Torvalds static int
recover_from_read_error(slidx_table_t * slidx,peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)49420305e59SHidetoshi Seto recover_from_read_error(slidx_table_t *slidx,
49520305e59SHidetoshi Seto peidx_table_t *peidx, pal_bus_check_info_t *pbci,
4967f613c7dSKeith Owens struct ia64_sal_os_state *sos)
4971da177e4SLinus Torvalds {
498264b0f99SRuss Anderson u64 target_identifier;
4992770ef7cSMasahiro Yamada struct pal_min_state_area *pmsa;
5001da177e4SLinus Torvalds struct ia64_psr *psr1, *psr2;
5011da177e4SLinus Torvalds ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
5021da177e4SLinus Torvalds
5031da177e4SLinus Torvalds /* Is target address valid? */
504264b0f99SRuss Anderson target_identifier = get_target_identifier(peidx);
505264b0f99SRuss Anderson if (!target_identifier)
50643ed3bafSHidetoshi Seto return fatal_mca("target address not valid");
5071da177e4SLinus Torvalds
5081da177e4SLinus Torvalds /*
5091da177e4SLinus Torvalds * cpu read or memory-mapped io read
5101da177e4SLinus Torvalds *
5111da177e4SLinus Torvalds * offending process affected process OS MCA do
5121da177e4SLinus Torvalds * kernel mode kernel mode down system
5131da177e4SLinus Torvalds * kernel mode user mode kill the process
5141da177e4SLinus Torvalds * user mode kernel mode down system (*)
5151da177e4SLinus Torvalds * user mode user mode kill the process
5161da177e4SLinus Torvalds *
5171da177e4SLinus Torvalds * (*) You could terminate offending user-mode process
5181da177e4SLinus Torvalds * if (pbci->pv && pbci->pl != 0) *and* if you sure
5191da177e4SLinus Torvalds * the process not have any locks of kernel.
5201da177e4SLinus Torvalds */
5211da177e4SLinus Torvalds
522a9474646SHidetoshi Seto /* Is minstate valid? */
523a9474646SHidetoshi Seto if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
52443ed3bafSHidetoshi Seto return fatal_mca("minstate not valid");
5251da177e4SLinus Torvalds psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
526d2a28ad9SRuss Anderson psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
5271da177e4SLinus Torvalds
5281da177e4SLinus Torvalds /*
5291da177e4SLinus Torvalds * Check the privilege level of interrupted context.
5301da177e4SLinus Torvalds * If it is user-mode, then terminate affected process.
5311da177e4SLinus Torvalds */
532d2a28ad9SRuss Anderson
533d2a28ad9SRuss Anderson pmsa = sos->pal_min_state;
534d2a28ad9SRuss Anderson if (psr1->cpl != 0 ||
535d2a28ad9SRuss Anderson ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
5361da177e4SLinus Torvalds /*
5371da177e4SLinus Torvalds * setup for resume to bottom half of MCA,
5381da177e4SLinus Torvalds * "mca_handler_bhhook"
5391da177e4SLinus Torvalds */
540d2a28ad9SRuss Anderson /* pass to bhhook as argument (gr8, ...) */
541264b0f99SRuss Anderson pmsa->pmsa_gr[8-1] = target_identifier;
542d2a28ad9SRuss Anderson pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
543d2a28ad9SRuss Anderson pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
5441da177e4SLinus Torvalds /* set interrupted return address (but no use) */
5451da177e4SLinus Torvalds pmsa->pmsa_br0 = pmsa->pmsa_iip;
5461da177e4SLinus Torvalds /* change resume address to bottom half */
5471da177e4SLinus Torvalds pmsa->pmsa_iip = mca_hdlr_bh->fp;
5481da177e4SLinus Torvalds pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
5491da177e4SLinus Torvalds /* set cpl with kernel mode */
5501da177e4SLinus Torvalds psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
5511da177e4SLinus Torvalds psr2->cpl = 0;
5521da177e4SLinus Torvalds psr2->ri = 0;
553d2a28ad9SRuss Anderson psr2->bn = 1;
554b1b901c2SRuss Anderson psr2->i = 0;
5551da177e4SLinus Torvalds
55643ed3bafSHidetoshi Seto return mca_recovered("user memory corruption. "
55743ed3bafSHidetoshi Seto "kill affected process - recovered.");
5581da177e4SLinus Torvalds }
5591da177e4SLinus Torvalds
56043ed3bafSHidetoshi Seto return fatal_mca("kernel context not recovered, iip 0x%lx\n",
56143ed3bafSHidetoshi Seto pmsa->pmsa_iip);
5621da177e4SLinus Torvalds }
5631da177e4SLinus Torvalds
5641da177e4SLinus Torvalds /**
5651da177e4SLinus Torvalds * recover_from_platform_error - Recover from platform error.
5661da177e4SLinus Torvalds * @slidx: pointer of index of SAL error record
5671da177e4SLinus Torvalds * @peidx: pointer of index of processor error section
5681da177e4SLinus Torvalds * @pbci: pointer of pal_bus_check_info
56920305e59SHidetoshi Seto * @sos: pointer to hand off struct between SAL and OS
5701da177e4SLinus Torvalds *
5711da177e4SLinus Torvalds * Return value:
5721da177e4SLinus Torvalds * 1 on Success / 0 on Failure
5731da177e4SLinus Torvalds */
5741da177e4SLinus Torvalds
5751da177e4SLinus Torvalds static int
recover_from_platform_error(slidx_table_t * slidx,peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)57620305e59SHidetoshi Seto recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
57720305e59SHidetoshi Seto pal_bus_check_info_t *pbci,
5787f613c7dSKeith Owens struct ia64_sal_os_state *sos)
5791da177e4SLinus Torvalds {
5801da177e4SLinus Torvalds int status = 0;
58120305e59SHidetoshi Seto pal_processor_state_info_t *psp =
58220305e59SHidetoshi Seto (pal_processor_state_info_t*)peidx_psp(peidx);
5831da177e4SLinus Torvalds
5841da177e4SLinus Torvalds if (psp->bc && pbci->eb && pbci->bsi == 0) {
5851da177e4SLinus Torvalds switch(pbci->type) {
5861da177e4SLinus Torvalds case 1: /* partial read */
5871da177e4SLinus Torvalds case 3: /* full line(cpu) read */
5881da177e4SLinus Torvalds case 9: /* I/O space read */
58920305e59SHidetoshi Seto status = recover_from_read_error(slidx, peidx, pbci,
59020305e59SHidetoshi Seto sos);
5911da177e4SLinus Torvalds break;
5921da177e4SLinus Torvalds case 0: /* unknown */
5931da177e4SLinus Torvalds case 2: /* partial write */
5941da177e4SLinus Torvalds case 4: /* full line write */
5951da177e4SLinus Torvalds case 5: /* implicit or explicit write-back operation */
5961da177e4SLinus Torvalds case 6: /* snoop probe */
5971da177e4SLinus Torvalds case 7: /* incoming or outgoing ptc.g */
5981da177e4SLinus Torvalds case 8: /* write coalescing transactions */
5991da177e4SLinus Torvalds case 10: /* I/O space write */
6001da177e4SLinus Torvalds case 11: /* inter-processor interrupt message(IPI) */
60120305e59SHidetoshi Seto case 12: /* interrupt acknowledge or
60220305e59SHidetoshi Seto external task priority cycle */
6031da177e4SLinus Torvalds default:
6041da177e4SLinus Torvalds break;
6051da177e4SLinus Torvalds }
606396e8e76SRuss Anderson } else if (psp->cc && !psp->bc) { /* Cache error */
607396e8e76SRuss Anderson status = recover_from_read_error(slidx, peidx, pbci, sos);
6081da177e4SLinus Torvalds }
6091da177e4SLinus Torvalds
6101da177e4SLinus Torvalds return status;
6111da177e4SLinus Torvalds }
6121da177e4SLinus Torvalds
613618b206fSRuss Anderson /*
614618b206fSRuss Anderson * recover_from_tlb_check
615618b206fSRuss Anderson * @peidx: pointer of index of processor error section
616618b206fSRuss Anderson *
617618b206fSRuss Anderson * Return value:
618618b206fSRuss Anderson * 1 on Success / 0 on Failure
619618b206fSRuss Anderson */
620618b206fSRuss Anderson static int
recover_from_tlb_check(peidx_table_t * peidx)621618b206fSRuss Anderson recover_from_tlb_check(peidx_table_t *peidx)
622618b206fSRuss Anderson {
623618b206fSRuss Anderson sal_log_mod_error_info_t *smei;
624618b206fSRuss Anderson pal_tlb_check_info_t *ptci;
625618b206fSRuss Anderson
626618b206fSRuss Anderson smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
627618b206fSRuss Anderson ptci = (pal_tlb_check_info_t *)&(smei->check_info);
628618b206fSRuss Anderson
629618b206fSRuss Anderson /*
630618b206fSRuss Anderson * Look for signature of a duplicate TLB DTC entry, which is
631618b206fSRuss Anderson * a SW bug and always fatal.
632618b206fSRuss Anderson */
633618b206fSRuss Anderson if (ptci->op == PAL_TLB_CHECK_OP_PURGE
634618b206fSRuss Anderson && !(ptci->itr || ptci->dtc || ptci->itc))
635618b206fSRuss Anderson return fatal_mca("Duplicate TLB entry");
636618b206fSRuss Anderson
637618b206fSRuss Anderson return mca_recovered("TLB check recovered");
638618b206fSRuss Anderson }
639618b206fSRuss Anderson
6401da177e4SLinus Torvalds /**
6411da177e4SLinus Torvalds * recover_from_processor_error
6421da177e4SLinus Torvalds * @platform: whether there are some platform error section or not
6431da177e4SLinus Torvalds * @slidx: pointer of index of SAL error record
6441da177e4SLinus Torvalds * @peidx: pointer of index of processor error section
6451da177e4SLinus Torvalds * @pbci: pointer of pal_bus_check_info
64620305e59SHidetoshi Seto * @sos: pointer to hand off struct between SAL and OS
6471da177e4SLinus Torvalds *
6481da177e4SLinus Torvalds * Return value:
6491da177e4SLinus Torvalds * 1 on Success / 0 on Failure
6501da177e4SLinus Torvalds */
6511da177e4SLinus Torvalds
6521da177e4SLinus Torvalds static int
recover_from_processor_error(int platform,slidx_table_t * slidx,peidx_table_t * peidx,pal_bus_check_info_t * pbci,struct ia64_sal_os_state * sos)65320305e59SHidetoshi Seto recover_from_processor_error(int platform, slidx_table_t *slidx,
65420305e59SHidetoshi Seto peidx_table_t *peidx, pal_bus_check_info_t *pbci,
6557f613c7dSKeith Owens struct ia64_sal_os_state *sos)
6561da177e4SLinus Torvalds {
65720305e59SHidetoshi Seto pal_processor_state_info_t *psp =
65820305e59SHidetoshi Seto (pal_processor_state_info_t*)peidx_psp(peidx);
6591da177e4SLinus Torvalds
6601da177e4SLinus Torvalds /*
661a14f25a0SRuss Anderson * Processor recovery status must key off of the PAL recovery
662a14f25a0SRuss Anderson * status in the Processor State Parameter.
6631da177e4SLinus Torvalds */
664a14f25a0SRuss Anderson
665a14f25a0SRuss Anderson /*
666a14f25a0SRuss Anderson * The machine check is corrected.
667a14f25a0SRuss Anderson */
668a14f25a0SRuss Anderson if (psp->cm == 1)
66943ed3bafSHidetoshi Seto return mca_recovered("machine check is already corrected.");
670a14f25a0SRuss Anderson
671a14f25a0SRuss Anderson /*
672a14f25a0SRuss Anderson * The error was not contained. Software must be reset.
673a14f25a0SRuss Anderson */
674a14f25a0SRuss Anderson if (psp->us || psp->ci == 0)
67543ed3bafSHidetoshi Seto return fatal_mca("error not contained");
6761da177e4SLinus Torvalds
6771da177e4SLinus Torvalds /*
678618b206fSRuss Anderson * Look for recoverable TLB check
679618b206fSRuss Anderson */
680618b206fSRuss Anderson if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
681618b206fSRuss Anderson return recover_from_tlb_check(peidx);
682618b206fSRuss Anderson
683618b206fSRuss Anderson /*
684e1c48554SRuss Anderson * The cache check and bus check bits have four possible states
685e1c48554SRuss Anderson * cc bc
686e1c48554SRuss Anderson * 1 1 Memory error, attempt recovery
687396e8e76SRuss Anderson * 1 0 Cache error, attempt recovery
688396e8e76SRuss Anderson * 0 1 I/O error, attempt recovery
689396e8e76SRuss Anderson * 0 0 Other error type, not recovered
6901da177e4SLinus Torvalds */
691396e8e76SRuss Anderson if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL))
692396e8e76SRuss Anderson return fatal_mca("No cache or bus check");
6931da177e4SLinus Torvalds
6941da177e4SLinus Torvalds /*
695396e8e76SRuss Anderson * Cannot handle more than one bus check.
6961da177e4SLinus Torvalds */
6971da177e4SLinus Torvalds if (peidx_bus_check_num(peidx) > 1)
69843ed3bafSHidetoshi Seto return fatal_mca("Too many bus checks");
699396e8e76SRuss Anderson
70018997961SRuss Anderson if (pbci->ib)
70143ed3bafSHidetoshi Seto return fatal_mca("Internal Bus error");
7021da177e4SLinus Torvalds if (pbci->eb && pbci->bsi > 0)
70343ed3bafSHidetoshi Seto return fatal_mca("External bus check fatal status");
7041da177e4SLinus Torvalds
7051da177e4SLinus Torvalds /*
70672fdbdceSSimon Arlott * This is a local MCA and estimated as a recoverable error.
7071da177e4SLinus Torvalds */
7081da177e4SLinus Torvalds if (platform)
7097f613c7dSKeith Owens return recover_from_platform_error(slidx, peidx, pbci, sos);
710396e8e76SRuss Anderson
7111da177e4SLinus Torvalds /*
7121da177e4SLinus Torvalds * On account of strange SAL error record, we cannot recover.
7131da177e4SLinus Torvalds */
71443ed3bafSHidetoshi Seto return fatal_mca("Strange SAL record");
7151da177e4SLinus Torvalds }
7161da177e4SLinus Torvalds
7171da177e4SLinus Torvalds /**
7181da177e4SLinus Torvalds * mca_try_to_recover - Try to recover from MCA
7191da177e4SLinus Torvalds * @rec: pointer to a SAL error record
72020305e59SHidetoshi Seto * @sos: pointer to hand off struct between SAL and OS
7211da177e4SLinus Torvalds *
7221da177e4SLinus Torvalds * Return value:
7231da177e4SLinus Torvalds * 1 on Success / 0 on Failure
7241da177e4SLinus Torvalds */
7251da177e4SLinus Torvalds
7261da177e4SLinus Torvalds static int
mca_try_to_recover(void * rec,struct ia64_sal_os_state * sos)72720305e59SHidetoshi Seto mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
7281da177e4SLinus Torvalds {
7291da177e4SLinus Torvalds int platform_err;
7301da177e4SLinus Torvalds int n_proc_err;
7311da177e4SLinus Torvalds slidx_table_t slidx;
7321da177e4SLinus Torvalds peidx_table_t peidx;
7331da177e4SLinus Torvalds pal_bus_check_info_t pbci;
7341da177e4SLinus Torvalds
7351da177e4SLinus Torvalds /* Make index of SAL error record */
7361da177e4SLinus Torvalds platform_err = mca_make_slidx(rec, &slidx);
7371da177e4SLinus Torvalds
7381da177e4SLinus Torvalds /* Count processor error sections */
7391da177e4SLinus Torvalds n_proc_err = slidx_count(&slidx, proc_err);
7401da177e4SLinus Torvalds
7411da177e4SLinus Torvalds /* Now, OS can recover when there is one processor error section */
7421da177e4SLinus Torvalds if (n_proc_err > 1)
74343ed3bafSHidetoshi Seto return fatal_mca("Too Many Errors");
74418997961SRuss Anderson else if (n_proc_err == 0)
74543ed3bafSHidetoshi Seto /* Weird SAL record ... We can't do anything */
74643ed3bafSHidetoshi Seto return fatal_mca("Weird SAL record");
7471da177e4SLinus Torvalds
7481da177e4SLinus Torvalds /* Make index of processor error section */
74920305e59SHidetoshi Seto mca_make_peidx((sal_log_processor_info_t*)
75020305e59SHidetoshi Seto slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
7511da177e4SLinus Torvalds
7521da177e4SLinus Torvalds /* Extract Processor BUS_CHECK[0] */
7531da177e4SLinus Torvalds *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
7541da177e4SLinus Torvalds
7551da177e4SLinus Torvalds /* Check whether MCA is global or not */
7567f613c7dSKeith Owens if (is_mca_global(&peidx, &pbci, sos))
75743ed3bafSHidetoshi Seto return fatal_mca("global MCA");
7581da177e4SLinus Torvalds
7591da177e4SLinus Torvalds /* Try to recover a processor error */
76020305e59SHidetoshi Seto return recover_from_processor_error(platform_err, &slidx, &peidx,
76120305e59SHidetoshi Seto &pbci, sos);
7621da177e4SLinus Torvalds }
7631da177e4SLinus Torvalds
7641da177e4SLinus Torvalds /*
7651da177e4SLinus Torvalds * =============================================================================
7661da177e4SLinus Torvalds */
7671da177e4SLinus Torvalds
mca_external_handler_init(void)7681da177e4SLinus Torvalds int __init mca_external_handler_init(void)
7691da177e4SLinus Torvalds {
7701da177e4SLinus Torvalds if (init_record_index_pools())
7711da177e4SLinus Torvalds return -ENOMEM;
7721da177e4SLinus Torvalds
7731da177e4SLinus Torvalds /* register external mca handlers */
7741da177e4SLinus Torvalds if (ia64_reg_MCA_extension(mca_try_to_recover)) {
7751da177e4SLinus Torvalds printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
7761da177e4SLinus Torvalds kfree(slidx_pool.buffer);
7771da177e4SLinus Torvalds return -EFAULT;
7781da177e4SLinus Torvalds }
7791da177e4SLinus Torvalds return 0;
7801da177e4SLinus Torvalds }
7811da177e4SLinus Torvalds
mca_external_handler_exit(void)7821da177e4SLinus Torvalds void __exit mca_external_handler_exit(void)
7831da177e4SLinus Torvalds {
7841da177e4SLinus Torvalds /* unregister external mca handlers */
7851da177e4SLinus Torvalds ia64_unreg_MCA_extension();
7861da177e4SLinus Torvalds kfree(slidx_pool.buffer);
7871da177e4SLinus Torvalds }
7881da177e4SLinus Torvalds
7891da177e4SLinus Torvalds module_init(mca_external_handler_init);
7901da177e4SLinus Torvalds module_exit(mca_external_handler_exit);
7911da177e4SLinus Torvalds
7921da177e4SLinus Torvalds module_param(sal_rec_max, int, 0644);
7931da177e4SLinus Torvalds MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
7941da177e4SLinus Torvalds
7951da177e4SLinus Torvalds MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
7961da177e4SLinus Torvalds MODULE_LICENSE("GPL");
797