1d9953105SMichael Ellerman /* 2d9953105SMichael Ellerman * Copyright (C) 2001 Dave Engebretsen IBM Corporation 3d9953105SMichael Ellerman * 4d9953105SMichael Ellerman * This program is free software; you can redistribute it and/or modify 5d9953105SMichael Ellerman * it under the terms of the GNU General Public License as published by 6d9953105SMichael Ellerman * the Free Software Foundation; either version 2 of the License, or 7d9953105SMichael Ellerman * (at your option) any later version. 8d9953105SMichael Ellerman * 9d9953105SMichael Ellerman * This program is distributed in the hope that it will be useful, 10d9953105SMichael Ellerman * but WITHOUT ANY WARRANTY; without even the implied warranty of 11d9953105SMichael Ellerman * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12d9953105SMichael Ellerman * GNU General Public License for more details. 13d9953105SMichael Ellerman * 14d9953105SMichael Ellerman * You should have received a copy of the GNU General Public License 15d9953105SMichael Ellerman * along with this program; if not, write to the Free Software 16d9953105SMichael Ellerman * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17d9953105SMichael Ellerman */ 18d9953105SMichael Ellerman 19d9953105SMichael Ellerman #include <linux/sched.h> 20d9953105SMichael Ellerman #include <linux/interrupt.h> 21d9953105SMichael Ellerman #include <linux/irq.h> 2290128997SAnton Blanchard #include <linux/of.h> 2355fc0c56SAnton Blanchard #include <linux/fs.h> 2455fc0c56SAnton Blanchard #include <linux/reboot.h> 2594675cceSMahesh Salgaonkar #include <linux/irq_work.h> 26d9953105SMichael Ellerman 27d9953105SMichael Ellerman #include <asm/machdep.h> 28d9953105SMichael Ellerman #include <asm/rtas.h> 298c4f1f29SMichael Ellerman #include <asm/firmware.h> 30a43c1590SMahesh Salgaonkar #include <asm/mce.h> 31d9953105SMichael Ellerman 32577830b0SMichael Ellerman #include "pseries.h" 33c902be71SArnd Bergmann 34d9953105SMichael Ellerman static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; 35d9953105SMichael Ellerman static DEFINE_SPINLOCK(ras_log_buf_lock); 36d9953105SMichael Ellerman 37d9953105SMichael Ellerman static int ras_check_exception_token; 38d9953105SMichael Ellerman 3994675cceSMahesh Salgaonkar static void mce_process_errlog_event(struct irq_work *work); 4094675cceSMahesh Salgaonkar static struct irq_work mce_errlog_process_work = { 4194675cceSMahesh Salgaonkar .func = mce_process_errlog_event, 4294675cceSMahesh Salgaonkar }; 4394675cceSMahesh Salgaonkar 44d9953105SMichael Ellerman #define EPOW_SENSOR_TOKEN 9 45d9953105SMichael Ellerman #define EPOW_SENSOR_INDEX 0 46d9953105SMichael Ellerman 47b4af279aSVipin K Parashar /* EPOW events counter variable */ 48b4af279aSVipin K Parashar static int num_epow_events; 49b4af279aSVipin K Parashar 50b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); 517d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); 527d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id); 53d9953105SMichael Ellerman 5404fce21cSMahesh Salgaonkar /* RTAS pseries MCE errorlog section. */ 5504fce21cSMahesh Salgaonkar struct pseries_mc_errorlog { 5604fce21cSMahesh Salgaonkar __be32 fru_id; 5704fce21cSMahesh Salgaonkar __be32 proc_id; 5804fce21cSMahesh Salgaonkar u8 error_type; 5904fce21cSMahesh Salgaonkar /* 6004fce21cSMahesh Salgaonkar * sub_err_type (1 byte). Bit fields depends on error_type 6104fce21cSMahesh Salgaonkar * 6204fce21cSMahesh Salgaonkar * MSB0 6304fce21cSMahesh Salgaonkar * | 6404fce21cSMahesh Salgaonkar * V 6504fce21cSMahesh Salgaonkar * 01234567 6604fce21cSMahesh Salgaonkar * XXXXXXXX 6704fce21cSMahesh Salgaonkar * 6804fce21cSMahesh Salgaonkar * For error_type == MC_ERROR_TYPE_UE 6904fce21cSMahesh Salgaonkar * XXXXXXXX 7004fce21cSMahesh Salgaonkar * X 1: Permanent or Transient UE. 7104fce21cSMahesh Salgaonkar * X 1: Effective address provided. 7204fce21cSMahesh Salgaonkar * X 1: Logical address provided. 7304fce21cSMahesh Salgaonkar * XX 2: Reserved. 7404fce21cSMahesh Salgaonkar * XXX 3: Type of UE error. 7504fce21cSMahesh Salgaonkar * 7604fce21cSMahesh Salgaonkar * For error_type != MC_ERROR_TYPE_UE 7704fce21cSMahesh Salgaonkar * XXXXXXXX 7804fce21cSMahesh Salgaonkar * X 1: Effective address provided. 7904fce21cSMahesh Salgaonkar * XXXXX 5: Reserved. 8004fce21cSMahesh Salgaonkar * XX 2: Type of SLB/ERAT/TLB error. 8104fce21cSMahesh Salgaonkar */ 8204fce21cSMahesh Salgaonkar u8 sub_err_type; 8304fce21cSMahesh Salgaonkar u8 reserved_1[6]; 8404fce21cSMahesh Salgaonkar __be64 effective_address; 8504fce21cSMahesh Salgaonkar __be64 logical_address; 8604fce21cSMahesh Salgaonkar } __packed; 8704fce21cSMahesh Salgaonkar 8804fce21cSMahesh Salgaonkar /* RTAS pseries MCE error types */ 8904fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_UE 0x00 9004fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_SLB 0x01 9104fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_ERAT 0x02 9204fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_TLB 0x04 9304fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_D_CACHE 0x05 9404fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_I_CACHE 0x07 9504fce21cSMahesh Salgaonkar 9604fce21cSMahesh Salgaonkar /* RTAS pseries MCE error sub types */ 9704fce21cSMahesh Salgaonkar #define MC_ERROR_UE_INDETERMINATE 0 9804fce21cSMahesh Salgaonkar #define MC_ERROR_UE_IFETCH 1 9904fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 10004fce21cSMahesh Salgaonkar #define MC_ERROR_UE_LOAD_STORE 3 10104fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 10204fce21cSMahesh Salgaonkar 10304fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_PARITY 0 10404fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_MULTIHIT 1 10504fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_INDETERMINATE 2 10604fce21cSMahesh Salgaonkar 10704fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_PARITY 1 10804fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_MULTIHIT 2 10904fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_INDETERMINATE 3 11004fce21cSMahesh Salgaonkar 11104fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_PARITY 1 11204fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_MULTIHIT 2 11304fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_INDETERMINATE 3 11404fce21cSMahesh Salgaonkar 11504fce21cSMahesh Salgaonkar static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) 11604fce21cSMahesh Salgaonkar { 11704fce21cSMahesh Salgaonkar switch (mlog->error_type) { 11804fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_UE: 11904fce21cSMahesh Salgaonkar return (mlog->sub_err_type & 0x07); 12004fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_SLB: 12104fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_ERAT: 12204fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_TLB: 12304fce21cSMahesh Salgaonkar return (mlog->sub_err_type & 0x03); 12404fce21cSMahesh Salgaonkar default: 12504fce21cSMahesh Salgaonkar return 0; 12604fce21cSMahesh Salgaonkar } 12704fce21cSMahesh Salgaonkar } 12804fce21cSMahesh Salgaonkar 12904fce21cSMahesh Salgaonkar static 13004fce21cSMahesh Salgaonkar inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog) 13104fce21cSMahesh Salgaonkar { 13204fce21cSMahesh Salgaonkar __be64 addr = 0; 13304fce21cSMahesh Salgaonkar 13404fce21cSMahesh Salgaonkar switch (mlog->error_type) { 13504fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_UE: 13604fce21cSMahesh Salgaonkar if (mlog->sub_err_type & 0x40) 13704fce21cSMahesh Salgaonkar addr = mlog->effective_address; 13804fce21cSMahesh Salgaonkar break; 13904fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_SLB: 14004fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_ERAT: 14104fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_TLB: 14204fce21cSMahesh Salgaonkar if (mlog->sub_err_type & 0x80) 14304fce21cSMahesh Salgaonkar addr = mlog->effective_address; 14404fce21cSMahesh Salgaonkar default: 14504fce21cSMahesh Salgaonkar break; 14604fce21cSMahesh Salgaonkar } 14704fce21cSMahesh Salgaonkar return be64_to_cpu(addr); 14804fce21cSMahesh Salgaonkar } 1490ebfff14SBenjamin Herrenschmidt 150d9953105SMichael Ellerman /* 151c9dccf1dSSam Bobroff * Enable the hotplug interrupt late because processing them may touch other 152c9dccf1dSSam Bobroff * devices or systems (e.g. hugepages) that have not been initialized at the 153c9dccf1dSSam Bobroff * subsys stage. 154c9dccf1dSSam Bobroff */ 155c9dccf1dSSam Bobroff int __init init_ras_hotplug_IRQ(void) 156c9dccf1dSSam Bobroff { 157c9dccf1dSSam Bobroff struct device_node *np; 158c9dccf1dSSam Bobroff 159c9dccf1dSSam Bobroff /* Hotplug Events */ 160c9dccf1dSSam Bobroff np = of_find_node_by_path("/event-sources/hot-plug-events"); 161c9dccf1dSSam Bobroff if (np != NULL) { 162c9dccf1dSSam Bobroff if (dlpar_workqueue_init() == 0) 163c9dccf1dSSam Bobroff request_event_sources_irqs(np, ras_hotplug_interrupt, 164c9dccf1dSSam Bobroff "RAS_HOTPLUG"); 165c9dccf1dSSam Bobroff of_node_put(np); 166c9dccf1dSSam Bobroff } 167c9dccf1dSSam Bobroff 168c9dccf1dSSam Bobroff return 0; 169c9dccf1dSSam Bobroff } 170c9dccf1dSSam Bobroff machine_late_initcall(pseries, init_ras_hotplug_IRQ); 171c9dccf1dSSam Bobroff 172c9dccf1dSSam Bobroff /* 173d9953105SMichael Ellerman * Initialize handlers for the set of interrupts caused by hardware errors 174d9953105SMichael Ellerman * and power system events. 175d9953105SMichael Ellerman */ 176d9953105SMichael Ellerman static int __init init_ras_IRQ(void) 177d9953105SMichael Ellerman { 178d9953105SMichael Ellerman struct device_node *np; 179d9953105SMichael Ellerman 180d9953105SMichael Ellerman ras_check_exception_token = rtas_token("check-exception"); 181d9953105SMichael Ellerman 182d9953105SMichael Ellerman /* Internal Errors */ 183d9953105SMichael Ellerman np = of_find_node_by_path("/event-sources/internal-errors"); 184d9953105SMichael Ellerman if (np != NULL) { 18532c96f77SMark Nelson request_event_sources_irqs(np, ras_error_interrupt, 18632c96f77SMark Nelson "RAS_ERROR"); 187d9953105SMichael Ellerman of_node_put(np); 188d9953105SMichael Ellerman } 189d9953105SMichael Ellerman 190d9953105SMichael Ellerman /* EPOW Events */ 191d9953105SMichael Ellerman np = of_find_node_by_path("/event-sources/epow-events"); 192d9953105SMichael Ellerman if (np != NULL) { 19332c96f77SMark Nelson request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); 194d9953105SMichael Ellerman of_node_put(np); 195d9953105SMichael Ellerman } 196d9953105SMichael Ellerman 19769ed3324SAnton Blanchard return 0; 198d9953105SMichael Ellerman } 1998e83e905SMichael Ellerman machine_subsys_initcall(pseries, init_ras_IRQ); 200d9953105SMichael Ellerman 20155fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_NORMAL 1 20255fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_ON_UPS 2 20355fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 20455fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 20555fc0c56SAnton Blanchard 20655fc0c56SAnton Blanchard static void handle_system_shutdown(char event_modifier) 20755fc0c56SAnton Blanchard { 20855fc0c56SAnton Blanchard switch (event_modifier) { 20955fc0c56SAnton Blanchard case EPOW_SHUTDOWN_NORMAL: 210b4af279aSVipin K Parashar pr_emerg("Power off requested\n"); 2111b7e0cbeSliguang orderly_poweroff(true); 21255fc0c56SAnton Blanchard break; 21355fc0c56SAnton Blanchard 21455fc0c56SAnton Blanchard case EPOW_SHUTDOWN_ON_UPS: 215b4af279aSVipin K Parashar pr_emerg("Loss of system power detected. System is running on" 216b4af279aSVipin K Parashar " UPS/battery. Check RTAS error log for details\n"); 21779872e35SAnshuman Khandual orderly_poweroff(true); 21855fc0c56SAnton Blanchard break; 21955fc0c56SAnton Blanchard 22055fc0c56SAnton Blanchard case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: 221b4af279aSVipin K Parashar pr_emerg("Loss of system critical functions detected. Check" 222b4af279aSVipin K Parashar " RTAS error log for details\n"); 2231b7e0cbeSliguang orderly_poweroff(true); 22455fc0c56SAnton Blanchard break; 22555fc0c56SAnton Blanchard 22655fc0c56SAnton Blanchard case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: 227b4af279aSVipin K Parashar pr_emerg("High ambient temperature detected. Check RTAS" 228b4af279aSVipin K Parashar " error log for details\n"); 2291b7e0cbeSliguang orderly_poweroff(true); 23055fc0c56SAnton Blanchard break; 23155fc0c56SAnton Blanchard 23255fc0c56SAnton Blanchard default: 233b4af279aSVipin K Parashar pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", 23455fc0c56SAnton Blanchard event_modifier); 23555fc0c56SAnton Blanchard } 23655fc0c56SAnton Blanchard } 23755fc0c56SAnton Blanchard 23855fc0c56SAnton Blanchard struct epow_errorlog { 23955fc0c56SAnton Blanchard unsigned char sensor_value; 24055fc0c56SAnton Blanchard unsigned char event_modifier; 24155fc0c56SAnton Blanchard unsigned char extended_modifier; 24255fc0c56SAnton Blanchard unsigned char reserved; 24355fc0c56SAnton Blanchard unsigned char platform_reason; 24455fc0c56SAnton Blanchard }; 24555fc0c56SAnton Blanchard 24655fc0c56SAnton Blanchard #define EPOW_RESET 0 24755fc0c56SAnton Blanchard #define EPOW_WARN_COOLING 1 24855fc0c56SAnton Blanchard #define EPOW_WARN_POWER 2 24955fc0c56SAnton Blanchard #define EPOW_SYSTEM_SHUTDOWN 3 25055fc0c56SAnton Blanchard #define EPOW_SYSTEM_HALT 4 25155fc0c56SAnton Blanchard #define EPOW_MAIN_ENCLOSURE 5 25255fc0c56SAnton Blanchard #define EPOW_POWER_OFF 7 25355fc0c56SAnton Blanchard 254e51df2c1SAnton Blanchard static void rtas_parse_epow_errlog(struct rtas_error_log *log) 25555fc0c56SAnton Blanchard { 25655fc0c56SAnton Blanchard struct pseries_errorlog *pseries_log; 25755fc0c56SAnton Blanchard struct epow_errorlog *epow_log; 25855fc0c56SAnton Blanchard char action_code; 25955fc0c56SAnton Blanchard char modifier; 26055fc0c56SAnton Blanchard 26155fc0c56SAnton Blanchard pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); 26255fc0c56SAnton Blanchard if (pseries_log == NULL) 26355fc0c56SAnton Blanchard return; 26455fc0c56SAnton Blanchard 26555fc0c56SAnton Blanchard epow_log = (struct epow_errorlog *)pseries_log->data; 26655fc0c56SAnton Blanchard action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ 26755fc0c56SAnton Blanchard modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ 26855fc0c56SAnton Blanchard 26955fc0c56SAnton Blanchard switch (action_code) { 27055fc0c56SAnton Blanchard case EPOW_RESET: 271b4af279aSVipin K Parashar if (num_epow_events) { 272b4af279aSVipin K Parashar pr_info("Non critical power/cooling issue cleared\n"); 273b4af279aSVipin K Parashar num_epow_events--; 274b4af279aSVipin K Parashar } 27555fc0c56SAnton Blanchard break; 27655fc0c56SAnton Blanchard 27755fc0c56SAnton Blanchard case EPOW_WARN_COOLING: 278b4af279aSVipin K Parashar pr_info("Non-critical cooling issue detected. Check RTAS error" 279b4af279aSVipin K Parashar " log for details\n"); 28055fc0c56SAnton Blanchard break; 28155fc0c56SAnton Blanchard 28255fc0c56SAnton Blanchard case EPOW_WARN_POWER: 283b4af279aSVipin K Parashar pr_info("Non-critical power issue detected. Check RTAS error" 284b4af279aSVipin K Parashar " log for details\n"); 28555fc0c56SAnton Blanchard break; 28655fc0c56SAnton Blanchard 28755fc0c56SAnton Blanchard case EPOW_SYSTEM_SHUTDOWN: 28855fc0c56SAnton Blanchard handle_system_shutdown(epow_log->event_modifier); 28955fc0c56SAnton Blanchard break; 29055fc0c56SAnton Blanchard 29155fc0c56SAnton Blanchard case EPOW_SYSTEM_HALT: 292b4af279aSVipin K Parashar pr_emerg("Critical power/cooling issue detected. Check RTAS" 293b4af279aSVipin K Parashar " error log for details. Powering off.\n"); 2941b7e0cbeSliguang orderly_poweroff(true); 29555fc0c56SAnton Blanchard break; 29655fc0c56SAnton Blanchard 29755fc0c56SAnton Blanchard case EPOW_MAIN_ENCLOSURE: 29855fc0c56SAnton Blanchard case EPOW_POWER_OFF: 299b4af279aSVipin K Parashar pr_emerg("System about to lose power. Check RTAS error log " 300b4af279aSVipin K Parashar " for details. Powering off immediately.\n"); 30155fc0c56SAnton Blanchard emergency_sync(); 30255fc0c56SAnton Blanchard kernel_power_off(); 30355fc0c56SAnton Blanchard break; 30455fc0c56SAnton Blanchard 30555fc0c56SAnton Blanchard default: 306b4af279aSVipin K Parashar pr_err("Unknown power/cooling event (action code = %d)\n", 30755fc0c56SAnton Blanchard action_code); 30855fc0c56SAnton Blanchard } 309b4af279aSVipin K Parashar 310b4af279aSVipin K Parashar /* Increment epow events counter variable */ 311b4af279aSVipin K Parashar if (action_code != EPOW_RESET) 312b4af279aSVipin K Parashar num_epow_events++; 31355fc0c56SAnton Blanchard } 31455fc0c56SAnton Blanchard 315b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) 316b7d9eb39SJohn Allen { 317b7d9eb39SJohn Allen struct pseries_errorlog *pseries_log; 318b7d9eb39SJohn Allen struct pseries_hp_errorlog *hp_elog; 319b7d9eb39SJohn Allen 320b7d9eb39SJohn Allen spin_lock(&ras_log_buf_lock); 321b7d9eb39SJohn Allen 322b7d9eb39SJohn Allen rtas_call(ras_check_exception_token, 6, 1, NULL, 323b7d9eb39SJohn Allen RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), 324b7d9eb39SJohn Allen RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), 325b7d9eb39SJohn Allen rtas_get_error_log_max()); 326b7d9eb39SJohn Allen 327b7d9eb39SJohn Allen pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, 328b7d9eb39SJohn Allen PSERIES_ELOG_SECT_ID_HOTPLUG); 329b7d9eb39SJohn Allen hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; 330b7d9eb39SJohn Allen 331b7d9eb39SJohn Allen /* 332b7d9eb39SJohn Allen * Since PCI hotplug is not currently supported on pseries, put PCI 333b7d9eb39SJohn Allen * hotplug events on the ras_log_buf to be handled by rtas_errd. 334b7d9eb39SJohn Allen */ 335b7d9eb39SJohn Allen if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || 336*4c5d87dbSOliver O'Halloran hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || 337*4c5d87dbSOliver O'Halloran hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) 338fd12527aSNathan Fontenot queue_hotplug_event(hp_elog); 339b7d9eb39SJohn Allen else 340b7d9eb39SJohn Allen log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 341b7d9eb39SJohn Allen 342b7d9eb39SJohn Allen spin_unlock(&ras_log_buf_lock); 343b7d9eb39SJohn Allen return IRQ_HANDLED; 344b7d9eb39SJohn Allen } 345b7d9eb39SJohn Allen 34655fc0c56SAnton Blanchard /* Handle environmental and power warning (EPOW) interrupts. */ 3477d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) 348d9953105SMichael Ellerman { 34955fc0c56SAnton Blanchard int status; 35055fc0c56SAnton Blanchard int state; 351d9953105SMichael Ellerman int critical; 352d9953105SMichael Ellerman 3531c2cb594SThomas Huth status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, 3541c2cb594SThomas Huth &state); 355d9953105SMichael Ellerman 356d9953105SMichael Ellerman if (state > 3) 357d9953105SMichael Ellerman critical = 1; /* Time Critical */ 358d9953105SMichael Ellerman else 359d9953105SMichael Ellerman critical = 0; 360d9953105SMichael Ellerman 361d9953105SMichael Ellerman spin_lock(&ras_log_buf_lock); 362d9953105SMichael Ellerman 363d9953105SMichael Ellerman status = rtas_call(ras_check_exception_token, 6, 1, NULL, 364b08e281bSMark Nelson RTAS_VECTOR_EXTERNAL_INTERRUPT, 365476eb491SGrant Likely virq_to_hw(irq), 3666f43747fSAnton Blanchard RTAS_EPOW_WARNING, 367d9953105SMichael Ellerman critical, __pa(&ras_log_buf), 368d9953105SMichael Ellerman rtas_get_error_log_max()); 369d9953105SMichael Ellerman 370d9953105SMichael Ellerman log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 371d9953105SMichael Ellerman 37255fc0c56SAnton Blanchard rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); 37355fc0c56SAnton Blanchard 374d9953105SMichael Ellerman spin_unlock(&ras_log_buf_lock); 375d9953105SMichael Ellerman return IRQ_HANDLED; 376d9953105SMichael Ellerman } 377d9953105SMichael Ellerman 378d9953105SMichael Ellerman /* 379d9953105SMichael Ellerman * Handle hardware error interrupts. 380d9953105SMichael Ellerman * 381d9953105SMichael Ellerman * RTAS check-exception is called to collect data on the exception. If 382d9953105SMichael Ellerman * the error is deemed recoverable, we log a warning and return. 383d9953105SMichael Ellerman * For nonrecoverable errors, an error is logged and we stop all processing 384d9953105SMichael Ellerman * as quickly as possible in order to prevent propagation of the failure. 385d9953105SMichael Ellerman */ 3867d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id) 387d9953105SMichael Ellerman { 388d9953105SMichael Ellerman struct rtas_error_log *rtas_elog; 389cc8b5263SAnton Blanchard int status; 390d9953105SMichael Ellerman int fatal; 391d9953105SMichael Ellerman 392d9953105SMichael Ellerman spin_lock(&ras_log_buf_lock); 393d9953105SMichael Ellerman 394d9953105SMichael Ellerman status = rtas_call(ras_check_exception_token, 6, 1, NULL, 395b08e281bSMark Nelson RTAS_VECTOR_EXTERNAL_INTERRUPT, 396476eb491SGrant Likely virq_to_hw(irq), 397d9953105SMichael Ellerman RTAS_INTERNAL_ERROR, 1 /* Time Critical */, 398d9953105SMichael Ellerman __pa(&ras_log_buf), 399d9953105SMichael Ellerman rtas_get_error_log_max()); 400d9953105SMichael Ellerman 401d9953105SMichael Ellerman rtas_elog = (struct rtas_error_log *)ras_log_buf; 402d9953105SMichael Ellerman 403a08a53eaSGreg Kurz if (status == 0 && 404a08a53eaSGreg Kurz rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) 405d9953105SMichael Ellerman fatal = 1; 406d9953105SMichael Ellerman else 407d9953105SMichael Ellerman fatal = 0; 408d9953105SMichael Ellerman 409d9953105SMichael Ellerman /* format and print the extended information */ 410d9953105SMichael Ellerman log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); 411d9953105SMichael Ellerman 412d9953105SMichael Ellerman if (fatal) { 413b4af279aSVipin K Parashar pr_emerg("Fatal hardware error detected. Check RTAS error" 414b4af279aSVipin K Parashar " log for details. Powering off immediately\n"); 415cc8b5263SAnton Blanchard emergency_sync(); 416cc8b5263SAnton Blanchard kernel_power_off(); 417d9953105SMichael Ellerman } else { 418b4af279aSVipin K Parashar pr_err("Recoverable hardware error detected\n"); 419d9953105SMichael Ellerman } 420d9953105SMichael Ellerman 421d9953105SMichael Ellerman spin_unlock(&ras_log_buf_lock); 422d9953105SMichael Ellerman return IRQ_HANDLED; 423d9953105SMichael Ellerman } 424d9953105SMichael Ellerman 425d368514cSAnton Blanchard /* 426d368514cSAnton Blanchard * Some versions of FWNMI place the buffer inside the 4kB page starting at 427d368514cSAnton Blanchard * 0x7000. Other versions place it inside the rtas buffer. We check both. 428d368514cSAnton Blanchard */ 429d368514cSAnton Blanchard #define VALID_FWNMI_BUFFER(A) \ 430d368514cSAnton Blanchard ((((A) >= 0x7000) && ((A) < 0x7ff0)) || \ 431d368514cSAnton Blanchard (((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16)))) 432d368514cSAnton Blanchard 43394675cceSMahesh Salgaonkar static inline struct rtas_error_log *fwnmi_get_errlog(void) 43494675cceSMahesh Salgaonkar { 43594675cceSMahesh Salgaonkar return (struct rtas_error_log *)local_paca->mce_data_buf; 43694675cceSMahesh Salgaonkar } 43794675cceSMahesh Salgaonkar 438d368514cSAnton Blanchard /* 439d368514cSAnton Blanchard * Get the error information for errors coming through the 440d9953105SMichael Ellerman * FWNMI vectors. The pt_regs' r3 will be updated to reflect 441d9953105SMichael Ellerman * the actual r3 if possible, and a ptr to the error log entry 442d9953105SMichael Ellerman * will be returned if found. 443d9953105SMichael Ellerman * 44494675cceSMahesh Salgaonkar * Use one buffer mce_data_buf per cpu to store RTAS error. 445d368514cSAnton Blanchard * 44694675cceSMahesh Salgaonkar * The mce_data_buf does not have any locks or protection around it, 447d9953105SMichael Ellerman * if a second machine check comes in, or a system reset is done 448d9953105SMichael Ellerman * before we have logged the error, then we will get corruption in the 449d9953105SMichael Ellerman * error log. This is preferable over holding off on calling 450d9953105SMichael Ellerman * ibm,nmi-interlock which would result in us checkstopping if a 451d9953105SMichael Ellerman * second machine check did come in. 452d9953105SMichael Ellerman */ 453d9953105SMichael Ellerman static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) 454d9953105SMichael Ellerman { 455d9953105SMichael Ellerman unsigned long *savep; 45694675cceSMahesh Salgaonkar struct rtas_error_log *h; 457d9953105SMichael Ellerman 458ee1dd1e3SMahesh Salgaonkar /* Mask top two bits */ 459ee1dd1e3SMahesh Salgaonkar regs->gpr[3] &= ~(0x3UL << 62); 460ee1dd1e3SMahesh Salgaonkar 461d368514cSAnton Blanchard if (!VALID_FWNMI_BUFFER(regs->gpr[3])) { 462f0e939aeSAnton Blanchard printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); 463d368514cSAnton Blanchard return NULL; 464d9953105SMichael Ellerman } 465d368514cSAnton Blanchard 466d368514cSAnton Blanchard savep = __va(regs->gpr[3]); 467cd813e1cSMahesh Salgaonkar regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ 468d368514cSAnton Blanchard 469d368514cSAnton Blanchard h = (struct rtas_error_log *)&savep[1]; 47094675cceSMahesh Salgaonkar /* Use the per cpu buffer from paca to store rtas error log */ 47194675cceSMahesh Salgaonkar memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); 472a08a53eaSGreg Kurz if (!rtas_error_extended(h)) { 47394675cceSMahesh Salgaonkar memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); 474d368514cSAnton Blanchard } else { 475a08a53eaSGreg Kurz int len, error_log_length; 476d368514cSAnton Blanchard 477a08a53eaSGreg Kurz error_log_length = 8 + rtas_error_extended_log_length(h); 47874e96bf4SMahesh Salgaonkar len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); 47994675cceSMahesh Salgaonkar memcpy(local_paca->mce_data_buf, h, len); 480d368514cSAnton Blanchard } 481d368514cSAnton Blanchard 48294675cceSMahesh Salgaonkar return (struct rtas_error_log *)local_paca->mce_data_buf; 483d9953105SMichael Ellerman } 484d9953105SMichael Ellerman 485d9953105SMichael Ellerman /* Call this when done with the data returned by FWNMI_get_errinfo. 486d9953105SMichael Ellerman * It will release the saved data area for other CPUs in the 487d9953105SMichael Ellerman * partition to receive FWNMI errors. 488d9953105SMichael Ellerman */ 489d9953105SMichael Ellerman static void fwnmi_release_errinfo(void) 490d9953105SMichael Ellerman { 491d9953105SMichael Ellerman int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL); 492d9953105SMichael Ellerman if (ret != 0) 493d368514cSAnton Blanchard printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); 494d9953105SMichael Ellerman } 495d9953105SMichael Ellerman 496c902be71SArnd Bergmann int pSeries_system_reset_exception(struct pt_regs *regs) 497d9953105SMichael Ellerman { 498bded0706SNicholas Piggin #ifdef __LITTLE_ENDIAN__ 499bded0706SNicholas Piggin /* 500bded0706SNicholas Piggin * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try 501bded0706SNicholas Piggin * to detect the bad SRR1 pattern here. Flip the NIP back to correct 502bded0706SNicholas Piggin * endian for reporting purposes. Unfortunately the MSR can't be fixed, 503bded0706SNicholas Piggin * so clear it. It will be missing MSR_RI so we won't try to recover. 504bded0706SNicholas Piggin */ 505bded0706SNicholas Piggin if ((be64_to_cpu(regs->msr) & 506bded0706SNicholas Piggin (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| 507bded0706SNicholas Piggin MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { 508bded0706SNicholas Piggin regs->nip = be64_to_cpu((__be64)regs->nip); 509bded0706SNicholas Piggin regs->msr = 0; 510bded0706SNicholas Piggin } 511bded0706SNicholas Piggin #endif 512bded0706SNicholas Piggin 513d9953105SMichael Ellerman if (fwnmi_active) { 514d9953105SMichael Ellerman struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs); 515d9953105SMichael Ellerman if (errhdr) { 516d9953105SMichael Ellerman /* XXX Should look at FWNMI information */ 517d9953105SMichael Ellerman } 518d9953105SMichael Ellerman fwnmi_release_errinfo(); 519d9953105SMichael Ellerman } 520102c05e8SNicholas Piggin 521102c05e8SNicholas Piggin if (smp_handle_nmi_ipi(regs)) 522102c05e8SNicholas Piggin return 1; 523102c05e8SNicholas Piggin 524c902be71SArnd Bergmann return 0; /* need to perform reset */ 525d9953105SMichael Ellerman } 526d9953105SMichael Ellerman 5278f0b8056SMahesh Salgaonkar #define VAL_TO_STRING(ar, val) \ 5288f0b8056SMahesh Salgaonkar (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown") 5298f0b8056SMahesh Salgaonkar 5308f0b8056SMahesh Salgaonkar static void pseries_print_mce_info(struct pt_regs *regs, 5318f0b8056SMahesh Salgaonkar struct rtas_error_log *errp) 5328f0b8056SMahesh Salgaonkar { 5338f0b8056SMahesh Salgaonkar const char *level, *sevstr; 5348f0b8056SMahesh Salgaonkar struct pseries_errorlog *pseries_log; 5358f0b8056SMahesh Salgaonkar struct pseries_mc_errorlog *mce_log; 5368f0b8056SMahesh Salgaonkar u8 error_type, err_sub_type; 5378f0b8056SMahesh Salgaonkar u64 addr; 5388f0b8056SMahesh Salgaonkar u8 initiator = rtas_error_initiator(errp); 5398f0b8056SMahesh Salgaonkar int disposition = rtas_error_disposition(errp); 5408f0b8056SMahesh Salgaonkar 5418f0b8056SMahesh Salgaonkar static const char * const initiators[] = { 5428f0b8056SMahesh Salgaonkar "Unknown", 5438f0b8056SMahesh Salgaonkar "CPU", 5448f0b8056SMahesh Salgaonkar "PCI", 5458f0b8056SMahesh Salgaonkar "ISA", 5468f0b8056SMahesh Salgaonkar "Memory", 5478f0b8056SMahesh Salgaonkar "Power Mgmt", 5488f0b8056SMahesh Salgaonkar }; 5498f0b8056SMahesh Salgaonkar static const char * const mc_err_types[] = { 5508f0b8056SMahesh Salgaonkar "UE", 5518f0b8056SMahesh Salgaonkar "SLB", 5528f0b8056SMahesh Salgaonkar "ERAT", 5538f0b8056SMahesh Salgaonkar "TLB", 5548f0b8056SMahesh Salgaonkar "D-Cache", 5558f0b8056SMahesh Salgaonkar "Unknown", 5568f0b8056SMahesh Salgaonkar "I-Cache", 5578f0b8056SMahesh Salgaonkar }; 5588f0b8056SMahesh Salgaonkar static const char * const mc_ue_types[] = { 5598f0b8056SMahesh Salgaonkar "Indeterminate", 5608f0b8056SMahesh Salgaonkar "Instruction fetch", 5618f0b8056SMahesh Salgaonkar "Page table walk ifetch", 5628f0b8056SMahesh Salgaonkar "Load/Store", 5638f0b8056SMahesh Salgaonkar "Page table walk Load/Store", 5648f0b8056SMahesh Salgaonkar }; 5658f0b8056SMahesh Salgaonkar 5668f0b8056SMahesh Salgaonkar /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ 5678f0b8056SMahesh Salgaonkar static const char * const mc_slb_types[] = { 5688f0b8056SMahesh Salgaonkar "Parity", 5698f0b8056SMahesh Salgaonkar "Multihit", 5708f0b8056SMahesh Salgaonkar "Indeterminate", 5718f0b8056SMahesh Salgaonkar }; 5728f0b8056SMahesh Salgaonkar 5738f0b8056SMahesh Salgaonkar /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ 5748f0b8056SMahesh Salgaonkar static const char * const mc_soft_types[] = { 5758f0b8056SMahesh Salgaonkar "Unknown", 5768f0b8056SMahesh Salgaonkar "Parity", 5778f0b8056SMahesh Salgaonkar "Multihit", 5788f0b8056SMahesh Salgaonkar "Indeterminate", 5798f0b8056SMahesh Salgaonkar }; 5808f0b8056SMahesh Salgaonkar 5818f0b8056SMahesh Salgaonkar if (!rtas_error_extended(errp)) { 5828f0b8056SMahesh Salgaonkar pr_err("Machine check interrupt: Missing extended error log\n"); 5838f0b8056SMahesh Salgaonkar return; 5848f0b8056SMahesh Salgaonkar } 5858f0b8056SMahesh Salgaonkar 5868f0b8056SMahesh Salgaonkar pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); 5878f0b8056SMahesh Salgaonkar if (pseries_log == NULL) 5888f0b8056SMahesh Salgaonkar return; 5898f0b8056SMahesh Salgaonkar 5908f0b8056SMahesh Salgaonkar mce_log = (struct pseries_mc_errorlog *)pseries_log->data; 5918f0b8056SMahesh Salgaonkar 5928f0b8056SMahesh Salgaonkar error_type = mce_log->error_type; 5938f0b8056SMahesh Salgaonkar err_sub_type = rtas_mc_error_sub_type(mce_log); 5948f0b8056SMahesh Salgaonkar 5958f0b8056SMahesh Salgaonkar switch (rtas_error_severity(errp)) { 5968f0b8056SMahesh Salgaonkar case RTAS_SEVERITY_NO_ERROR: 5978f0b8056SMahesh Salgaonkar level = KERN_INFO; 5988f0b8056SMahesh Salgaonkar sevstr = "Harmless"; 5998f0b8056SMahesh Salgaonkar break; 6008f0b8056SMahesh Salgaonkar case RTAS_SEVERITY_WARNING: 6018f0b8056SMahesh Salgaonkar level = KERN_WARNING; 6028f0b8056SMahesh Salgaonkar sevstr = ""; 6038f0b8056SMahesh Salgaonkar break; 6048f0b8056SMahesh Salgaonkar case RTAS_SEVERITY_ERROR: 6058f0b8056SMahesh Salgaonkar case RTAS_SEVERITY_ERROR_SYNC: 6068f0b8056SMahesh Salgaonkar level = KERN_ERR; 6078f0b8056SMahesh Salgaonkar sevstr = "Severe"; 6088f0b8056SMahesh Salgaonkar break; 6098f0b8056SMahesh Salgaonkar case RTAS_SEVERITY_FATAL: 6108f0b8056SMahesh Salgaonkar default: 6118f0b8056SMahesh Salgaonkar level = KERN_ERR; 6128f0b8056SMahesh Salgaonkar sevstr = "Fatal"; 6138f0b8056SMahesh Salgaonkar break; 6148f0b8056SMahesh Salgaonkar } 6158f0b8056SMahesh Salgaonkar 616c6d15258SMahesh Salgaonkar #ifdef CONFIG_PPC_BOOK3S_64 617c6d15258SMahesh Salgaonkar /* Display faulty slb contents for SLB errors. */ 618c6d15258SMahesh Salgaonkar if (error_type == MC_ERROR_TYPE_SLB) 619c6d15258SMahesh Salgaonkar slb_dump_contents(local_paca->mce_faulty_slbs); 620c6d15258SMahesh Salgaonkar #endif 621c6d15258SMahesh Salgaonkar 6228f0b8056SMahesh Salgaonkar printk("%s%s Machine check interrupt [%s]\n", level, sevstr, 6238f0b8056SMahesh Salgaonkar disposition == RTAS_DISP_FULLY_RECOVERED ? 6248f0b8056SMahesh Salgaonkar "Recovered" : "Not recovered"); 6258f0b8056SMahesh Salgaonkar if (user_mode(regs)) { 6268f0b8056SMahesh Salgaonkar printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level, 6278f0b8056SMahesh Salgaonkar regs->nip, current->pid, current->comm); 6288f0b8056SMahesh Salgaonkar } else { 6298f0b8056SMahesh Salgaonkar printk("%s NIP [%016lx]: %pS\n", level, regs->nip, 6308f0b8056SMahesh Salgaonkar (void *)regs->nip); 6318f0b8056SMahesh Salgaonkar } 6328f0b8056SMahesh Salgaonkar printk("%s Initiator: %s\n", level, 6338f0b8056SMahesh Salgaonkar VAL_TO_STRING(initiators, initiator)); 6348f0b8056SMahesh Salgaonkar 6358f0b8056SMahesh Salgaonkar switch (error_type) { 6368f0b8056SMahesh Salgaonkar case MC_ERROR_TYPE_UE: 6378f0b8056SMahesh Salgaonkar printk("%s Error type: %s [%s]\n", level, 6388f0b8056SMahesh Salgaonkar VAL_TO_STRING(mc_err_types, error_type), 6398f0b8056SMahesh Salgaonkar VAL_TO_STRING(mc_ue_types, err_sub_type)); 6408f0b8056SMahesh Salgaonkar break; 6418f0b8056SMahesh Salgaonkar case MC_ERROR_TYPE_SLB: 6428f0b8056SMahesh Salgaonkar printk("%s Error type: %s [%s]\n", level, 6438f0b8056SMahesh Salgaonkar VAL_TO_STRING(mc_err_types, error_type), 6448f0b8056SMahesh Salgaonkar VAL_TO_STRING(mc_slb_types, err_sub_type)); 6458f0b8056SMahesh Salgaonkar break; 6468f0b8056SMahesh Salgaonkar case MC_ERROR_TYPE_ERAT: 6478f0b8056SMahesh Salgaonkar case MC_ERROR_TYPE_TLB: 6488f0b8056SMahesh Salgaonkar printk("%s Error type: %s [%s]\n", level, 6498f0b8056SMahesh Salgaonkar VAL_TO_STRING(mc_err_types, error_type), 6508f0b8056SMahesh Salgaonkar VAL_TO_STRING(mc_soft_types, err_sub_type)); 6518f0b8056SMahesh Salgaonkar break; 6528f0b8056SMahesh Salgaonkar default: 6538f0b8056SMahesh Salgaonkar printk("%s Error type: %s\n", level, 6548f0b8056SMahesh Salgaonkar VAL_TO_STRING(mc_err_types, error_type)); 6558f0b8056SMahesh Salgaonkar break; 6568f0b8056SMahesh Salgaonkar } 6578f0b8056SMahesh Salgaonkar 6588f0b8056SMahesh Salgaonkar addr = rtas_mc_get_effective_addr(mce_log); 6598f0b8056SMahesh Salgaonkar if (addr) 6608f0b8056SMahesh Salgaonkar printk("%s Effective address: %016llx\n", level, addr); 6618f0b8056SMahesh Salgaonkar } 6628f0b8056SMahesh Salgaonkar 663a43c1590SMahesh Salgaonkar static int mce_handle_error(struct rtas_error_log *errp) 664a43c1590SMahesh Salgaonkar { 665a43c1590SMahesh Salgaonkar struct pseries_errorlog *pseries_log; 666a43c1590SMahesh Salgaonkar struct pseries_mc_errorlog *mce_log; 667a43c1590SMahesh Salgaonkar int disposition = rtas_error_disposition(errp); 668a43c1590SMahesh Salgaonkar u8 error_type; 669a43c1590SMahesh Salgaonkar 670a43c1590SMahesh Salgaonkar if (!rtas_error_extended(errp)) 671a43c1590SMahesh Salgaonkar goto out; 672a43c1590SMahesh Salgaonkar 673a43c1590SMahesh Salgaonkar pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); 674a43c1590SMahesh Salgaonkar if (pseries_log == NULL) 675a43c1590SMahesh Salgaonkar goto out; 676a43c1590SMahesh Salgaonkar 677a43c1590SMahesh Salgaonkar mce_log = (struct pseries_mc_errorlog *)pseries_log->data; 678a43c1590SMahesh Salgaonkar error_type = mce_log->error_type; 679a43c1590SMahesh Salgaonkar 680a43c1590SMahesh Salgaonkar #ifdef CONFIG_PPC_BOOK3S_64 681a43c1590SMahesh Salgaonkar if (disposition == RTAS_DISP_NOT_RECOVERED) { 682a43c1590SMahesh Salgaonkar switch (error_type) { 683a43c1590SMahesh Salgaonkar case MC_ERROR_TYPE_SLB: 684a43c1590SMahesh Salgaonkar case MC_ERROR_TYPE_ERAT: 685c6d15258SMahesh Salgaonkar /* 686c6d15258SMahesh Salgaonkar * Store the old slb content in paca before flushing. 687c6d15258SMahesh Salgaonkar * Print this when we go to virtual mode. 688c6d15258SMahesh Salgaonkar * There are chances that we may hit MCE again if there 689c6d15258SMahesh Salgaonkar * is a parity error on the SLB entry we trying to read 690c6d15258SMahesh Salgaonkar * for saving. Hence limit the slb saving to single 691c6d15258SMahesh Salgaonkar * level of recursion. 692c6d15258SMahesh Salgaonkar */ 693c6d15258SMahesh Salgaonkar if (local_paca->in_mce == 1) 694c6d15258SMahesh Salgaonkar slb_save_contents(local_paca->mce_faulty_slbs); 695a43c1590SMahesh Salgaonkar flush_and_reload_slb(); 696a43c1590SMahesh Salgaonkar disposition = RTAS_DISP_FULLY_RECOVERED; 697a43c1590SMahesh Salgaonkar rtas_set_disposition_recovered(errp); 698a43c1590SMahesh Salgaonkar break; 699a43c1590SMahesh Salgaonkar default: 700a43c1590SMahesh Salgaonkar break; 701a43c1590SMahesh Salgaonkar } 702a43c1590SMahesh Salgaonkar } 703a43c1590SMahesh Salgaonkar #endif 704a43c1590SMahesh Salgaonkar 705a43c1590SMahesh Salgaonkar out: 706a43c1590SMahesh Salgaonkar return disposition; 707a43c1590SMahesh Salgaonkar } 708a43c1590SMahesh Salgaonkar 709d9953105SMichael Ellerman /* 71094675cceSMahesh Salgaonkar * Process MCE rtas errlog event. 71194675cceSMahesh Salgaonkar */ 71294675cceSMahesh Salgaonkar static void mce_process_errlog_event(struct irq_work *work) 71394675cceSMahesh Salgaonkar { 71494675cceSMahesh Salgaonkar struct rtas_error_log *err; 71594675cceSMahesh Salgaonkar 71694675cceSMahesh Salgaonkar err = fwnmi_get_errlog(); 71794675cceSMahesh Salgaonkar log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); 71894675cceSMahesh Salgaonkar } 71994675cceSMahesh Salgaonkar 72094675cceSMahesh Salgaonkar /* 721d9953105SMichael Ellerman * See if we can recover from a machine check exception. 722d9953105SMichael Ellerman * This is only called on power4 (or above) and only via 723d9953105SMichael Ellerman * the Firmware Non-Maskable Interrupts (fwnmi) handler 724d9953105SMichael Ellerman * which provides the error analysis for us. 725d9953105SMichael Ellerman * 726d9953105SMichael Ellerman * Return 1 if corrected (or delivered a signal). 727d9953105SMichael Ellerman * Return 0 if there is nothing we can do. 728d9953105SMichael Ellerman */ 729d9953105SMichael Ellerman static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) 730d9953105SMichael Ellerman { 731d47d1d8aSAnton Blanchard int recovered = 0; 732a08a53eaSGreg Kurz int disposition = rtas_error_disposition(err); 733d9953105SMichael Ellerman 7348f0b8056SMahesh Salgaonkar pseries_print_mce_info(regs, err); 7358f0b8056SMahesh Salgaonkar 736d47d1d8aSAnton Blanchard if (!(regs->msr & MSR_RI)) { 737d47d1d8aSAnton Blanchard /* If MSR_RI isn't set, we cannot recover */ 7388f0b8056SMahesh Salgaonkar pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); 739d47d1d8aSAnton Blanchard recovered = 0; 740d47d1d8aSAnton Blanchard 741a08a53eaSGreg Kurz } else if (disposition == RTAS_DISP_FULLY_RECOVERED) { 742d9953105SMichael Ellerman /* Platform corrected itself */ 743d47d1d8aSAnton Blanchard recovered = 1; 744d47d1d8aSAnton Blanchard 745a08a53eaSGreg Kurz } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { 746d47d1d8aSAnton Blanchard /* Platform corrected itself but could be degraded */ 747d47d1d8aSAnton Blanchard printk(KERN_ERR "MCE: limited recovery, system may " 748d47d1d8aSAnton Blanchard "be degraded\n"); 749d47d1d8aSAnton Blanchard recovered = 1; 750d47d1d8aSAnton Blanchard 751d47d1d8aSAnton Blanchard } else if (user_mode(regs) && !is_global_init(current) && 752a08a53eaSGreg Kurz rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) { 753d47d1d8aSAnton Blanchard 754d47d1d8aSAnton Blanchard /* 755d47d1d8aSAnton Blanchard * If we received a synchronous error when in userspace 756d47d1d8aSAnton Blanchard * kill the task. Firmware may report details of the fail 757d47d1d8aSAnton Blanchard * asynchronously, so we can't rely on the target and type 758d47d1d8aSAnton Blanchard * fields being valid here. 759d47d1d8aSAnton Blanchard */ 760d47d1d8aSAnton Blanchard printk(KERN_ERR "MCE: uncorrectable error, killing task " 761d47d1d8aSAnton Blanchard "%s:%d\n", current->comm, current->pid); 762d47d1d8aSAnton Blanchard 763d47d1d8aSAnton Blanchard _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 764d47d1d8aSAnton Blanchard recovered = 1; 765d9953105SMichael Ellerman } 766d9953105SMichael Ellerman 76794675cceSMahesh Salgaonkar /* Queue irq work to log this rtas event later. */ 76894675cceSMahesh Salgaonkar irq_work_queue(&mce_errlog_process_work); 769d9953105SMichael Ellerman 770d47d1d8aSAnton Blanchard return recovered; 771d9953105SMichael Ellerman } 772d9953105SMichael Ellerman 773d9953105SMichael Ellerman /* 774d9953105SMichael Ellerman * Handle a machine check. 775d9953105SMichael Ellerman * 776d9953105SMichael Ellerman * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) 777d9953105SMichael Ellerman * should be present. If so the handler which called us tells us if the 778d9953105SMichael Ellerman * error was recovered (never true if RI=0). 779d9953105SMichael Ellerman * 780d9953105SMichael Ellerman * On hardware prior to Power 4 these exceptions were asynchronous which 781d9953105SMichael Ellerman * means we can't tell exactly where it occurred and so we can't recover. 782d9953105SMichael Ellerman */ 783d9953105SMichael Ellerman int pSeries_machine_check_exception(struct pt_regs *regs) 784d9953105SMichael Ellerman { 785d9953105SMichael Ellerman struct rtas_error_log *errp; 786d9953105SMichael Ellerman 787d9953105SMichael Ellerman if (fwnmi_active) { 788d9953105SMichael Ellerman fwnmi_release_errinfo(); 789a43c1590SMahesh Salgaonkar errp = fwnmi_get_errlog(); 790d9953105SMichael Ellerman if (errp && recover_mce(regs, errp)) 791d9953105SMichael Ellerman return 1; 792d9953105SMichael Ellerman } 793d9953105SMichael Ellerman 794d9953105SMichael Ellerman return 0; 795d9953105SMichael Ellerman } 796a43c1590SMahesh Salgaonkar 797a43c1590SMahesh Salgaonkar long pseries_machine_check_realmode(struct pt_regs *regs) 798a43c1590SMahesh Salgaonkar { 799a43c1590SMahesh Salgaonkar struct rtas_error_log *errp; 800a43c1590SMahesh Salgaonkar int disposition; 801a43c1590SMahesh Salgaonkar 802a43c1590SMahesh Salgaonkar if (fwnmi_active) { 803a43c1590SMahesh Salgaonkar errp = fwnmi_get_errinfo(regs); 804a43c1590SMahesh Salgaonkar /* 805a43c1590SMahesh Salgaonkar * Call to fwnmi_release_errinfo() in real mode causes kernel 806a43c1590SMahesh Salgaonkar * to panic. Hence we will call it as soon as we go into 807a43c1590SMahesh Salgaonkar * virtual mode. 808a43c1590SMahesh Salgaonkar */ 809a43c1590SMahesh Salgaonkar disposition = mce_handle_error(errp); 810a43c1590SMahesh Salgaonkar if (disposition == RTAS_DISP_FULLY_RECOVERED) 811a43c1590SMahesh Salgaonkar return 1; 812a43c1590SMahesh Salgaonkar } 813a43c1590SMahesh Salgaonkar 814a43c1590SMahesh Salgaonkar return 0; 815a43c1590SMahesh Salgaonkar } 816