11a59d1b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 2d9953105SMichael Ellerman /* 3d9953105SMichael Ellerman * Copyright (C) 2001 Dave Engebretsen IBM Corporation 4d9953105SMichael Ellerman */ 5d9953105SMichael Ellerman 6d9953105SMichael Ellerman #include <linux/sched.h> 7d9953105SMichael Ellerman #include <linux/interrupt.h> 8d9953105SMichael Ellerman #include <linux/irq.h> 990128997SAnton Blanchard #include <linux/of.h> 1055fc0c56SAnton Blanchard #include <linux/fs.h> 1155fc0c56SAnton Blanchard #include <linux/reboot.h> 1294675cceSMahesh Salgaonkar #include <linux/irq_work.h> 13d9953105SMichael Ellerman 14d9953105SMichael Ellerman #include <asm/machdep.h> 15d9953105SMichael Ellerman #include <asm/rtas.h> 168c4f1f29SMichael Ellerman #include <asm/firmware.h> 17a43c1590SMahesh Salgaonkar #include <asm/mce.h> 18d9953105SMichael Ellerman 19577830b0SMichael Ellerman #include "pseries.h" 20c902be71SArnd Bergmann 21d9953105SMichael Ellerman static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; 22d9953105SMichael Ellerman static DEFINE_SPINLOCK(ras_log_buf_lock); 23d9953105SMichael Ellerman 24d9953105SMichael Ellerman static int ras_check_exception_token; 25d9953105SMichael Ellerman 2694675cceSMahesh Salgaonkar static void mce_process_errlog_event(struct irq_work *work); 2794675cceSMahesh Salgaonkar static struct irq_work mce_errlog_process_work = { 2894675cceSMahesh Salgaonkar .func = mce_process_errlog_event, 2994675cceSMahesh Salgaonkar }; 3094675cceSMahesh Salgaonkar 31d9953105SMichael Ellerman #define EPOW_SENSOR_TOKEN 9 32d9953105SMichael Ellerman #define EPOW_SENSOR_INDEX 0 33d9953105SMichael Ellerman 34b4af279aSVipin K Parashar /* EPOW events counter variable */ 35b4af279aSVipin K Parashar static int num_epow_events; 36b4af279aSVipin K Parashar 37b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); 387d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); 397d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id); 40d9953105SMichael Ellerman 4104fce21cSMahesh Salgaonkar /* RTAS pseries MCE errorlog section. */ 4204fce21cSMahesh Salgaonkar struct pseries_mc_errorlog { 4304fce21cSMahesh Salgaonkar __be32 fru_id; 4404fce21cSMahesh Salgaonkar __be32 proc_id; 4504fce21cSMahesh Salgaonkar u8 error_type; 4604fce21cSMahesh Salgaonkar /* 4704fce21cSMahesh Salgaonkar * sub_err_type (1 byte). Bit fields depends on error_type 4804fce21cSMahesh Salgaonkar * 4904fce21cSMahesh Salgaonkar * MSB0 5004fce21cSMahesh Salgaonkar * | 5104fce21cSMahesh Salgaonkar * V 5204fce21cSMahesh Salgaonkar * 01234567 5304fce21cSMahesh Salgaonkar * XXXXXXXX 5404fce21cSMahesh Salgaonkar * 5504fce21cSMahesh Salgaonkar * For error_type == MC_ERROR_TYPE_UE 5604fce21cSMahesh Salgaonkar * XXXXXXXX 5704fce21cSMahesh Salgaonkar * X 1: Permanent or Transient UE. 5804fce21cSMahesh Salgaonkar * X 1: Effective address provided. 5904fce21cSMahesh Salgaonkar * X 1: Logical address provided. 6004fce21cSMahesh Salgaonkar * XX 2: Reserved. 6104fce21cSMahesh Salgaonkar * XXX 3: Type of UE error. 6204fce21cSMahesh Salgaonkar * 63*0f54bddeSGanesh Goudar * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB 6404fce21cSMahesh Salgaonkar * XXXXXXXX 6504fce21cSMahesh Salgaonkar * X 1: Effective address provided. 6604fce21cSMahesh Salgaonkar * XXXXX 5: Reserved. 6704fce21cSMahesh Salgaonkar * XX 2: Type of SLB/ERAT/TLB error. 68*0f54bddeSGanesh Goudar * 69*0f54bddeSGanesh Goudar * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS 70*0f54bddeSGanesh Goudar * XXXXXXXX 71*0f54bddeSGanesh Goudar * X 1: Error causing address provided. 72*0f54bddeSGanesh Goudar * XXX 3: Type of error. 73*0f54bddeSGanesh Goudar * XXXX 4: Reserved. 7404fce21cSMahesh Salgaonkar */ 7504fce21cSMahesh Salgaonkar u8 sub_err_type; 7604fce21cSMahesh Salgaonkar u8 reserved_1[6]; 7704fce21cSMahesh Salgaonkar __be64 effective_address; 7804fce21cSMahesh Salgaonkar __be64 logical_address; 7904fce21cSMahesh Salgaonkar } __packed; 8004fce21cSMahesh Salgaonkar 8104fce21cSMahesh Salgaonkar /* RTAS pseries MCE error types */ 8204fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_UE 0x00 8304fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_SLB 0x01 8404fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_ERAT 0x02 859ca766f9SNicholas Piggin #define MC_ERROR_TYPE_UNKNOWN 0x03 8604fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_TLB 0x04 8704fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_D_CACHE 0x05 8804fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_I_CACHE 0x07 89*0f54bddeSGanesh Goudar #define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 9004fce21cSMahesh Salgaonkar 9104fce21cSMahesh Salgaonkar /* RTAS pseries MCE error sub types */ 9204fce21cSMahesh Salgaonkar #define MC_ERROR_UE_INDETERMINATE 0 9304fce21cSMahesh Salgaonkar #define MC_ERROR_UE_IFETCH 1 9404fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 9504fce21cSMahesh Salgaonkar #define MC_ERROR_UE_LOAD_STORE 3 9604fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 9704fce21cSMahesh Salgaonkar 989ca766f9SNicholas Piggin #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 999ca766f9SNicholas Piggin #define UE_LOGICAL_ADDR_PROVIDED 0x20 100*0f54bddeSGanesh Goudar #define MC_EFFECTIVE_ADDR_PROVIDED 0x80 1019ca766f9SNicholas Piggin 10204fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_PARITY 0 10304fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_MULTIHIT 1 10404fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_INDETERMINATE 2 10504fce21cSMahesh Salgaonkar 10604fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_PARITY 1 10704fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_MULTIHIT 2 10804fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_INDETERMINATE 3 10904fce21cSMahesh Salgaonkar 11004fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_PARITY 1 11104fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_MULTIHIT 2 11204fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_INDETERMINATE 3 11304fce21cSMahesh Salgaonkar 114*0f54bddeSGanesh Goudar #define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 115*0f54bddeSGanesh Goudar #define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 116*0f54bddeSGanesh Goudar 11704fce21cSMahesh Salgaonkar static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) 11804fce21cSMahesh Salgaonkar { 11904fce21cSMahesh Salgaonkar switch (mlog->error_type) { 12004fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_UE: 12104fce21cSMahesh Salgaonkar return (mlog->sub_err_type & 0x07); 12204fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_SLB: 12304fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_ERAT: 12404fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_TLB: 12504fce21cSMahesh Salgaonkar return (mlog->sub_err_type & 0x03); 126*0f54bddeSGanesh Goudar case MC_ERROR_TYPE_CTRL_MEM_ACCESS: 127*0f54bddeSGanesh Goudar return (mlog->sub_err_type & 0x70) >> 4; 12804fce21cSMahesh Salgaonkar default: 12904fce21cSMahesh Salgaonkar return 0; 13004fce21cSMahesh Salgaonkar } 13104fce21cSMahesh Salgaonkar } 13204fce21cSMahesh Salgaonkar 133d9953105SMichael Ellerman /* 134c9dccf1dSSam Bobroff * Enable the hotplug interrupt late because processing them may touch other 135c9dccf1dSSam Bobroff * devices or systems (e.g. hugepages) that have not been initialized at the 136c9dccf1dSSam Bobroff * subsys stage. 137c9dccf1dSSam Bobroff */ 13890db8bf2SCédric Le Goater static int __init init_ras_hotplug_IRQ(void) 139c9dccf1dSSam Bobroff { 140c9dccf1dSSam Bobroff struct device_node *np; 141c9dccf1dSSam Bobroff 142c9dccf1dSSam Bobroff /* Hotplug Events */ 143c9dccf1dSSam Bobroff np = of_find_node_by_path("/event-sources/hot-plug-events"); 144c9dccf1dSSam Bobroff if (np != NULL) { 145c9dccf1dSSam Bobroff if (dlpar_workqueue_init() == 0) 146c9dccf1dSSam Bobroff request_event_sources_irqs(np, ras_hotplug_interrupt, 147c9dccf1dSSam Bobroff "RAS_HOTPLUG"); 148c9dccf1dSSam Bobroff of_node_put(np); 149c9dccf1dSSam Bobroff } 150c9dccf1dSSam Bobroff 151c9dccf1dSSam Bobroff return 0; 152c9dccf1dSSam Bobroff } 153c9dccf1dSSam Bobroff machine_late_initcall(pseries, init_ras_hotplug_IRQ); 154c9dccf1dSSam Bobroff 155c9dccf1dSSam Bobroff /* 156d9953105SMichael Ellerman * Initialize handlers for the set of interrupts caused by hardware errors 157d9953105SMichael Ellerman * and power system events. 158d9953105SMichael Ellerman */ 159d9953105SMichael Ellerman static int __init init_ras_IRQ(void) 160d9953105SMichael Ellerman { 161d9953105SMichael Ellerman struct device_node *np; 162d9953105SMichael Ellerman 163d9953105SMichael Ellerman ras_check_exception_token = rtas_token("check-exception"); 164d9953105SMichael Ellerman 165d9953105SMichael Ellerman /* Internal Errors */ 166d9953105SMichael Ellerman np = of_find_node_by_path("/event-sources/internal-errors"); 167d9953105SMichael Ellerman if (np != NULL) { 16832c96f77SMark Nelson request_event_sources_irqs(np, ras_error_interrupt, 16932c96f77SMark Nelson "RAS_ERROR"); 170d9953105SMichael Ellerman of_node_put(np); 171d9953105SMichael Ellerman } 172d9953105SMichael Ellerman 173d9953105SMichael Ellerman /* EPOW Events */ 174d9953105SMichael Ellerman np = of_find_node_by_path("/event-sources/epow-events"); 175d9953105SMichael Ellerman if (np != NULL) { 17632c96f77SMark Nelson request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); 177d9953105SMichael Ellerman of_node_put(np); 178d9953105SMichael Ellerman } 179d9953105SMichael Ellerman 18069ed3324SAnton Blanchard return 0; 181d9953105SMichael Ellerman } 1828e83e905SMichael Ellerman machine_subsys_initcall(pseries, init_ras_IRQ); 183d9953105SMichael Ellerman 18455fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_NORMAL 1 18555fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_ON_UPS 2 18655fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 18755fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 18855fc0c56SAnton Blanchard 18955fc0c56SAnton Blanchard static void handle_system_shutdown(char event_modifier) 19055fc0c56SAnton Blanchard { 19155fc0c56SAnton Blanchard switch (event_modifier) { 19255fc0c56SAnton Blanchard case EPOW_SHUTDOWN_NORMAL: 193b4af279aSVipin K Parashar pr_emerg("Power off requested\n"); 1941b7e0cbeSliguang orderly_poweroff(true); 19555fc0c56SAnton Blanchard break; 19655fc0c56SAnton Blanchard 19755fc0c56SAnton Blanchard case EPOW_SHUTDOWN_ON_UPS: 198b4af279aSVipin K Parashar pr_emerg("Loss of system power detected. System is running on" 199b4af279aSVipin K Parashar " UPS/battery. Check RTAS error log for details\n"); 20055fc0c56SAnton Blanchard break; 20155fc0c56SAnton Blanchard 20255fc0c56SAnton Blanchard case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: 203b4af279aSVipin K Parashar pr_emerg("Loss of system critical functions detected. Check" 204b4af279aSVipin K Parashar " RTAS error log for details\n"); 2051b7e0cbeSliguang orderly_poweroff(true); 20655fc0c56SAnton Blanchard break; 20755fc0c56SAnton Blanchard 20855fc0c56SAnton Blanchard case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: 209b4af279aSVipin K Parashar pr_emerg("High ambient temperature detected. Check RTAS" 210b4af279aSVipin K Parashar " error log for details\n"); 2111b7e0cbeSliguang orderly_poweroff(true); 21255fc0c56SAnton Blanchard break; 21355fc0c56SAnton Blanchard 21455fc0c56SAnton Blanchard default: 215b4af279aSVipin K Parashar pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", 21655fc0c56SAnton Blanchard event_modifier); 21755fc0c56SAnton Blanchard } 21855fc0c56SAnton Blanchard } 21955fc0c56SAnton Blanchard 22055fc0c56SAnton Blanchard struct epow_errorlog { 22155fc0c56SAnton Blanchard unsigned char sensor_value; 22255fc0c56SAnton Blanchard unsigned char event_modifier; 22355fc0c56SAnton Blanchard unsigned char extended_modifier; 22455fc0c56SAnton Blanchard unsigned char reserved; 22555fc0c56SAnton Blanchard unsigned char platform_reason; 22655fc0c56SAnton Blanchard }; 22755fc0c56SAnton Blanchard 22855fc0c56SAnton Blanchard #define EPOW_RESET 0 22955fc0c56SAnton Blanchard #define EPOW_WARN_COOLING 1 23055fc0c56SAnton Blanchard #define EPOW_WARN_POWER 2 23155fc0c56SAnton Blanchard #define EPOW_SYSTEM_SHUTDOWN 3 23255fc0c56SAnton Blanchard #define EPOW_SYSTEM_HALT 4 23355fc0c56SAnton Blanchard #define EPOW_MAIN_ENCLOSURE 5 23455fc0c56SAnton Blanchard #define EPOW_POWER_OFF 7 23555fc0c56SAnton Blanchard 236e51df2c1SAnton Blanchard static void rtas_parse_epow_errlog(struct rtas_error_log *log) 23755fc0c56SAnton Blanchard { 23855fc0c56SAnton Blanchard struct pseries_errorlog *pseries_log; 23955fc0c56SAnton Blanchard struct epow_errorlog *epow_log; 24055fc0c56SAnton Blanchard char action_code; 24155fc0c56SAnton Blanchard char modifier; 24255fc0c56SAnton Blanchard 24355fc0c56SAnton Blanchard pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); 24455fc0c56SAnton Blanchard if (pseries_log == NULL) 24555fc0c56SAnton Blanchard return; 24655fc0c56SAnton Blanchard 24755fc0c56SAnton Blanchard epow_log = (struct epow_errorlog *)pseries_log->data; 24855fc0c56SAnton Blanchard action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ 24955fc0c56SAnton Blanchard modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ 25055fc0c56SAnton Blanchard 25155fc0c56SAnton Blanchard switch (action_code) { 25255fc0c56SAnton Blanchard case EPOW_RESET: 253b4af279aSVipin K Parashar if (num_epow_events) { 254b4af279aSVipin K Parashar pr_info("Non critical power/cooling issue cleared\n"); 255b4af279aSVipin K Parashar num_epow_events--; 256b4af279aSVipin K Parashar } 25755fc0c56SAnton Blanchard break; 25855fc0c56SAnton Blanchard 25955fc0c56SAnton Blanchard case EPOW_WARN_COOLING: 260b4af279aSVipin K Parashar pr_info("Non-critical cooling issue detected. Check RTAS error" 261b4af279aSVipin K Parashar " log for details\n"); 26255fc0c56SAnton Blanchard break; 26355fc0c56SAnton Blanchard 26455fc0c56SAnton Blanchard case EPOW_WARN_POWER: 265b4af279aSVipin K Parashar pr_info("Non-critical power issue detected. Check RTAS error" 266b4af279aSVipin K Parashar " log for details\n"); 26755fc0c56SAnton Blanchard break; 26855fc0c56SAnton Blanchard 26955fc0c56SAnton Blanchard case EPOW_SYSTEM_SHUTDOWN: 270d273fa91SYueHaibing handle_system_shutdown(modifier); 27155fc0c56SAnton Blanchard break; 27255fc0c56SAnton Blanchard 27355fc0c56SAnton Blanchard case EPOW_SYSTEM_HALT: 274b4af279aSVipin K Parashar pr_emerg("Critical power/cooling issue detected. Check RTAS" 275b4af279aSVipin K Parashar " error log for details. Powering off.\n"); 2761b7e0cbeSliguang orderly_poweroff(true); 27755fc0c56SAnton Blanchard break; 27855fc0c56SAnton Blanchard 27955fc0c56SAnton Blanchard case EPOW_MAIN_ENCLOSURE: 28055fc0c56SAnton Blanchard case EPOW_POWER_OFF: 281b4af279aSVipin K Parashar pr_emerg("System about to lose power. Check RTAS error log " 282b4af279aSVipin K Parashar " for details. Powering off immediately.\n"); 28355fc0c56SAnton Blanchard emergency_sync(); 28455fc0c56SAnton Blanchard kernel_power_off(); 28555fc0c56SAnton Blanchard break; 28655fc0c56SAnton Blanchard 28755fc0c56SAnton Blanchard default: 288b4af279aSVipin K Parashar pr_err("Unknown power/cooling event (action code = %d)\n", 28955fc0c56SAnton Blanchard action_code); 29055fc0c56SAnton Blanchard } 291b4af279aSVipin K Parashar 292b4af279aSVipin K Parashar /* Increment epow events counter variable */ 293b4af279aSVipin K Parashar if (action_code != EPOW_RESET) 294b4af279aSVipin K Parashar num_epow_events++; 29555fc0c56SAnton Blanchard } 29655fc0c56SAnton Blanchard 297b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) 298b7d9eb39SJohn Allen { 299b7d9eb39SJohn Allen struct pseries_errorlog *pseries_log; 300b7d9eb39SJohn Allen struct pseries_hp_errorlog *hp_elog; 301b7d9eb39SJohn Allen 302b7d9eb39SJohn Allen spin_lock(&ras_log_buf_lock); 303b7d9eb39SJohn Allen 304b7d9eb39SJohn Allen rtas_call(ras_check_exception_token, 6, 1, NULL, 305b7d9eb39SJohn Allen RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), 306b7d9eb39SJohn Allen RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), 307b7d9eb39SJohn Allen rtas_get_error_log_max()); 308b7d9eb39SJohn Allen 309b7d9eb39SJohn Allen pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, 310b7d9eb39SJohn Allen PSERIES_ELOG_SECT_ID_HOTPLUG); 311b7d9eb39SJohn Allen hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; 312b7d9eb39SJohn Allen 313b7d9eb39SJohn Allen /* 314b7d9eb39SJohn Allen * Since PCI hotplug is not currently supported on pseries, put PCI 315b7d9eb39SJohn Allen * hotplug events on the ras_log_buf to be handled by rtas_errd. 316b7d9eb39SJohn Allen */ 317b7d9eb39SJohn Allen if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || 3184c5d87dbSOliver O'Halloran hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || 3194c5d87dbSOliver O'Halloran hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) 320fd12527aSNathan Fontenot queue_hotplug_event(hp_elog); 321b7d9eb39SJohn Allen else 322b7d9eb39SJohn Allen log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 323b7d9eb39SJohn Allen 324b7d9eb39SJohn Allen spin_unlock(&ras_log_buf_lock); 325b7d9eb39SJohn Allen return IRQ_HANDLED; 326b7d9eb39SJohn Allen } 327b7d9eb39SJohn Allen 32855fc0c56SAnton Blanchard /* Handle environmental and power warning (EPOW) interrupts. */ 3297d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) 330d9953105SMichael Ellerman { 33155fc0c56SAnton Blanchard int state; 332d9953105SMichael Ellerman int critical; 333d9953105SMichael Ellerman 334aa23ea0cSCédric Le Goater rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); 335d9953105SMichael Ellerman 336d9953105SMichael Ellerman if (state > 3) 337d9953105SMichael Ellerman critical = 1; /* Time Critical */ 338d9953105SMichael Ellerman else 339d9953105SMichael Ellerman critical = 0; 340d9953105SMichael Ellerman 341d9953105SMichael Ellerman spin_lock(&ras_log_buf_lock); 342d9953105SMichael Ellerman 343aa23ea0cSCédric Le Goater rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, 344aa23ea0cSCédric Le Goater virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), 345d9953105SMichael Ellerman rtas_get_error_log_max()); 346d9953105SMichael Ellerman 347d9953105SMichael Ellerman log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 348d9953105SMichael Ellerman 34955fc0c56SAnton Blanchard rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); 35055fc0c56SAnton Blanchard 351d9953105SMichael Ellerman spin_unlock(&ras_log_buf_lock); 352d9953105SMichael Ellerman return IRQ_HANDLED; 353d9953105SMichael Ellerman } 354d9953105SMichael Ellerman 355d9953105SMichael Ellerman /* 356d9953105SMichael Ellerman * Handle hardware error interrupts. 357d9953105SMichael Ellerman * 358d9953105SMichael Ellerman * RTAS check-exception is called to collect data on the exception. If 359d9953105SMichael Ellerman * the error is deemed recoverable, we log a warning and return. 360d9953105SMichael Ellerman * For nonrecoverable errors, an error is logged and we stop all processing 361d9953105SMichael Ellerman * as quickly as possible in order to prevent propagation of the failure. 362d9953105SMichael Ellerman */ 3637d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id) 364d9953105SMichael Ellerman { 365d9953105SMichael Ellerman struct rtas_error_log *rtas_elog; 366cc8b5263SAnton Blanchard int status; 367d9953105SMichael Ellerman int fatal; 368d9953105SMichael Ellerman 369d9953105SMichael Ellerman spin_lock(&ras_log_buf_lock); 370d9953105SMichael Ellerman 371d9953105SMichael Ellerman status = rtas_call(ras_check_exception_token, 6, 1, NULL, 372b08e281bSMark Nelson RTAS_VECTOR_EXTERNAL_INTERRUPT, 373476eb491SGrant Likely virq_to_hw(irq), 374d9953105SMichael Ellerman RTAS_INTERNAL_ERROR, 1 /* Time Critical */, 375d9953105SMichael Ellerman __pa(&ras_log_buf), 376d9953105SMichael Ellerman rtas_get_error_log_max()); 377d9953105SMichael Ellerman 378d9953105SMichael Ellerman rtas_elog = (struct rtas_error_log *)ras_log_buf; 379d9953105SMichael Ellerman 380a08a53eaSGreg Kurz if (status == 0 && 381a08a53eaSGreg Kurz rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) 382d9953105SMichael Ellerman fatal = 1; 383d9953105SMichael Ellerman else 384d9953105SMichael Ellerman fatal = 0; 385d9953105SMichael Ellerman 386d9953105SMichael Ellerman /* format and print the extended information */ 387d9953105SMichael Ellerman log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); 388d9953105SMichael Ellerman 389d9953105SMichael Ellerman if (fatal) { 390b4af279aSVipin K Parashar pr_emerg("Fatal hardware error detected. Check RTAS error" 391b4af279aSVipin K Parashar " log for details. Powering off immediately\n"); 392cc8b5263SAnton Blanchard emergency_sync(); 393cc8b5263SAnton Blanchard kernel_power_off(); 394d9953105SMichael Ellerman } else { 395b4af279aSVipin K Parashar pr_err("Recoverable hardware error detected\n"); 396d9953105SMichael Ellerman } 397d9953105SMichael Ellerman 398d9953105SMichael Ellerman spin_unlock(&ras_log_buf_lock); 399d9953105SMichael Ellerman return IRQ_HANDLED; 400d9953105SMichael Ellerman } 401d9953105SMichael Ellerman 402d368514cSAnton Blanchard /* 403d368514cSAnton Blanchard * Some versions of FWNMI place the buffer inside the 4kB page starting at 404d368514cSAnton Blanchard * 0x7000. Other versions place it inside the rtas buffer. We check both. 405deb70f7aSNicholas Piggin * Minimum size of the buffer is 16 bytes. 406d368514cSAnton Blanchard */ 407d368514cSAnton Blanchard #define VALID_FWNMI_BUFFER(A) \ 408deb70f7aSNicholas Piggin ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ 409deb70f7aSNicholas Piggin (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) 410d368514cSAnton Blanchard 41194675cceSMahesh Salgaonkar static inline struct rtas_error_log *fwnmi_get_errlog(void) 41294675cceSMahesh Salgaonkar { 41394675cceSMahesh Salgaonkar return (struct rtas_error_log *)local_paca->mce_data_buf; 41494675cceSMahesh Salgaonkar } 41594675cceSMahesh Salgaonkar 416d7b14c5cSNicholas Piggin static __be64 *fwnmi_get_savep(struct pt_regs *regs) 417d7b14c5cSNicholas Piggin { 418d7b14c5cSNicholas Piggin unsigned long savep_ra; 419d7b14c5cSNicholas Piggin 420d7b14c5cSNicholas Piggin /* Mask top two bits */ 421d7b14c5cSNicholas Piggin savep_ra = regs->gpr[3] & ~(0x3UL << 62); 422d7b14c5cSNicholas Piggin if (!VALID_FWNMI_BUFFER(savep_ra)) { 423d7b14c5cSNicholas Piggin printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); 424d7b14c5cSNicholas Piggin return NULL; 425d7b14c5cSNicholas Piggin } 426d7b14c5cSNicholas Piggin 427d7b14c5cSNicholas Piggin return __va(savep_ra); 428d7b14c5cSNicholas Piggin } 429d7b14c5cSNicholas Piggin 430d368514cSAnton Blanchard /* 431d368514cSAnton Blanchard * Get the error information for errors coming through the 432d9953105SMichael Ellerman * FWNMI vectors. The pt_regs' r3 will be updated to reflect 433d9953105SMichael Ellerman * the actual r3 if possible, and a ptr to the error log entry 434d9953105SMichael Ellerman * will be returned if found. 435d9953105SMichael Ellerman * 43694675cceSMahesh Salgaonkar * Use one buffer mce_data_buf per cpu to store RTAS error. 437d368514cSAnton Blanchard * 43894675cceSMahesh Salgaonkar * The mce_data_buf does not have any locks or protection around it, 439d9953105SMichael Ellerman * if a second machine check comes in, or a system reset is done 440d9953105SMichael Ellerman * before we have logged the error, then we will get corruption in the 441d9953105SMichael Ellerman * error log. This is preferable over holding off on calling 442d9953105SMichael Ellerman * ibm,nmi-interlock which would result in us checkstopping if a 443d9953105SMichael Ellerman * second machine check did come in. 444d9953105SMichael Ellerman */ 445d9953105SMichael Ellerman static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) 446d9953105SMichael Ellerman { 44794675cceSMahesh Salgaonkar struct rtas_error_log *h; 448d7b14c5cSNicholas Piggin __be64 *savep; 449d9953105SMichael Ellerman 450d7b14c5cSNicholas Piggin savep = fwnmi_get_savep(regs); 451d7b14c5cSNicholas Piggin if (!savep) 452d368514cSAnton Blanchard return NULL; 453d368514cSAnton Blanchard 454cd813e1cSMahesh Salgaonkar regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ 455d368514cSAnton Blanchard 456d368514cSAnton Blanchard h = (struct rtas_error_log *)&savep[1]; 45794675cceSMahesh Salgaonkar /* Use the per cpu buffer from paca to store rtas error log */ 45894675cceSMahesh Salgaonkar memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); 459a08a53eaSGreg Kurz if (!rtas_error_extended(h)) { 46094675cceSMahesh Salgaonkar memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); 461d368514cSAnton Blanchard } else { 462a08a53eaSGreg Kurz int len, error_log_length; 463d368514cSAnton Blanchard 464a08a53eaSGreg Kurz error_log_length = 8 + rtas_error_extended_log_length(h); 46574e96bf4SMahesh Salgaonkar len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); 46694675cceSMahesh Salgaonkar memcpy(local_paca->mce_data_buf, h, len); 467d368514cSAnton Blanchard } 468d368514cSAnton Blanchard 46994675cceSMahesh Salgaonkar return (struct rtas_error_log *)local_paca->mce_data_buf; 470d9953105SMichael Ellerman } 471d9953105SMichael Ellerman 472d9953105SMichael Ellerman /* Call this when done with the data returned by FWNMI_get_errinfo. 473d9953105SMichael Ellerman * It will release the saved data area for other CPUs in the 474d9953105SMichael Ellerman * partition to receive FWNMI errors. 475d9953105SMichael Ellerman */ 476d9953105SMichael Ellerman static void fwnmi_release_errinfo(void) 477d9953105SMichael Ellerman { 4782576f5f9SNicholas Piggin struct rtas_args rtas_args; 4792576f5f9SNicholas Piggin int ret; 4802576f5f9SNicholas Piggin 4812576f5f9SNicholas Piggin /* 4822576f5f9SNicholas Piggin * On pseries, the machine check stack is limited to under 4GB, so 4832576f5f9SNicholas Piggin * args can be on-stack. 4842576f5f9SNicholas Piggin */ 4852576f5f9SNicholas Piggin rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); 4862576f5f9SNicholas Piggin ret = be32_to_cpu(rtas_args.rets[0]); 487d9953105SMichael Ellerman if (ret != 0) 488d368514cSAnton Blanchard printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); 489d9953105SMichael Ellerman } 490d9953105SMichael Ellerman 491c902be71SArnd Bergmann int pSeries_system_reset_exception(struct pt_regs *regs) 492d9953105SMichael Ellerman { 493bded0706SNicholas Piggin #ifdef __LITTLE_ENDIAN__ 494bded0706SNicholas Piggin /* 495bded0706SNicholas Piggin * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try 496bded0706SNicholas Piggin * to detect the bad SRR1 pattern here. Flip the NIP back to correct 497bded0706SNicholas Piggin * endian for reporting purposes. Unfortunately the MSR can't be fixed, 498bded0706SNicholas Piggin * so clear it. It will be missing MSR_RI so we won't try to recover. 499bded0706SNicholas Piggin */ 500bded0706SNicholas Piggin if ((be64_to_cpu(regs->msr) & 501bded0706SNicholas Piggin (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| 502bded0706SNicholas Piggin MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { 50359dc5bfcSNicholas Piggin regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); 50459dc5bfcSNicholas Piggin regs_set_return_msr(regs, 0); 505bded0706SNicholas Piggin } 506bded0706SNicholas Piggin #endif 507bded0706SNicholas Piggin 508d9953105SMichael Ellerman if (fwnmi_active) { 509d7b14c5cSNicholas Piggin __be64 *savep; 510d7b14c5cSNicholas Piggin 511d7b14c5cSNicholas Piggin /* 512d7b14c5cSNicholas Piggin * Firmware (PowerVM and KVM) saves r3 to a save area like 513d7b14c5cSNicholas Piggin * machine check, which is not exactly what PAPR (2.9) 514d7b14c5cSNicholas Piggin * suggests but there is no way to detect otherwise, so this 515d7b14c5cSNicholas Piggin * is the interface now. 516d7b14c5cSNicholas Piggin * 517d7b14c5cSNicholas Piggin * System resets do not save any error log or require an 518d7b14c5cSNicholas Piggin * "ibm,nmi-interlock" rtas call to release. 519d7b14c5cSNicholas Piggin */ 520d7b14c5cSNicholas Piggin 521d7b14c5cSNicholas Piggin savep = fwnmi_get_savep(regs); 522d7b14c5cSNicholas Piggin if (savep) 523d7b14c5cSNicholas Piggin regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ 524d9953105SMichael Ellerman } 525102c05e8SNicholas Piggin 526102c05e8SNicholas Piggin if (smp_handle_nmi_ipi(regs)) 527102c05e8SNicholas Piggin return 1; 528102c05e8SNicholas Piggin 529c902be71SArnd Bergmann return 0; /* need to perform reset */ 530d9953105SMichael Ellerman } 531d9953105SMichael Ellerman 5324ff753feSGanesh Goudar static int mce_handle_err_realmode(int disposition, u8 error_type) 5334ff753feSGanesh Goudar { 5344ff753feSGanesh Goudar #ifdef CONFIG_PPC_BOOK3S_64 5354ff753feSGanesh Goudar if (disposition == RTAS_DISP_NOT_RECOVERED) { 5364ff753feSGanesh Goudar switch (error_type) { 5374ff753feSGanesh Goudar case MC_ERROR_TYPE_ERAT: 53882f70a05SNicholas Piggin flush_erat(); 53982f70a05SNicholas Piggin disposition = RTAS_DISP_FULLY_RECOVERED; 54082f70a05SNicholas Piggin break; 54182f70a05SNicholas Piggin case MC_ERROR_TYPE_SLB: 542387e220aSNicholas Piggin #ifdef CONFIG_PPC_64S_HASH_MMU 5434ff753feSGanesh Goudar /* 5444ff753feSGanesh Goudar * Store the old slb content in paca before flushing. 5454ff753feSGanesh Goudar * Print this when we go to virtual mode. 5464ff753feSGanesh Goudar * There are chances that we may hit MCE again if there 5474ff753feSGanesh Goudar * is a parity error on the SLB entry we trying to read 5484ff753feSGanesh Goudar * for saving. Hence limit the slb saving to single 5494ff753feSGanesh Goudar * level of recursion. 5504ff753feSGanesh Goudar */ 5514ff753feSGanesh Goudar if (local_paca->in_mce == 1) 5524ff753feSGanesh Goudar slb_save_contents(local_paca->mce_faulty_slbs); 5534ff753feSGanesh Goudar flush_and_reload_slb(); 5544ff753feSGanesh Goudar disposition = RTAS_DISP_FULLY_RECOVERED; 555387e220aSNicholas Piggin #endif 5564ff753feSGanesh Goudar break; 5574ff753feSGanesh Goudar default: 5584ff753feSGanesh Goudar break; 5594ff753feSGanesh Goudar } 5604ff753feSGanesh Goudar } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { 5614ff753feSGanesh Goudar /* Platform corrected itself but could be degraded */ 5624ff753feSGanesh Goudar pr_err("MCE: limited recovery, system may be degraded\n"); 5634ff753feSGanesh Goudar disposition = RTAS_DISP_FULLY_RECOVERED; 5644ff753feSGanesh Goudar } 5654ff753feSGanesh Goudar #endif 5664ff753feSGanesh Goudar return disposition; 5674ff753feSGanesh Goudar } 5688f0b8056SMahesh Salgaonkar 5694ff753feSGanesh Goudar static int mce_handle_err_virtmode(struct pt_regs *regs, 5704ff753feSGanesh Goudar struct rtas_error_log *errp, 5714ff753feSGanesh Goudar struct pseries_mc_errorlog *mce_log, 5724ff753feSGanesh Goudar int disposition) 5738f0b8056SMahesh Salgaonkar { 5749ca766f9SNicholas Piggin struct mce_error_info mce_err = { 0 }; 5759ca766f9SNicholas Piggin int initiator = rtas_error_initiator(errp); 5769ca766f9SNicholas Piggin int severity = rtas_error_severity(errp); 5774ff753feSGanesh Goudar unsigned long eaddr = 0, paddr = 0; 5788f0b8056SMahesh Salgaonkar u8 error_type, err_sub_type; 5798f0b8056SMahesh Salgaonkar 5804ff753feSGanesh Goudar if (!mce_log) 5814ff753feSGanesh Goudar goto out; 5824ff753feSGanesh Goudar 5834ff753feSGanesh Goudar error_type = mce_log->error_type; 5844ff753feSGanesh Goudar err_sub_type = rtas_mc_error_sub_type(mce_log); 5854ff753feSGanesh Goudar 5869ca766f9SNicholas Piggin if (initiator == RTAS_INITIATOR_UNKNOWN) 5879ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_UNKNOWN; 5889ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_CPU) 5899ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_CPU; 5909ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_PCI) 5919ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_PCI; 5929ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_ISA) 5939ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_ISA; 5949ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_MEMORY) 5959ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_MEMORY; 5969ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_POWERMGM) 5979ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_POWERMGM; 5989ca766f9SNicholas Piggin else 5999ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_UNKNOWN; 6008f0b8056SMahesh Salgaonkar 6019ca766f9SNicholas Piggin if (severity == RTAS_SEVERITY_NO_ERROR) 6029ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_NO_ERROR; 6039ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_EVENT) 6049ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_WARNING; 6059ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_WARNING) 6069ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_WARNING; 6079ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_ERROR_SYNC) 6089ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_SEVERE; 6099ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_ERROR) 6109ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_SEVERE; 6119ca766f9SNicholas Piggin else 6129ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_FATAL; 6138f0b8056SMahesh Salgaonkar 6149ca766f9SNicholas Piggin if (severity <= RTAS_SEVERITY_ERROR_SYNC) 6159ca766f9SNicholas Piggin mce_err.sync_error = true; 6169ca766f9SNicholas Piggin else 6179ca766f9SNicholas Piggin mce_err.sync_error = false; 6188f0b8056SMahesh Salgaonkar 6199ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; 6209ca766f9SNicholas Piggin mce_err.error_class = MCE_ECLASS_UNKNOWN; 621a43c1590SMahesh Salgaonkar 6224ff753feSGanesh Goudar switch (error_type) { 6239ca766f9SNicholas Piggin case MC_ERROR_TYPE_UE: 6249ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_UE; 625efbc4303SGanesh Goudar mce_common_process_ue(regs, &mce_err); 626efbc4303SGanesh Goudar if (mce_err.ignore_event) 627efbc4303SGanesh Goudar disposition = RTAS_DISP_FULLY_RECOVERED; 6289ca766f9SNicholas Piggin switch (err_sub_type) { 6299ca766f9SNicholas Piggin case MC_ERROR_UE_IFETCH: 6309ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; 6319ca766f9SNicholas Piggin break; 6329ca766f9SNicholas Piggin case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH: 6339ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; 6349ca766f9SNicholas Piggin break; 6359ca766f9SNicholas Piggin case MC_ERROR_UE_LOAD_STORE: 6369ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; 6379ca766f9SNicholas Piggin break; 6389ca766f9SNicholas Piggin case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE: 6399ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; 6409ca766f9SNicholas Piggin break; 6419ca766f9SNicholas Piggin case MC_ERROR_UE_INDETERMINATE: 6429ca766f9SNicholas Piggin default: 6439ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE; 6449ca766f9SNicholas Piggin break; 6459ca766f9SNicholas Piggin } 6469ca766f9SNicholas Piggin if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) 6479ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 6489ca766f9SNicholas Piggin 6499ca766f9SNicholas Piggin if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { 6509ca766f9SNicholas Piggin paddr = be64_to_cpu(mce_log->logical_address); 6519ca766f9SNicholas Piggin } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { 6529ca766f9SNicholas Piggin unsigned long pfn; 6539ca766f9SNicholas Piggin 6549ca766f9SNicholas Piggin pfn = addr_to_pfn(regs, eaddr); 6559ca766f9SNicholas Piggin if (pfn != ULONG_MAX) 6569ca766f9SNicholas Piggin paddr = pfn << PAGE_SHIFT; 6579ca766f9SNicholas Piggin } 6589ca766f9SNicholas Piggin 6599ca766f9SNicholas Piggin break; 6609ca766f9SNicholas Piggin case MC_ERROR_TYPE_SLB: 6619ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_SLB; 6629ca766f9SNicholas Piggin switch (err_sub_type) { 6639ca766f9SNicholas Piggin case MC_ERROR_SLB_PARITY: 6649ca766f9SNicholas Piggin mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY; 6659ca766f9SNicholas Piggin break; 6669ca766f9SNicholas Piggin case MC_ERROR_SLB_MULTIHIT: 6679ca766f9SNicholas Piggin mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; 6689ca766f9SNicholas Piggin break; 6699ca766f9SNicholas Piggin case MC_ERROR_SLB_INDETERMINATE: 6709ca766f9SNicholas Piggin default: 6719ca766f9SNicholas Piggin mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; 6729ca766f9SNicholas Piggin break; 6739ca766f9SNicholas Piggin } 674*0f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 6759ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 6769ca766f9SNicholas Piggin break; 6779ca766f9SNicholas Piggin case MC_ERROR_TYPE_ERAT: 6789ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_ERAT; 6799ca766f9SNicholas Piggin switch (err_sub_type) { 6809ca766f9SNicholas Piggin case MC_ERROR_ERAT_PARITY: 6819ca766f9SNicholas Piggin mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY; 6829ca766f9SNicholas Piggin break; 6839ca766f9SNicholas Piggin case MC_ERROR_ERAT_MULTIHIT: 6849ca766f9SNicholas Piggin mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; 6859ca766f9SNicholas Piggin break; 6869ca766f9SNicholas Piggin case MC_ERROR_ERAT_INDETERMINATE: 6879ca766f9SNicholas Piggin default: 6889ca766f9SNicholas Piggin mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; 6899ca766f9SNicholas Piggin break; 6909ca766f9SNicholas Piggin } 691*0f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 6929ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 6939ca766f9SNicholas Piggin break; 6949ca766f9SNicholas Piggin case MC_ERROR_TYPE_TLB: 6959ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_TLB; 6969ca766f9SNicholas Piggin switch (err_sub_type) { 6979ca766f9SNicholas Piggin case MC_ERROR_TLB_PARITY: 6989ca766f9SNicholas Piggin mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY; 6999ca766f9SNicholas Piggin break; 7009ca766f9SNicholas Piggin case MC_ERROR_TLB_MULTIHIT: 7019ca766f9SNicholas Piggin mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; 7029ca766f9SNicholas Piggin break; 7039ca766f9SNicholas Piggin case MC_ERROR_TLB_INDETERMINATE: 7049ca766f9SNicholas Piggin default: 7059ca766f9SNicholas Piggin mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; 7069ca766f9SNicholas Piggin break; 7079ca766f9SNicholas Piggin } 708*0f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 7099ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 7109ca766f9SNicholas Piggin break; 7119ca766f9SNicholas Piggin case MC_ERROR_TYPE_D_CACHE: 7129ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_DCACHE; 7139ca766f9SNicholas Piggin break; 7149ca766f9SNicholas Piggin case MC_ERROR_TYPE_I_CACHE: 715864ec4d4SGanesh Goudar mce_err.error_type = MCE_ERROR_TYPE_ICACHE; 7169ca766f9SNicholas Piggin break; 717*0f54bddeSGanesh Goudar case MC_ERROR_TYPE_CTRL_MEM_ACCESS: 718*0f54bddeSGanesh Goudar mce_err.error_type = MCE_ERROR_TYPE_RA; 719*0f54bddeSGanesh Goudar switch (err_sub_type) { 720*0f54bddeSGanesh Goudar case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: 721*0f54bddeSGanesh Goudar mce_err.u.ra_error_type = 722*0f54bddeSGanesh Goudar MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; 723*0f54bddeSGanesh Goudar break; 724*0f54bddeSGanesh Goudar case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: 725*0f54bddeSGanesh Goudar mce_err.u.ra_error_type = 726*0f54bddeSGanesh Goudar MCE_RA_ERROR_LOAD_STORE_FOREIGN; 727*0f54bddeSGanesh Goudar break; 728*0f54bddeSGanesh Goudar } 729*0f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 730*0f54bddeSGanesh Goudar eaddr = be64_to_cpu(mce_log->effective_address); 731*0f54bddeSGanesh Goudar break; 7329ca766f9SNicholas Piggin case MC_ERROR_TYPE_UNKNOWN: 7339ca766f9SNicholas Piggin default: 7349ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; 7359ca766f9SNicholas Piggin break; 7369ca766f9SNicholas Piggin } 737a43c1590SMahesh Salgaonkar out: 7384ff753feSGanesh Goudar save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, 7394ff753feSGanesh Goudar &mce_err, regs->nip, eaddr, paddr); 7404ff753feSGanesh Goudar return disposition; 7414ff753feSGanesh Goudar } 7424ff753feSGanesh Goudar 7434ff753feSGanesh Goudar static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) 7444ff753feSGanesh Goudar { 7454ff753feSGanesh Goudar struct pseries_errorlog *pseries_log; 7464ff753feSGanesh Goudar struct pseries_mc_errorlog *mce_log = NULL; 7474ff753feSGanesh Goudar int disposition = rtas_error_disposition(errp); 74874c3354bSNicholas Piggin unsigned long msr; 7494ff753feSGanesh Goudar u8 error_type; 7504ff753feSGanesh Goudar 7514ff753feSGanesh Goudar if (!rtas_error_extended(errp)) 7524ff753feSGanesh Goudar goto out; 7534ff753feSGanesh Goudar 7544ff753feSGanesh Goudar pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); 7554ff753feSGanesh Goudar if (!pseries_log) 7564ff753feSGanesh Goudar goto out; 7574ff753feSGanesh Goudar 7584ff753feSGanesh Goudar mce_log = (struct pseries_mc_errorlog *)pseries_log->data; 7594ff753feSGanesh Goudar error_type = mce_log->error_type; 7604ff753feSGanesh Goudar 7614ff753feSGanesh Goudar disposition = mce_handle_err_realmode(disposition, error_type); 7624ff753feSGanesh Goudar 763a95a0a16SGanesh Goudar /* 764a95a0a16SGanesh Goudar * Enable translation as we will be accessing per-cpu variables 765a95a0a16SGanesh Goudar * in save_mce_event() which may fall outside RMO region, also 766a95a0a16SGanesh Goudar * leave it enabled because subsequently we will be queuing work 767a95a0a16SGanesh Goudar * to workqueues where again per-cpu variables accessed, besides 768a95a0a16SGanesh Goudar * fwnmi_release_errinfo() crashes when called in realmode on 769a95a0a16SGanesh Goudar * pseries. 770a95a0a16SGanesh Goudar * Note: All the realmode handling like flushing SLB entries for 771a95a0a16SGanesh Goudar * SLB multihit is done by now. 772a95a0a16SGanesh Goudar */ 7734ff753feSGanesh Goudar out: 77474c3354bSNicholas Piggin msr = mfmsr(); 77574c3354bSNicholas Piggin mtmsr(msr | MSR_IR | MSR_DR); 77674c3354bSNicholas Piggin 7774ff753feSGanesh Goudar disposition = mce_handle_err_virtmode(regs, errp, mce_log, 7784ff753feSGanesh Goudar disposition); 77974c3354bSNicholas Piggin 78074c3354bSNicholas Piggin /* 78174c3354bSNicholas Piggin * Queue irq work to log this rtas event later. 78274c3354bSNicholas Piggin * irq_work_queue uses per-cpu variables, so do this in virt 78374c3354bSNicholas Piggin * mode as well. 78474c3354bSNicholas Piggin */ 78574c3354bSNicholas Piggin irq_work_queue(&mce_errlog_process_work); 78674c3354bSNicholas Piggin 78774c3354bSNicholas Piggin mtmsr(msr); 78874c3354bSNicholas Piggin 789a43c1590SMahesh Salgaonkar return disposition; 790a43c1590SMahesh Salgaonkar } 791a43c1590SMahesh Salgaonkar 792d9953105SMichael Ellerman /* 79394675cceSMahesh Salgaonkar * Process MCE rtas errlog event. 79494675cceSMahesh Salgaonkar */ 79594675cceSMahesh Salgaonkar static void mce_process_errlog_event(struct irq_work *work) 79694675cceSMahesh Salgaonkar { 79794675cceSMahesh Salgaonkar struct rtas_error_log *err; 79894675cceSMahesh Salgaonkar 79994675cceSMahesh Salgaonkar err = fwnmi_get_errlog(); 80094675cceSMahesh Salgaonkar log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); 80194675cceSMahesh Salgaonkar } 80294675cceSMahesh Salgaonkar 80394675cceSMahesh Salgaonkar /* 804d9953105SMichael Ellerman * See if we can recover from a machine check exception. 805d9953105SMichael Ellerman * This is only called on power4 (or above) and only via 806d9953105SMichael Ellerman * the Firmware Non-Maskable Interrupts (fwnmi) handler 807d9953105SMichael Ellerman * which provides the error analysis for us. 808d9953105SMichael Ellerman * 809d9953105SMichael Ellerman * Return 1 if corrected (or delivered a signal). 810d9953105SMichael Ellerman * Return 0 if there is nothing we can do. 811d9953105SMichael Ellerman */ 8129ca766f9SNicholas Piggin static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) 813d9953105SMichael Ellerman { 814d47d1d8aSAnton Blanchard int recovered = 0; 8158f0b8056SMahesh Salgaonkar 816806c0e6eSChristophe Leroy if (regs_is_unrecoverable(regs)) { 817d47d1d8aSAnton Blanchard /* If MSR_RI isn't set, we cannot recover */ 8188f0b8056SMahesh Salgaonkar pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); 819d47d1d8aSAnton Blanchard recovered = 0; 8209ca766f9SNicholas Piggin } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { 821d9953105SMichael Ellerman /* Platform corrected itself */ 822d47d1d8aSAnton Blanchard recovered = 1; 8239ca766f9SNicholas Piggin } else if (evt->severity == MCE_SEV_FATAL) { 8249ca766f9SNicholas Piggin /* Fatal machine check */ 8259ca766f9SNicholas Piggin pr_err("Machine check interrupt is fatal\n"); 8269ca766f9SNicholas Piggin recovered = 0; 827d9953105SMichael Ellerman } 828d9953105SMichael Ellerman 8299ca766f9SNicholas Piggin if (!recovered && evt->sync_error) { 8309ca766f9SNicholas Piggin /* 8319ca766f9SNicholas Piggin * Try to kill processes if we get a synchronous machine check 8329ca766f9SNicholas Piggin * (e.g., one caused by execution of this instruction). This 8339ca766f9SNicholas Piggin * will devolve into a panic if we try to kill init or are in 8349ca766f9SNicholas Piggin * an interrupt etc. 8359ca766f9SNicholas Piggin * 8369ca766f9SNicholas Piggin * TODO: Queue up this address for hwpoisioning later. 8379ca766f9SNicholas Piggin * TODO: This is not quite right for d-side machine 8389ca766f9SNicholas Piggin * checks ->nip is not necessarily the important 8399ca766f9SNicholas Piggin * address. 8409ca766f9SNicholas Piggin */ 8419ca766f9SNicholas Piggin if ((user_mode(regs))) { 8429ca766f9SNicholas Piggin _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 8439ca766f9SNicholas Piggin recovered = 1; 8449ca766f9SNicholas Piggin } else if (die_will_crash()) { 8459ca766f9SNicholas Piggin /* 8469ca766f9SNicholas Piggin * die() would kill the kernel, so better to go via 8479ca766f9SNicholas Piggin * the platform reboot code that will log the 8489ca766f9SNicholas Piggin * machine check. 8499ca766f9SNicholas Piggin */ 8509ca766f9SNicholas Piggin recovered = 0; 8519ca766f9SNicholas Piggin } else { 852209e9d50SNicholas Piggin die_mce("Machine check", regs, SIGBUS); 8539ca766f9SNicholas Piggin recovered = 1; 8549ca766f9SNicholas Piggin } 8559ca766f9SNicholas Piggin } 856d9953105SMichael Ellerman 857d47d1d8aSAnton Blanchard return recovered; 858d9953105SMichael Ellerman } 859d9953105SMichael Ellerman 860d9953105SMichael Ellerman /* 861d9953105SMichael Ellerman * Handle a machine check. 862d9953105SMichael Ellerman * 863d9953105SMichael Ellerman * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) 864d9953105SMichael Ellerman * should be present. If so the handler which called us tells us if the 865d9953105SMichael Ellerman * error was recovered (never true if RI=0). 866d9953105SMichael Ellerman * 867d9953105SMichael Ellerman * On hardware prior to Power 4 these exceptions were asynchronous which 868d9953105SMichael Ellerman * means we can't tell exactly where it occurred and so we can't recover. 869d9953105SMichael Ellerman */ 870d9953105SMichael Ellerman int pSeries_machine_check_exception(struct pt_regs *regs) 871d9953105SMichael Ellerman { 8729ca766f9SNicholas Piggin struct machine_check_event evt; 873d9953105SMichael Ellerman 8749ca766f9SNicholas Piggin if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 8759ca766f9SNicholas Piggin return 0; 8769ca766f9SNicholas Piggin 8779ca766f9SNicholas Piggin /* Print things out */ 8789ca766f9SNicholas Piggin if (evt.version != MCE_V1) { 8799ca766f9SNicholas Piggin pr_err("Machine Check Exception, Unknown event version %d !\n", 8809ca766f9SNicholas Piggin evt.version); 8819ca766f9SNicholas Piggin return 0; 882d9953105SMichael Ellerman } 8839ca766f9SNicholas Piggin machine_check_print_event_info(&evt, user_mode(regs), false); 8849ca766f9SNicholas Piggin 8859ca766f9SNicholas Piggin if (recover_mce(regs, &evt)) 8869ca766f9SNicholas Piggin return 1; 887d9953105SMichael Ellerman 888d9953105SMichael Ellerman return 0; 889d9953105SMichael Ellerman } 890a43c1590SMahesh Salgaonkar 891a43c1590SMahesh Salgaonkar long pseries_machine_check_realmode(struct pt_regs *regs) 892a43c1590SMahesh Salgaonkar { 893a43c1590SMahesh Salgaonkar struct rtas_error_log *errp; 894a43c1590SMahesh Salgaonkar int disposition; 895a43c1590SMahesh Salgaonkar 896a43c1590SMahesh Salgaonkar if (fwnmi_active) { 897a43c1590SMahesh Salgaonkar errp = fwnmi_get_errinfo(regs); 898a43c1590SMahesh Salgaonkar /* 899a43c1590SMahesh Salgaonkar * Call to fwnmi_release_errinfo() in real mode causes kernel 900a43c1590SMahesh Salgaonkar * to panic. Hence we will call it as soon as we go into 901a43c1590SMahesh Salgaonkar * virtual mode. 902a43c1590SMahesh Salgaonkar */ 9039ca766f9SNicholas Piggin disposition = mce_handle_error(regs, errp); 9049ca766f9SNicholas Piggin 90574c3354bSNicholas Piggin fwnmi_release_errinfo(); 9069ca766f9SNicholas Piggin 907a43c1590SMahesh Salgaonkar if (disposition == RTAS_DISP_FULLY_RECOVERED) 908a43c1590SMahesh Salgaonkar return 1; 909a43c1590SMahesh Salgaonkar } 910a43c1590SMahesh Salgaonkar 911a43c1590SMahesh Salgaonkar return 0; 912a43c1590SMahesh Salgaonkar } 913