11a59d1b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 2d9953105SMichael Ellerman /* 3d9953105SMichael Ellerman * Copyright (C) 2001 Dave Engebretsen IBM Corporation 4d9953105SMichael Ellerman */ 5d9953105SMichael Ellerman 6d9953105SMichael Ellerman #include <linux/sched.h> 7d9953105SMichael Ellerman #include <linux/interrupt.h> 8d9953105SMichael Ellerman #include <linux/irq.h> 990128997SAnton Blanchard #include <linux/of.h> 1055fc0c56SAnton Blanchard #include <linux/fs.h> 1155fc0c56SAnton Blanchard #include <linux/reboot.h> 1294675cceSMahesh Salgaonkar #include <linux/irq_work.h> 13d9953105SMichael Ellerman 14d9953105SMichael Ellerman #include <asm/machdep.h> 15d9953105SMichael Ellerman #include <asm/rtas.h> 168c4f1f29SMichael Ellerman #include <asm/firmware.h> 17a43c1590SMahesh Salgaonkar #include <asm/mce.h> 18d9953105SMichael Ellerman 19577830b0SMichael Ellerman #include "pseries.h" 20c902be71SArnd Bergmann 21d9953105SMichael Ellerman static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX]; 22d9953105SMichael Ellerman static DEFINE_SPINLOCK(ras_log_buf_lock); 23d9953105SMichael Ellerman 24d9953105SMichael Ellerman static int ras_check_exception_token; 25d9953105SMichael Ellerman 26d9953105SMichael Ellerman #define EPOW_SENSOR_TOKEN 9 27d9953105SMichael Ellerman #define EPOW_SENSOR_INDEX 0 28d9953105SMichael Ellerman 29b4af279aSVipin K Parashar /* EPOW events counter variable */ 30b4af279aSVipin K Parashar static int num_epow_events; 31b4af279aSVipin K Parashar 32b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id); 337d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); 347d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id); 35d9953105SMichael Ellerman 3604fce21cSMahesh Salgaonkar /* RTAS pseries MCE errorlog section. */ 3704fce21cSMahesh Salgaonkar struct pseries_mc_errorlog { 3804fce21cSMahesh Salgaonkar __be32 fru_id; 3904fce21cSMahesh Salgaonkar __be32 proc_id; 4004fce21cSMahesh Salgaonkar u8 error_type; 4104fce21cSMahesh Salgaonkar /* 4204fce21cSMahesh Salgaonkar * sub_err_type (1 byte). Bit fields depends on error_type 4304fce21cSMahesh Salgaonkar * 4404fce21cSMahesh Salgaonkar * MSB0 4504fce21cSMahesh Salgaonkar * | 4604fce21cSMahesh Salgaonkar * V 4704fce21cSMahesh Salgaonkar * 01234567 4804fce21cSMahesh Salgaonkar * XXXXXXXX 4904fce21cSMahesh Salgaonkar * 5004fce21cSMahesh Salgaonkar * For error_type == MC_ERROR_TYPE_UE 5104fce21cSMahesh Salgaonkar * XXXXXXXX 5204fce21cSMahesh Salgaonkar * X 1: Permanent or Transient UE. 5304fce21cSMahesh Salgaonkar * X 1: Effective address provided. 5404fce21cSMahesh Salgaonkar * X 1: Logical address provided. 5504fce21cSMahesh Salgaonkar * XX 2: Reserved. 5604fce21cSMahesh Salgaonkar * XXX 3: Type of UE error. 5704fce21cSMahesh Salgaonkar * 580f54bddeSGanesh Goudar * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB 5904fce21cSMahesh Salgaonkar * XXXXXXXX 6004fce21cSMahesh Salgaonkar * X 1: Effective address provided. 6104fce21cSMahesh Salgaonkar * XXXXX 5: Reserved. 6204fce21cSMahesh Salgaonkar * XX 2: Type of SLB/ERAT/TLB error. 630f54bddeSGanesh Goudar * 640f54bddeSGanesh Goudar * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS 650f54bddeSGanesh Goudar * XXXXXXXX 660f54bddeSGanesh Goudar * X 1: Error causing address provided. 670f54bddeSGanesh Goudar * XXX 3: Type of error. 680f54bddeSGanesh Goudar * XXXX 4: Reserved. 6904fce21cSMahesh Salgaonkar */ 7004fce21cSMahesh Salgaonkar u8 sub_err_type; 7104fce21cSMahesh Salgaonkar u8 reserved_1[6]; 7204fce21cSMahesh Salgaonkar __be64 effective_address; 7304fce21cSMahesh Salgaonkar __be64 logical_address; 7404fce21cSMahesh Salgaonkar } __packed; 7504fce21cSMahesh Salgaonkar 7604fce21cSMahesh Salgaonkar /* RTAS pseries MCE error types */ 7704fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_UE 0x00 7804fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_SLB 0x01 7904fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_ERAT 0x02 809ca766f9SNicholas Piggin #define MC_ERROR_TYPE_UNKNOWN 0x03 8104fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_TLB 0x04 8204fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_D_CACHE 0x05 8304fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_I_CACHE 0x07 840f54bddeSGanesh Goudar #define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08 8504fce21cSMahesh Salgaonkar 8604fce21cSMahesh Salgaonkar /* RTAS pseries MCE error sub types */ 8704fce21cSMahesh Salgaonkar #define MC_ERROR_UE_INDETERMINATE 0 8804fce21cSMahesh Salgaonkar #define MC_ERROR_UE_IFETCH 1 8904fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2 9004fce21cSMahesh Salgaonkar #define MC_ERROR_UE_LOAD_STORE 3 9104fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4 9204fce21cSMahesh Salgaonkar 939ca766f9SNicholas Piggin #define UE_EFFECTIVE_ADDR_PROVIDED 0x40 949ca766f9SNicholas Piggin #define UE_LOGICAL_ADDR_PROVIDED 0x20 950f54bddeSGanesh Goudar #define MC_EFFECTIVE_ADDR_PROVIDED 0x80 969ca766f9SNicholas Piggin 9704fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_PARITY 0 9804fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_MULTIHIT 1 9904fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_INDETERMINATE 2 10004fce21cSMahesh Salgaonkar 10104fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_PARITY 1 10204fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_MULTIHIT 2 10304fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_INDETERMINATE 3 10404fce21cSMahesh Salgaonkar 10504fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_PARITY 1 10604fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_MULTIHIT 2 10704fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_INDETERMINATE 3 10804fce21cSMahesh Salgaonkar 1090f54bddeSGanesh Goudar #define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0 1100f54bddeSGanesh Goudar #define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1 1110f54bddeSGanesh Goudar 11204fce21cSMahesh Salgaonkar static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog) 11304fce21cSMahesh Salgaonkar { 11404fce21cSMahesh Salgaonkar switch (mlog->error_type) { 11504fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_UE: 11604fce21cSMahesh Salgaonkar return (mlog->sub_err_type & 0x07); 11704fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_SLB: 11804fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_ERAT: 11904fce21cSMahesh Salgaonkar case MC_ERROR_TYPE_TLB: 12004fce21cSMahesh Salgaonkar return (mlog->sub_err_type & 0x03); 1210f54bddeSGanesh Goudar case MC_ERROR_TYPE_CTRL_MEM_ACCESS: 1220f54bddeSGanesh Goudar return (mlog->sub_err_type & 0x70) >> 4; 12304fce21cSMahesh Salgaonkar default: 12404fce21cSMahesh Salgaonkar return 0; 12504fce21cSMahesh Salgaonkar } 12604fce21cSMahesh Salgaonkar } 12704fce21cSMahesh Salgaonkar 128d9953105SMichael Ellerman /* 129c9dccf1dSSam Bobroff * Enable the hotplug interrupt late because processing them may touch other 130c9dccf1dSSam Bobroff * devices or systems (e.g. hugepages) that have not been initialized at the 131c9dccf1dSSam Bobroff * subsys stage. 132c9dccf1dSSam Bobroff */ 13390db8bf2SCédric Le Goater static int __init init_ras_hotplug_IRQ(void) 134c9dccf1dSSam Bobroff { 135c9dccf1dSSam Bobroff struct device_node *np; 136c9dccf1dSSam Bobroff 137c9dccf1dSSam Bobroff /* Hotplug Events */ 138c9dccf1dSSam Bobroff np = of_find_node_by_path("/event-sources/hot-plug-events"); 139c9dccf1dSSam Bobroff if (np != NULL) { 140c9dccf1dSSam Bobroff if (dlpar_workqueue_init() == 0) 141c9dccf1dSSam Bobroff request_event_sources_irqs(np, ras_hotplug_interrupt, 142c9dccf1dSSam Bobroff "RAS_HOTPLUG"); 143c9dccf1dSSam Bobroff of_node_put(np); 144c9dccf1dSSam Bobroff } 145c9dccf1dSSam Bobroff 146c9dccf1dSSam Bobroff return 0; 147c9dccf1dSSam Bobroff } 148c9dccf1dSSam Bobroff machine_late_initcall(pseries, init_ras_hotplug_IRQ); 149c9dccf1dSSam Bobroff 150c9dccf1dSSam Bobroff /* 151d9953105SMichael Ellerman * Initialize handlers for the set of interrupts caused by hardware errors 152d9953105SMichael Ellerman * and power system events. 153d9953105SMichael Ellerman */ 154d9953105SMichael Ellerman static int __init init_ras_IRQ(void) 155d9953105SMichael Ellerman { 156d9953105SMichael Ellerman struct device_node *np; 157d9953105SMichael Ellerman 158d9953105SMichael Ellerman ras_check_exception_token = rtas_token("check-exception"); 159d9953105SMichael Ellerman 160d9953105SMichael Ellerman /* Internal Errors */ 161d9953105SMichael Ellerman np = of_find_node_by_path("/event-sources/internal-errors"); 162d9953105SMichael Ellerman if (np != NULL) { 16332c96f77SMark Nelson request_event_sources_irqs(np, ras_error_interrupt, 16432c96f77SMark Nelson "RAS_ERROR"); 165d9953105SMichael Ellerman of_node_put(np); 166d9953105SMichael Ellerman } 167d9953105SMichael Ellerman 168d9953105SMichael Ellerman /* EPOW Events */ 169d9953105SMichael Ellerman np = of_find_node_by_path("/event-sources/epow-events"); 170d9953105SMichael Ellerman if (np != NULL) { 17132c96f77SMark Nelson request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW"); 172d9953105SMichael Ellerman of_node_put(np); 173d9953105SMichael Ellerman } 174d9953105SMichael Ellerman 17569ed3324SAnton Blanchard return 0; 176d9953105SMichael Ellerman } 1778e83e905SMichael Ellerman machine_subsys_initcall(pseries, init_ras_IRQ); 178d9953105SMichael Ellerman 17955fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_NORMAL 1 18055fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_ON_UPS 2 18155fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 18255fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 18355fc0c56SAnton Blanchard 18455fc0c56SAnton Blanchard static void handle_system_shutdown(char event_modifier) 18555fc0c56SAnton Blanchard { 18655fc0c56SAnton Blanchard switch (event_modifier) { 18755fc0c56SAnton Blanchard case EPOW_SHUTDOWN_NORMAL: 188b4af279aSVipin K Parashar pr_emerg("Power off requested\n"); 1891b7e0cbeSliguang orderly_poweroff(true); 19055fc0c56SAnton Blanchard break; 19155fc0c56SAnton Blanchard 19255fc0c56SAnton Blanchard case EPOW_SHUTDOWN_ON_UPS: 193b4af279aSVipin K Parashar pr_emerg("Loss of system power detected. System is running on" 194b4af279aSVipin K Parashar " UPS/battery. Check RTAS error log for details\n"); 19555fc0c56SAnton Blanchard break; 19655fc0c56SAnton Blanchard 19755fc0c56SAnton Blanchard case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: 198b4af279aSVipin K Parashar pr_emerg("Loss of system critical functions detected. Check" 199b4af279aSVipin K Parashar " RTAS error log for details\n"); 2001b7e0cbeSliguang orderly_poweroff(true); 20155fc0c56SAnton Blanchard break; 20255fc0c56SAnton Blanchard 20355fc0c56SAnton Blanchard case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: 204b4af279aSVipin K Parashar pr_emerg("High ambient temperature detected. Check RTAS" 205b4af279aSVipin K Parashar " error log for details\n"); 2061b7e0cbeSliguang orderly_poweroff(true); 20755fc0c56SAnton Blanchard break; 20855fc0c56SAnton Blanchard 20955fc0c56SAnton Blanchard default: 210b4af279aSVipin K Parashar pr_err("Unknown power/cooling shutdown event (modifier = %d)\n", 21155fc0c56SAnton Blanchard event_modifier); 21255fc0c56SAnton Blanchard } 21355fc0c56SAnton Blanchard } 21455fc0c56SAnton Blanchard 21555fc0c56SAnton Blanchard struct epow_errorlog { 21655fc0c56SAnton Blanchard unsigned char sensor_value; 21755fc0c56SAnton Blanchard unsigned char event_modifier; 21855fc0c56SAnton Blanchard unsigned char extended_modifier; 21955fc0c56SAnton Blanchard unsigned char reserved; 22055fc0c56SAnton Blanchard unsigned char platform_reason; 22155fc0c56SAnton Blanchard }; 22255fc0c56SAnton Blanchard 22355fc0c56SAnton Blanchard #define EPOW_RESET 0 22455fc0c56SAnton Blanchard #define EPOW_WARN_COOLING 1 22555fc0c56SAnton Blanchard #define EPOW_WARN_POWER 2 22655fc0c56SAnton Blanchard #define EPOW_SYSTEM_SHUTDOWN 3 22755fc0c56SAnton Blanchard #define EPOW_SYSTEM_HALT 4 22855fc0c56SAnton Blanchard #define EPOW_MAIN_ENCLOSURE 5 22955fc0c56SAnton Blanchard #define EPOW_POWER_OFF 7 23055fc0c56SAnton Blanchard 231e51df2c1SAnton Blanchard static void rtas_parse_epow_errlog(struct rtas_error_log *log) 23255fc0c56SAnton Blanchard { 23355fc0c56SAnton Blanchard struct pseries_errorlog *pseries_log; 23455fc0c56SAnton Blanchard struct epow_errorlog *epow_log; 23555fc0c56SAnton Blanchard char action_code; 23655fc0c56SAnton Blanchard char modifier; 23755fc0c56SAnton Blanchard 23855fc0c56SAnton Blanchard pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); 23955fc0c56SAnton Blanchard if (pseries_log == NULL) 24055fc0c56SAnton Blanchard return; 24155fc0c56SAnton Blanchard 24255fc0c56SAnton Blanchard epow_log = (struct epow_errorlog *)pseries_log->data; 24355fc0c56SAnton Blanchard action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ 24455fc0c56SAnton Blanchard modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ 24555fc0c56SAnton Blanchard 24655fc0c56SAnton Blanchard switch (action_code) { 24755fc0c56SAnton Blanchard case EPOW_RESET: 248b4af279aSVipin K Parashar if (num_epow_events) { 249b4af279aSVipin K Parashar pr_info("Non critical power/cooling issue cleared\n"); 250b4af279aSVipin K Parashar num_epow_events--; 251b4af279aSVipin K Parashar } 25255fc0c56SAnton Blanchard break; 25355fc0c56SAnton Blanchard 25455fc0c56SAnton Blanchard case EPOW_WARN_COOLING: 255b4af279aSVipin K Parashar pr_info("Non-critical cooling issue detected. Check RTAS error" 256b4af279aSVipin K Parashar " log for details\n"); 25755fc0c56SAnton Blanchard break; 25855fc0c56SAnton Blanchard 25955fc0c56SAnton Blanchard case EPOW_WARN_POWER: 260b4af279aSVipin K Parashar pr_info("Non-critical power issue detected. Check RTAS error" 261b4af279aSVipin K Parashar " log for details\n"); 26255fc0c56SAnton Blanchard break; 26355fc0c56SAnton Blanchard 26455fc0c56SAnton Blanchard case EPOW_SYSTEM_SHUTDOWN: 265d273fa91SYueHaibing handle_system_shutdown(modifier); 26655fc0c56SAnton Blanchard break; 26755fc0c56SAnton Blanchard 26855fc0c56SAnton Blanchard case EPOW_SYSTEM_HALT: 269b4af279aSVipin K Parashar pr_emerg("Critical power/cooling issue detected. Check RTAS" 270b4af279aSVipin K Parashar " error log for details. Powering off.\n"); 2711b7e0cbeSliguang orderly_poweroff(true); 27255fc0c56SAnton Blanchard break; 27355fc0c56SAnton Blanchard 27455fc0c56SAnton Blanchard case EPOW_MAIN_ENCLOSURE: 27555fc0c56SAnton Blanchard case EPOW_POWER_OFF: 276b4af279aSVipin K Parashar pr_emerg("System about to lose power. Check RTAS error log " 277b4af279aSVipin K Parashar " for details. Powering off immediately.\n"); 27855fc0c56SAnton Blanchard emergency_sync(); 27955fc0c56SAnton Blanchard kernel_power_off(); 28055fc0c56SAnton Blanchard break; 28155fc0c56SAnton Blanchard 28255fc0c56SAnton Blanchard default: 283b4af279aSVipin K Parashar pr_err("Unknown power/cooling event (action code = %d)\n", 28455fc0c56SAnton Blanchard action_code); 28555fc0c56SAnton Blanchard } 286b4af279aSVipin K Parashar 287b4af279aSVipin K Parashar /* Increment epow events counter variable */ 288b4af279aSVipin K Parashar if (action_code != EPOW_RESET) 289b4af279aSVipin K Parashar num_epow_events++; 29055fc0c56SAnton Blanchard } 29155fc0c56SAnton Blanchard 292b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id) 293b7d9eb39SJohn Allen { 294b7d9eb39SJohn Allen struct pseries_errorlog *pseries_log; 295b7d9eb39SJohn Allen struct pseries_hp_errorlog *hp_elog; 296b7d9eb39SJohn Allen 297b7d9eb39SJohn Allen spin_lock(&ras_log_buf_lock); 298b7d9eb39SJohn Allen 299b7d9eb39SJohn Allen rtas_call(ras_check_exception_token, 6, 1, NULL, 300b7d9eb39SJohn Allen RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq), 301b7d9eb39SJohn Allen RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf), 302b7d9eb39SJohn Allen rtas_get_error_log_max()); 303b7d9eb39SJohn Allen 304b7d9eb39SJohn Allen pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf, 305b7d9eb39SJohn Allen PSERIES_ELOG_SECT_ID_HOTPLUG); 306b7d9eb39SJohn Allen hp_elog = (struct pseries_hp_errorlog *)pseries_log->data; 307b7d9eb39SJohn Allen 308b7d9eb39SJohn Allen /* 309b7d9eb39SJohn Allen * Since PCI hotplug is not currently supported on pseries, put PCI 310b7d9eb39SJohn Allen * hotplug events on the ras_log_buf to be handled by rtas_errd. 311b7d9eb39SJohn Allen */ 312b7d9eb39SJohn Allen if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM || 3134c5d87dbSOliver O'Halloran hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU || 3144c5d87dbSOliver O'Halloran hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM) 315fd12527aSNathan Fontenot queue_hotplug_event(hp_elog); 316b7d9eb39SJohn Allen else 317b7d9eb39SJohn Allen log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 318b7d9eb39SJohn Allen 319b7d9eb39SJohn Allen spin_unlock(&ras_log_buf_lock); 320b7d9eb39SJohn Allen return IRQ_HANDLED; 321b7d9eb39SJohn Allen } 322b7d9eb39SJohn Allen 32355fc0c56SAnton Blanchard /* Handle environmental and power warning (EPOW) interrupts. */ 3247d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) 325d9953105SMichael Ellerman { 32655fc0c56SAnton Blanchard int state; 327d9953105SMichael Ellerman int critical; 328d9953105SMichael Ellerman 329aa23ea0cSCédric Le Goater rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); 330d9953105SMichael Ellerman 331d9953105SMichael Ellerman if (state > 3) 332d9953105SMichael Ellerman critical = 1; /* Time Critical */ 333d9953105SMichael Ellerman else 334d9953105SMichael Ellerman critical = 0; 335d9953105SMichael Ellerman 336d9953105SMichael Ellerman spin_lock(&ras_log_buf_lock); 337d9953105SMichael Ellerman 338aa23ea0cSCédric Le Goater rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT, 339aa23ea0cSCédric Le Goater virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf), 340d9953105SMichael Ellerman rtas_get_error_log_max()); 341d9953105SMichael Ellerman 342d9953105SMichael Ellerman log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 343d9953105SMichael Ellerman 34455fc0c56SAnton Blanchard rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); 34555fc0c56SAnton Blanchard 346d9953105SMichael Ellerman spin_unlock(&ras_log_buf_lock); 347d9953105SMichael Ellerman return IRQ_HANDLED; 348d9953105SMichael Ellerman } 349d9953105SMichael Ellerman 350d9953105SMichael Ellerman /* 351d9953105SMichael Ellerman * Handle hardware error interrupts. 352d9953105SMichael Ellerman * 353d9953105SMichael Ellerman * RTAS check-exception is called to collect data on the exception. If 354d9953105SMichael Ellerman * the error is deemed recoverable, we log a warning and return. 355d9953105SMichael Ellerman * For nonrecoverable errors, an error is logged and we stop all processing 356d9953105SMichael Ellerman * as quickly as possible in order to prevent propagation of the failure. 357d9953105SMichael Ellerman */ 3587d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id) 359d9953105SMichael Ellerman { 360d9953105SMichael Ellerman struct rtas_error_log *rtas_elog; 361cc8b5263SAnton Blanchard int status; 362d9953105SMichael Ellerman int fatal; 363d9953105SMichael Ellerman 364d9953105SMichael Ellerman spin_lock(&ras_log_buf_lock); 365d9953105SMichael Ellerman 366d9953105SMichael Ellerman status = rtas_call(ras_check_exception_token, 6, 1, NULL, 367b08e281bSMark Nelson RTAS_VECTOR_EXTERNAL_INTERRUPT, 368476eb491SGrant Likely virq_to_hw(irq), 369d9953105SMichael Ellerman RTAS_INTERNAL_ERROR, 1 /* Time Critical */, 370d9953105SMichael Ellerman __pa(&ras_log_buf), 371d9953105SMichael Ellerman rtas_get_error_log_max()); 372d9953105SMichael Ellerman 373d9953105SMichael Ellerman rtas_elog = (struct rtas_error_log *)ras_log_buf; 374d9953105SMichael Ellerman 375a08a53eaSGreg Kurz if (status == 0 && 376a08a53eaSGreg Kurz rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) 377d9953105SMichael Ellerman fatal = 1; 378d9953105SMichael Ellerman else 379d9953105SMichael Ellerman fatal = 0; 380d9953105SMichael Ellerman 381d9953105SMichael Ellerman /* format and print the extended information */ 382d9953105SMichael Ellerman log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); 383d9953105SMichael Ellerman 384d9953105SMichael Ellerman if (fatal) { 385b4af279aSVipin K Parashar pr_emerg("Fatal hardware error detected. Check RTAS error" 386b4af279aSVipin K Parashar " log for details. Powering off immediately\n"); 387cc8b5263SAnton Blanchard emergency_sync(); 388cc8b5263SAnton Blanchard kernel_power_off(); 389d9953105SMichael Ellerman } else { 390b4af279aSVipin K Parashar pr_err("Recoverable hardware error detected\n"); 391d9953105SMichael Ellerman } 392d9953105SMichael Ellerman 393d9953105SMichael Ellerman spin_unlock(&ras_log_buf_lock); 394d9953105SMichael Ellerman return IRQ_HANDLED; 395d9953105SMichael Ellerman } 396d9953105SMichael Ellerman 397d368514cSAnton Blanchard /* 398d368514cSAnton Blanchard * Some versions of FWNMI place the buffer inside the 4kB page starting at 399d368514cSAnton Blanchard * 0x7000. Other versions place it inside the rtas buffer. We check both. 400deb70f7aSNicholas Piggin * Minimum size of the buffer is 16 bytes. 401d368514cSAnton Blanchard */ 402d368514cSAnton Blanchard #define VALID_FWNMI_BUFFER(A) \ 403deb70f7aSNicholas Piggin ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \ 404deb70f7aSNicholas Piggin (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16)))) 405d368514cSAnton Blanchard 40694675cceSMahesh Salgaonkar static inline struct rtas_error_log *fwnmi_get_errlog(void) 40794675cceSMahesh Salgaonkar { 40894675cceSMahesh Salgaonkar return (struct rtas_error_log *)local_paca->mce_data_buf; 40994675cceSMahesh Salgaonkar } 41094675cceSMahesh Salgaonkar 411d7b14c5cSNicholas Piggin static __be64 *fwnmi_get_savep(struct pt_regs *regs) 412d7b14c5cSNicholas Piggin { 413d7b14c5cSNicholas Piggin unsigned long savep_ra; 414d7b14c5cSNicholas Piggin 415d7b14c5cSNicholas Piggin /* Mask top two bits */ 416d7b14c5cSNicholas Piggin savep_ra = regs->gpr[3] & ~(0x3UL << 62); 417d7b14c5cSNicholas Piggin if (!VALID_FWNMI_BUFFER(savep_ra)) { 418d7b14c5cSNicholas Piggin printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]); 419d7b14c5cSNicholas Piggin return NULL; 420d7b14c5cSNicholas Piggin } 421d7b14c5cSNicholas Piggin 422d7b14c5cSNicholas Piggin return __va(savep_ra); 423d7b14c5cSNicholas Piggin } 424d7b14c5cSNicholas Piggin 425d368514cSAnton Blanchard /* 426d368514cSAnton Blanchard * Get the error information for errors coming through the 427d9953105SMichael Ellerman * FWNMI vectors. The pt_regs' r3 will be updated to reflect 428d9953105SMichael Ellerman * the actual r3 if possible, and a ptr to the error log entry 429d9953105SMichael Ellerman * will be returned if found. 430d9953105SMichael Ellerman * 43194675cceSMahesh Salgaonkar * Use one buffer mce_data_buf per cpu to store RTAS error. 432d368514cSAnton Blanchard * 43394675cceSMahesh Salgaonkar * The mce_data_buf does not have any locks or protection around it, 434d9953105SMichael Ellerman * if a second machine check comes in, or a system reset is done 435d9953105SMichael Ellerman * before we have logged the error, then we will get corruption in the 436d9953105SMichael Ellerman * error log. This is preferable over holding off on calling 437d9953105SMichael Ellerman * ibm,nmi-interlock which would result in us checkstopping if a 438d9953105SMichael Ellerman * second machine check did come in. 439d9953105SMichael Ellerman */ 440d9953105SMichael Ellerman static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) 441d9953105SMichael Ellerman { 44294675cceSMahesh Salgaonkar struct rtas_error_log *h; 443d7b14c5cSNicholas Piggin __be64 *savep; 444d9953105SMichael Ellerman 445d7b14c5cSNicholas Piggin savep = fwnmi_get_savep(regs); 446d7b14c5cSNicholas Piggin if (!savep) 447d368514cSAnton Blanchard return NULL; 448d368514cSAnton Blanchard 449cd813e1cSMahesh Salgaonkar regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ 450d368514cSAnton Blanchard 451d368514cSAnton Blanchard h = (struct rtas_error_log *)&savep[1]; 45294675cceSMahesh Salgaonkar /* Use the per cpu buffer from paca to store rtas error log */ 45394675cceSMahesh Salgaonkar memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX); 454a08a53eaSGreg Kurz if (!rtas_error_extended(h)) { 45594675cceSMahesh Salgaonkar memcpy(local_paca->mce_data_buf, h, sizeof(__u64)); 456d368514cSAnton Blanchard } else { 457a08a53eaSGreg Kurz int len, error_log_length; 458d368514cSAnton Blanchard 459a08a53eaSGreg Kurz error_log_length = 8 + rtas_error_extended_log_length(h); 46074e96bf4SMahesh Salgaonkar len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX); 46194675cceSMahesh Salgaonkar memcpy(local_paca->mce_data_buf, h, len); 462d368514cSAnton Blanchard } 463d368514cSAnton Blanchard 46494675cceSMahesh Salgaonkar return (struct rtas_error_log *)local_paca->mce_data_buf; 465d9953105SMichael Ellerman } 466d9953105SMichael Ellerman 467d9953105SMichael Ellerman /* Call this when done with the data returned by FWNMI_get_errinfo. 468d9953105SMichael Ellerman * It will release the saved data area for other CPUs in the 469d9953105SMichael Ellerman * partition to receive FWNMI errors. 470d9953105SMichael Ellerman */ 471d9953105SMichael Ellerman static void fwnmi_release_errinfo(void) 472d9953105SMichael Ellerman { 4732576f5f9SNicholas Piggin struct rtas_args rtas_args; 4742576f5f9SNicholas Piggin int ret; 4752576f5f9SNicholas Piggin 4762576f5f9SNicholas Piggin /* 4772576f5f9SNicholas Piggin * On pseries, the machine check stack is limited to under 4GB, so 4782576f5f9SNicholas Piggin * args can be on-stack. 4792576f5f9SNicholas Piggin */ 4802576f5f9SNicholas Piggin rtas_call_unlocked(&rtas_args, ibm_nmi_interlock_token, 0, 1, NULL); 4812576f5f9SNicholas Piggin ret = be32_to_cpu(rtas_args.rets[0]); 482d9953105SMichael Ellerman if (ret != 0) 483d368514cSAnton Blanchard printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret); 484d9953105SMichael Ellerman } 485d9953105SMichael Ellerman 486c902be71SArnd Bergmann int pSeries_system_reset_exception(struct pt_regs *regs) 487d9953105SMichael Ellerman { 488bded0706SNicholas Piggin #ifdef __LITTLE_ENDIAN__ 489bded0706SNicholas Piggin /* 490bded0706SNicholas Piggin * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try 491bded0706SNicholas Piggin * to detect the bad SRR1 pattern here. Flip the NIP back to correct 492bded0706SNicholas Piggin * endian for reporting purposes. Unfortunately the MSR can't be fixed, 493bded0706SNicholas Piggin * so clear it. It will be missing MSR_RI so we won't try to recover. 494bded0706SNicholas Piggin */ 495bded0706SNicholas Piggin if ((be64_to_cpu(regs->msr) & 496bded0706SNicholas Piggin (MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR| 497bded0706SNicholas Piggin MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) { 49859dc5bfcSNicholas Piggin regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip)); 49959dc5bfcSNicholas Piggin regs_set_return_msr(regs, 0); 500bded0706SNicholas Piggin } 501bded0706SNicholas Piggin #endif 502bded0706SNicholas Piggin 503d9953105SMichael Ellerman if (fwnmi_active) { 504d7b14c5cSNicholas Piggin __be64 *savep; 505d7b14c5cSNicholas Piggin 506d7b14c5cSNicholas Piggin /* 507d7b14c5cSNicholas Piggin * Firmware (PowerVM and KVM) saves r3 to a save area like 508d7b14c5cSNicholas Piggin * machine check, which is not exactly what PAPR (2.9) 509d7b14c5cSNicholas Piggin * suggests but there is no way to detect otherwise, so this 510d7b14c5cSNicholas Piggin * is the interface now. 511d7b14c5cSNicholas Piggin * 512d7b14c5cSNicholas Piggin * System resets do not save any error log or require an 513d7b14c5cSNicholas Piggin * "ibm,nmi-interlock" rtas call to release. 514d7b14c5cSNicholas Piggin */ 515d7b14c5cSNicholas Piggin 516d7b14c5cSNicholas Piggin savep = fwnmi_get_savep(regs); 517d7b14c5cSNicholas Piggin if (savep) 518d7b14c5cSNicholas Piggin regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */ 519d9953105SMichael Ellerman } 520102c05e8SNicholas Piggin 521102c05e8SNicholas Piggin if (smp_handle_nmi_ipi(regs)) 522102c05e8SNicholas Piggin return 1; 523102c05e8SNicholas Piggin 524c902be71SArnd Bergmann return 0; /* need to perform reset */ 525d9953105SMichael Ellerman } 526d9953105SMichael Ellerman 5274ff753feSGanesh Goudar static int mce_handle_err_realmode(int disposition, u8 error_type) 5284ff753feSGanesh Goudar { 5294ff753feSGanesh Goudar #ifdef CONFIG_PPC_BOOK3S_64 5304ff753feSGanesh Goudar if (disposition == RTAS_DISP_NOT_RECOVERED) { 5314ff753feSGanesh Goudar switch (error_type) { 5324ff753feSGanesh Goudar case MC_ERROR_TYPE_ERAT: 53382f70a05SNicholas Piggin flush_erat(); 53482f70a05SNicholas Piggin disposition = RTAS_DISP_FULLY_RECOVERED; 53582f70a05SNicholas Piggin break; 53682f70a05SNicholas Piggin case MC_ERROR_TYPE_SLB: 537387e220aSNicholas Piggin #ifdef CONFIG_PPC_64S_HASH_MMU 5384ff753feSGanesh Goudar /* 5394ff753feSGanesh Goudar * Store the old slb content in paca before flushing. 5404ff753feSGanesh Goudar * Print this when we go to virtual mode. 5414ff753feSGanesh Goudar * There are chances that we may hit MCE again if there 5424ff753feSGanesh Goudar * is a parity error on the SLB entry we trying to read 5434ff753feSGanesh Goudar * for saving. Hence limit the slb saving to single 5444ff753feSGanesh Goudar * level of recursion. 5454ff753feSGanesh Goudar */ 5464ff753feSGanesh Goudar if (local_paca->in_mce == 1) 5474ff753feSGanesh Goudar slb_save_contents(local_paca->mce_faulty_slbs); 5484ff753feSGanesh Goudar flush_and_reload_slb(); 5494ff753feSGanesh Goudar disposition = RTAS_DISP_FULLY_RECOVERED; 550387e220aSNicholas Piggin #endif 5514ff753feSGanesh Goudar break; 5524ff753feSGanesh Goudar default: 5534ff753feSGanesh Goudar break; 5544ff753feSGanesh Goudar } 5554ff753feSGanesh Goudar } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { 5564ff753feSGanesh Goudar /* Platform corrected itself but could be degraded */ 5574ff753feSGanesh Goudar pr_err("MCE: limited recovery, system may be degraded\n"); 5584ff753feSGanesh Goudar disposition = RTAS_DISP_FULLY_RECOVERED; 5594ff753feSGanesh Goudar } 5604ff753feSGanesh Goudar #endif 5614ff753feSGanesh Goudar return disposition; 5624ff753feSGanesh Goudar } 5638f0b8056SMahesh Salgaonkar 5644ff753feSGanesh Goudar static int mce_handle_err_virtmode(struct pt_regs *regs, 5654ff753feSGanesh Goudar struct rtas_error_log *errp, 5664ff753feSGanesh Goudar struct pseries_mc_errorlog *mce_log, 5674ff753feSGanesh Goudar int disposition) 5688f0b8056SMahesh Salgaonkar { 5699ca766f9SNicholas Piggin struct mce_error_info mce_err = { 0 }; 5709ca766f9SNicholas Piggin int initiator = rtas_error_initiator(errp); 5719ca766f9SNicholas Piggin int severity = rtas_error_severity(errp); 5724ff753feSGanesh Goudar unsigned long eaddr = 0, paddr = 0; 5738f0b8056SMahesh Salgaonkar u8 error_type, err_sub_type; 5748f0b8056SMahesh Salgaonkar 5754ff753feSGanesh Goudar if (!mce_log) 5764ff753feSGanesh Goudar goto out; 5774ff753feSGanesh Goudar 5784ff753feSGanesh Goudar error_type = mce_log->error_type; 5794ff753feSGanesh Goudar err_sub_type = rtas_mc_error_sub_type(mce_log); 5804ff753feSGanesh Goudar 5819ca766f9SNicholas Piggin if (initiator == RTAS_INITIATOR_UNKNOWN) 5829ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_UNKNOWN; 5839ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_CPU) 5849ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_CPU; 5859ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_PCI) 5869ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_PCI; 5879ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_ISA) 5889ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_ISA; 5899ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_MEMORY) 5909ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_MEMORY; 5919ca766f9SNicholas Piggin else if (initiator == RTAS_INITIATOR_POWERMGM) 5929ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_POWERMGM; 5939ca766f9SNicholas Piggin else 5949ca766f9SNicholas Piggin mce_err.initiator = MCE_INITIATOR_UNKNOWN; 5958f0b8056SMahesh Salgaonkar 5969ca766f9SNicholas Piggin if (severity == RTAS_SEVERITY_NO_ERROR) 5979ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_NO_ERROR; 5989ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_EVENT) 5999ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_WARNING; 6009ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_WARNING) 6019ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_WARNING; 6029ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_ERROR_SYNC) 6039ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_SEVERE; 6049ca766f9SNicholas Piggin else if (severity == RTAS_SEVERITY_ERROR) 6059ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_SEVERE; 6069ca766f9SNicholas Piggin else 6079ca766f9SNicholas Piggin mce_err.severity = MCE_SEV_FATAL; 6088f0b8056SMahesh Salgaonkar 6099ca766f9SNicholas Piggin if (severity <= RTAS_SEVERITY_ERROR_SYNC) 6109ca766f9SNicholas Piggin mce_err.sync_error = true; 6119ca766f9SNicholas Piggin else 6129ca766f9SNicholas Piggin mce_err.sync_error = false; 6138f0b8056SMahesh Salgaonkar 6149ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; 6159ca766f9SNicholas Piggin mce_err.error_class = MCE_ECLASS_UNKNOWN; 616a43c1590SMahesh Salgaonkar 6174ff753feSGanesh Goudar switch (error_type) { 6189ca766f9SNicholas Piggin case MC_ERROR_TYPE_UE: 6199ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_UE; 620efbc4303SGanesh Goudar mce_common_process_ue(regs, &mce_err); 621efbc4303SGanesh Goudar if (mce_err.ignore_event) 622efbc4303SGanesh Goudar disposition = RTAS_DISP_FULLY_RECOVERED; 6239ca766f9SNicholas Piggin switch (err_sub_type) { 6249ca766f9SNicholas Piggin case MC_ERROR_UE_IFETCH: 6259ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH; 6269ca766f9SNicholas Piggin break; 6279ca766f9SNicholas Piggin case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH: 6289ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; 6299ca766f9SNicholas Piggin break; 6309ca766f9SNicholas Piggin case MC_ERROR_UE_LOAD_STORE: 6319ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; 6329ca766f9SNicholas Piggin break; 6339ca766f9SNicholas Piggin case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE: 6349ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; 6359ca766f9SNicholas Piggin break; 6369ca766f9SNicholas Piggin case MC_ERROR_UE_INDETERMINATE: 6379ca766f9SNicholas Piggin default: 6389ca766f9SNicholas Piggin mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE; 6399ca766f9SNicholas Piggin break; 6409ca766f9SNicholas Piggin } 6419ca766f9SNicholas Piggin if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) 6429ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 6439ca766f9SNicholas Piggin 6449ca766f9SNicholas Piggin if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { 6459ca766f9SNicholas Piggin paddr = be64_to_cpu(mce_log->logical_address); 6469ca766f9SNicholas Piggin } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { 6479ca766f9SNicholas Piggin unsigned long pfn; 6489ca766f9SNicholas Piggin 6499ca766f9SNicholas Piggin pfn = addr_to_pfn(regs, eaddr); 6509ca766f9SNicholas Piggin if (pfn != ULONG_MAX) 6519ca766f9SNicholas Piggin paddr = pfn << PAGE_SHIFT; 6529ca766f9SNicholas Piggin } 6539ca766f9SNicholas Piggin 6549ca766f9SNicholas Piggin break; 6559ca766f9SNicholas Piggin case MC_ERROR_TYPE_SLB: 6569ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_SLB; 6579ca766f9SNicholas Piggin switch (err_sub_type) { 6589ca766f9SNicholas Piggin case MC_ERROR_SLB_PARITY: 6599ca766f9SNicholas Piggin mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY; 6609ca766f9SNicholas Piggin break; 6619ca766f9SNicholas Piggin case MC_ERROR_SLB_MULTIHIT: 6629ca766f9SNicholas Piggin mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; 6639ca766f9SNicholas Piggin break; 6649ca766f9SNicholas Piggin case MC_ERROR_SLB_INDETERMINATE: 6659ca766f9SNicholas Piggin default: 6669ca766f9SNicholas Piggin mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE; 6679ca766f9SNicholas Piggin break; 6689ca766f9SNicholas Piggin } 6690f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 6709ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 6719ca766f9SNicholas Piggin break; 6729ca766f9SNicholas Piggin case MC_ERROR_TYPE_ERAT: 6739ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_ERAT; 6749ca766f9SNicholas Piggin switch (err_sub_type) { 6759ca766f9SNicholas Piggin case MC_ERROR_ERAT_PARITY: 6769ca766f9SNicholas Piggin mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY; 6779ca766f9SNicholas Piggin break; 6789ca766f9SNicholas Piggin case MC_ERROR_ERAT_MULTIHIT: 6799ca766f9SNicholas Piggin mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; 6809ca766f9SNicholas Piggin break; 6819ca766f9SNicholas Piggin case MC_ERROR_ERAT_INDETERMINATE: 6829ca766f9SNicholas Piggin default: 6839ca766f9SNicholas Piggin mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE; 6849ca766f9SNicholas Piggin break; 6859ca766f9SNicholas Piggin } 6860f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 6879ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 6889ca766f9SNicholas Piggin break; 6899ca766f9SNicholas Piggin case MC_ERROR_TYPE_TLB: 6909ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_TLB; 6919ca766f9SNicholas Piggin switch (err_sub_type) { 6929ca766f9SNicholas Piggin case MC_ERROR_TLB_PARITY: 6939ca766f9SNicholas Piggin mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY; 6949ca766f9SNicholas Piggin break; 6959ca766f9SNicholas Piggin case MC_ERROR_TLB_MULTIHIT: 6969ca766f9SNicholas Piggin mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; 6979ca766f9SNicholas Piggin break; 6989ca766f9SNicholas Piggin case MC_ERROR_TLB_INDETERMINATE: 6999ca766f9SNicholas Piggin default: 7009ca766f9SNicholas Piggin mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE; 7019ca766f9SNicholas Piggin break; 7029ca766f9SNicholas Piggin } 7030f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 7049ca766f9SNicholas Piggin eaddr = be64_to_cpu(mce_log->effective_address); 7059ca766f9SNicholas Piggin break; 7069ca766f9SNicholas Piggin case MC_ERROR_TYPE_D_CACHE: 7079ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_DCACHE; 7089ca766f9SNicholas Piggin break; 7099ca766f9SNicholas Piggin case MC_ERROR_TYPE_I_CACHE: 710864ec4d4SGanesh Goudar mce_err.error_type = MCE_ERROR_TYPE_ICACHE; 7119ca766f9SNicholas Piggin break; 7120f54bddeSGanesh Goudar case MC_ERROR_TYPE_CTRL_MEM_ACCESS: 7130f54bddeSGanesh Goudar mce_err.error_type = MCE_ERROR_TYPE_RA; 7140f54bddeSGanesh Goudar switch (err_sub_type) { 7150f54bddeSGanesh Goudar case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK: 7160f54bddeSGanesh Goudar mce_err.u.ra_error_type = 7170f54bddeSGanesh Goudar MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; 7180f54bddeSGanesh Goudar break; 7190f54bddeSGanesh Goudar case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS: 7200f54bddeSGanesh Goudar mce_err.u.ra_error_type = 7210f54bddeSGanesh Goudar MCE_RA_ERROR_LOAD_STORE_FOREIGN; 7220f54bddeSGanesh Goudar break; 7230f54bddeSGanesh Goudar } 7240f54bddeSGanesh Goudar if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED) 7250f54bddeSGanesh Goudar eaddr = be64_to_cpu(mce_log->effective_address); 7260f54bddeSGanesh Goudar break; 7279ca766f9SNicholas Piggin case MC_ERROR_TYPE_UNKNOWN: 7289ca766f9SNicholas Piggin default: 7299ca766f9SNicholas Piggin mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN; 7309ca766f9SNicholas Piggin break; 7319ca766f9SNicholas Piggin } 732a43c1590SMahesh Salgaonkar out: 7334ff753feSGanesh Goudar save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, 7344ff753feSGanesh Goudar &mce_err, regs->nip, eaddr, paddr); 7354ff753feSGanesh Goudar return disposition; 7364ff753feSGanesh Goudar } 7374ff753feSGanesh Goudar 7384ff753feSGanesh Goudar static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) 7394ff753feSGanesh Goudar { 7404ff753feSGanesh Goudar struct pseries_errorlog *pseries_log; 7414ff753feSGanesh Goudar struct pseries_mc_errorlog *mce_log = NULL; 7424ff753feSGanesh Goudar int disposition = rtas_error_disposition(errp); 7434ff753feSGanesh Goudar u8 error_type; 7444ff753feSGanesh Goudar 7454ff753feSGanesh Goudar if (!rtas_error_extended(errp)) 7464ff753feSGanesh Goudar goto out; 7474ff753feSGanesh Goudar 7484ff753feSGanesh Goudar pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); 7494ff753feSGanesh Goudar if (!pseries_log) 7504ff753feSGanesh Goudar goto out; 7514ff753feSGanesh Goudar 7524ff753feSGanesh Goudar mce_log = (struct pseries_mc_errorlog *)pseries_log->data; 7534ff753feSGanesh Goudar error_type = mce_log->error_type; 7544ff753feSGanesh Goudar 7554ff753feSGanesh Goudar disposition = mce_handle_err_realmode(disposition, error_type); 7564ff753feSGanesh Goudar out: 7574ff753feSGanesh Goudar disposition = mce_handle_err_virtmode(regs, errp, mce_log, 7584ff753feSGanesh Goudar disposition); 759a43c1590SMahesh Salgaonkar return disposition; 760a43c1590SMahesh Salgaonkar } 761a43c1590SMahesh Salgaonkar 762d9953105SMichael Ellerman /* 76394675cceSMahesh Salgaonkar * Process MCE rtas errlog event. 76494675cceSMahesh Salgaonkar */ 765*cc15ff32SGanesh Goudar void pSeries_machine_check_log_err(void) 76694675cceSMahesh Salgaonkar { 76794675cceSMahesh Salgaonkar struct rtas_error_log *err; 76894675cceSMahesh Salgaonkar 76994675cceSMahesh Salgaonkar err = fwnmi_get_errlog(); 77094675cceSMahesh Salgaonkar log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); 77194675cceSMahesh Salgaonkar } 77294675cceSMahesh Salgaonkar 77394675cceSMahesh Salgaonkar /* 774d9953105SMichael Ellerman * See if we can recover from a machine check exception. 775d9953105SMichael Ellerman * This is only called on power4 (or above) and only via 776d9953105SMichael Ellerman * the Firmware Non-Maskable Interrupts (fwnmi) handler 777d9953105SMichael Ellerman * which provides the error analysis for us. 778d9953105SMichael Ellerman * 779d9953105SMichael Ellerman * Return 1 if corrected (or delivered a signal). 780d9953105SMichael Ellerman * Return 0 if there is nothing we can do. 781d9953105SMichael Ellerman */ 7829ca766f9SNicholas Piggin static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) 783d9953105SMichael Ellerman { 784d47d1d8aSAnton Blanchard int recovered = 0; 7858f0b8056SMahesh Salgaonkar 786806c0e6eSChristophe Leroy if (regs_is_unrecoverable(regs)) { 787d47d1d8aSAnton Blanchard /* If MSR_RI isn't set, we cannot recover */ 7888f0b8056SMahesh Salgaonkar pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); 789d47d1d8aSAnton Blanchard recovered = 0; 7909ca766f9SNicholas Piggin } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { 791d9953105SMichael Ellerman /* Platform corrected itself */ 792d47d1d8aSAnton Blanchard recovered = 1; 7939ca766f9SNicholas Piggin } else if (evt->severity == MCE_SEV_FATAL) { 7949ca766f9SNicholas Piggin /* Fatal machine check */ 7959ca766f9SNicholas Piggin pr_err("Machine check interrupt is fatal\n"); 7969ca766f9SNicholas Piggin recovered = 0; 797d9953105SMichael Ellerman } 798d9953105SMichael Ellerman 7999ca766f9SNicholas Piggin if (!recovered && evt->sync_error) { 8009ca766f9SNicholas Piggin /* 8019ca766f9SNicholas Piggin * Try to kill processes if we get a synchronous machine check 8029ca766f9SNicholas Piggin * (e.g., one caused by execution of this instruction). This 8039ca766f9SNicholas Piggin * will devolve into a panic if we try to kill init or are in 8049ca766f9SNicholas Piggin * an interrupt etc. 8059ca766f9SNicholas Piggin * 8069ca766f9SNicholas Piggin * TODO: Queue up this address for hwpoisioning later. 8079ca766f9SNicholas Piggin * TODO: This is not quite right for d-side machine 8089ca766f9SNicholas Piggin * checks ->nip is not necessarily the important 8099ca766f9SNicholas Piggin * address. 8109ca766f9SNicholas Piggin */ 8119ca766f9SNicholas Piggin if ((user_mode(regs))) { 8129ca766f9SNicholas Piggin _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); 8139ca766f9SNicholas Piggin recovered = 1; 8149ca766f9SNicholas Piggin } else if (die_will_crash()) { 8159ca766f9SNicholas Piggin /* 8169ca766f9SNicholas Piggin * die() would kill the kernel, so better to go via 8179ca766f9SNicholas Piggin * the platform reboot code that will log the 8189ca766f9SNicholas Piggin * machine check. 8199ca766f9SNicholas Piggin */ 8209ca766f9SNicholas Piggin recovered = 0; 8219ca766f9SNicholas Piggin } else { 822209e9d50SNicholas Piggin die_mce("Machine check", regs, SIGBUS); 8239ca766f9SNicholas Piggin recovered = 1; 8249ca766f9SNicholas Piggin } 8259ca766f9SNicholas Piggin } 826d9953105SMichael Ellerman 827d47d1d8aSAnton Blanchard return recovered; 828d9953105SMichael Ellerman } 829d9953105SMichael Ellerman 830d9953105SMichael Ellerman /* 831d9953105SMichael Ellerman * Handle a machine check. 832d9953105SMichael Ellerman * 833d9953105SMichael Ellerman * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi) 834d9953105SMichael Ellerman * should be present. If so the handler which called us tells us if the 835d9953105SMichael Ellerman * error was recovered (never true if RI=0). 836d9953105SMichael Ellerman * 837d9953105SMichael Ellerman * On hardware prior to Power 4 these exceptions were asynchronous which 838d9953105SMichael Ellerman * means we can't tell exactly where it occurred and so we can't recover. 839d9953105SMichael Ellerman */ 840d9953105SMichael Ellerman int pSeries_machine_check_exception(struct pt_regs *regs) 841d9953105SMichael Ellerman { 8429ca766f9SNicholas Piggin struct machine_check_event evt; 843d9953105SMichael Ellerman 8449ca766f9SNicholas Piggin if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 8459ca766f9SNicholas Piggin return 0; 8469ca766f9SNicholas Piggin 8479ca766f9SNicholas Piggin /* Print things out */ 8489ca766f9SNicholas Piggin if (evt.version != MCE_V1) { 8499ca766f9SNicholas Piggin pr_err("Machine Check Exception, Unknown event version %d !\n", 8509ca766f9SNicholas Piggin evt.version); 8519ca766f9SNicholas Piggin return 0; 852d9953105SMichael Ellerman } 8539ca766f9SNicholas Piggin machine_check_print_event_info(&evt, user_mode(regs), false); 8549ca766f9SNicholas Piggin 8559ca766f9SNicholas Piggin if (recover_mce(regs, &evt)) 8569ca766f9SNicholas Piggin return 1; 857d9953105SMichael Ellerman 858d9953105SMichael Ellerman return 0; 859d9953105SMichael Ellerman } 860a43c1590SMahesh Salgaonkar 861a43c1590SMahesh Salgaonkar long pseries_machine_check_realmode(struct pt_regs *regs) 862a43c1590SMahesh Salgaonkar { 863a43c1590SMahesh Salgaonkar struct rtas_error_log *errp; 864a43c1590SMahesh Salgaonkar int disposition; 865a43c1590SMahesh Salgaonkar 866a43c1590SMahesh Salgaonkar if (fwnmi_active) { 867a43c1590SMahesh Salgaonkar errp = fwnmi_get_errinfo(regs); 868a43c1590SMahesh Salgaonkar /* 869a43c1590SMahesh Salgaonkar * Call to fwnmi_release_errinfo() in real mode causes kernel 870a43c1590SMahesh Salgaonkar * to panic. Hence we will call it as soon as we go into 871a43c1590SMahesh Salgaonkar * virtual mode. 872a43c1590SMahesh Salgaonkar */ 8739ca766f9SNicholas Piggin disposition = mce_handle_error(regs, errp); 8749ca766f9SNicholas Piggin 87574c3354bSNicholas Piggin fwnmi_release_errinfo(); 8769ca766f9SNicholas Piggin 877a43c1590SMahesh Salgaonkar if (disposition == RTAS_DISP_FULLY_RECOVERED) 878a43c1590SMahesh Salgaonkar return 1; 879a43c1590SMahesh Salgaonkar } 880a43c1590SMahesh Salgaonkar 881a43c1590SMahesh Salgaonkar return 0; 882a43c1590SMahesh Salgaonkar } 883