xref: /openbmc/linux/arch/powerpc/platforms/pseries/ras.c (revision 1a59d1b8e05ea6ab45f7e18897de1ef0e6bc3da6)
1*1a59d1b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2d9953105SMichael Ellerman /*
3d9953105SMichael Ellerman  * Copyright (C) 2001 Dave Engebretsen IBM Corporation
4d9953105SMichael Ellerman  */
5d9953105SMichael Ellerman 
6d9953105SMichael Ellerman #include <linux/sched.h>
7d9953105SMichael Ellerman #include <linux/interrupt.h>
8d9953105SMichael Ellerman #include <linux/irq.h>
990128997SAnton Blanchard #include <linux/of.h>
1055fc0c56SAnton Blanchard #include <linux/fs.h>
1155fc0c56SAnton Blanchard #include <linux/reboot.h>
1294675cceSMahesh Salgaonkar #include <linux/irq_work.h>
13d9953105SMichael Ellerman 
14d9953105SMichael Ellerman #include <asm/machdep.h>
15d9953105SMichael Ellerman #include <asm/rtas.h>
168c4f1f29SMichael Ellerman #include <asm/firmware.h>
17a43c1590SMahesh Salgaonkar #include <asm/mce.h>
18d9953105SMichael Ellerman 
19577830b0SMichael Ellerman #include "pseries.h"
20c902be71SArnd Bergmann 
21d9953105SMichael Ellerman static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
22d9953105SMichael Ellerman static DEFINE_SPINLOCK(ras_log_buf_lock);
23d9953105SMichael Ellerman 
24d9953105SMichael Ellerman static int ras_check_exception_token;
25d9953105SMichael Ellerman 
2694675cceSMahesh Salgaonkar static void mce_process_errlog_event(struct irq_work *work);
2794675cceSMahesh Salgaonkar static struct irq_work mce_errlog_process_work = {
2894675cceSMahesh Salgaonkar 	.func = mce_process_errlog_event,
2994675cceSMahesh Salgaonkar };
3094675cceSMahesh Salgaonkar 
31d9953105SMichael Ellerman #define EPOW_SENSOR_TOKEN	9
32d9953105SMichael Ellerman #define EPOW_SENSOR_INDEX	0
33d9953105SMichael Ellerman 
34b4af279aSVipin K Parashar /* EPOW events counter variable */
35b4af279aSVipin K Parashar static int num_epow_events;
36b4af279aSVipin K Parashar 
37b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
387d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
397d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
40d9953105SMichael Ellerman 
4104fce21cSMahesh Salgaonkar /* RTAS pseries MCE errorlog section. */
4204fce21cSMahesh Salgaonkar struct pseries_mc_errorlog {
4304fce21cSMahesh Salgaonkar 	__be32	fru_id;
4404fce21cSMahesh Salgaonkar 	__be32	proc_id;
4504fce21cSMahesh Salgaonkar 	u8	error_type;
4604fce21cSMahesh Salgaonkar 	/*
4704fce21cSMahesh Salgaonkar 	 * sub_err_type (1 byte). Bit fields depends on error_type
4804fce21cSMahesh Salgaonkar 	 *
4904fce21cSMahesh Salgaonkar 	 *   MSB0
5004fce21cSMahesh Salgaonkar 	 *   |
5104fce21cSMahesh Salgaonkar 	 *   V
5204fce21cSMahesh Salgaonkar 	 *   01234567
5304fce21cSMahesh Salgaonkar 	 *   XXXXXXXX
5404fce21cSMahesh Salgaonkar 	 *
5504fce21cSMahesh Salgaonkar 	 * For error_type == MC_ERROR_TYPE_UE
5604fce21cSMahesh Salgaonkar 	 *   XXXXXXXX
5704fce21cSMahesh Salgaonkar 	 *   X		1: Permanent or Transient UE.
5804fce21cSMahesh Salgaonkar 	 *    X		1: Effective address provided.
5904fce21cSMahesh Salgaonkar 	 *     X	1: Logical address provided.
6004fce21cSMahesh Salgaonkar 	 *      XX	2: Reserved.
6104fce21cSMahesh Salgaonkar 	 *        XXX	3: Type of UE error.
6204fce21cSMahesh Salgaonkar 	 *
6304fce21cSMahesh Salgaonkar 	 * For error_type != MC_ERROR_TYPE_UE
6404fce21cSMahesh Salgaonkar 	 *   XXXXXXXX
6504fce21cSMahesh Salgaonkar 	 *   X		1: Effective address provided.
6604fce21cSMahesh Salgaonkar 	 *    XXXXX	5: Reserved.
6704fce21cSMahesh Salgaonkar 	 *         XX	2: Type of SLB/ERAT/TLB error.
6804fce21cSMahesh Salgaonkar 	 */
6904fce21cSMahesh Salgaonkar 	u8	sub_err_type;
7004fce21cSMahesh Salgaonkar 	u8	reserved_1[6];
7104fce21cSMahesh Salgaonkar 	__be64	effective_address;
7204fce21cSMahesh Salgaonkar 	__be64	logical_address;
7304fce21cSMahesh Salgaonkar } __packed;
7404fce21cSMahesh Salgaonkar 
7504fce21cSMahesh Salgaonkar /* RTAS pseries MCE error types */
7604fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_UE		0x00
7704fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_SLB		0x01
7804fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_ERAT		0x02
7904fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_TLB		0x04
8004fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_D_CACHE		0x05
8104fce21cSMahesh Salgaonkar #define MC_ERROR_TYPE_I_CACHE		0x07
8204fce21cSMahesh Salgaonkar 
8304fce21cSMahesh Salgaonkar /* RTAS pseries MCE error sub types */
8404fce21cSMahesh Salgaonkar #define MC_ERROR_UE_INDETERMINATE		0
8504fce21cSMahesh Salgaonkar #define MC_ERROR_UE_IFETCH			1
8604fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH	2
8704fce21cSMahesh Salgaonkar #define MC_ERROR_UE_LOAD_STORE			3
8804fce21cSMahesh Salgaonkar #define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE	4
8904fce21cSMahesh Salgaonkar 
9004fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_PARITY		0
9104fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_MULTIHIT		1
9204fce21cSMahesh Salgaonkar #define MC_ERROR_SLB_INDETERMINATE	2
9304fce21cSMahesh Salgaonkar 
9404fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_PARITY		1
9504fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_MULTIHIT		2
9604fce21cSMahesh Salgaonkar #define MC_ERROR_ERAT_INDETERMINATE	3
9704fce21cSMahesh Salgaonkar 
9804fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_PARITY		1
9904fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_MULTIHIT		2
10004fce21cSMahesh Salgaonkar #define MC_ERROR_TLB_INDETERMINATE	3
10104fce21cSMahesh Salgaonkar 
10204fce21cSMahesh Salgaonkar static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
10304fce21cSMahesh Salgaonkar {
10404fce21cSMahesh Salgaonkar 	switch (mlog->error_type) {
10504fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_UE:
10604fce21cSMahesh Salgaonkar 		return (mlog->sub_err_type & 0x07);
10704fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_SLB:
10804fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_ERAT:
10904fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_TLB:
11004fce21cSMahesh Salgaonkar 		return (mlog->sub_err_type & 0x03);
11104fce21cSMahesh Salgaonkar 	default:
11204fce21cSMahesh Salgaonkar 		return 0;
11304fce21cSMahesh Salgaonkar 	}
11404fce21cSMahesh Salgaonkar }
11504fce21cSMahesh Salgaonkar 
11604fce21cSMahesh Salgaonkar static
11704fce21cSMahesh Salgaonkar inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
11804fce21cSMahesh Salgaonkar {
11904fce21cSMahesh Salgaonkar 	__be64 addr = 0;
12004fce21cSMahesh Salgaonkar 
12104fce21cSMahesh Salgaonkar 	switch (mlog->error_type) {
12204fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_UE:
12304fce21cSMahesh Salgaonkar 		if (mlog->sub_err_type & 0x40)
12404fce21cSMahesh Salgaonkar 			addr = mlog->effective_address;
12504fce21cSMahesh Salgaonkar 		break;
12604fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_SLB:
12704fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_ERAT:
12804fce21cSMahesh Salgaonkar 	case	MC_ERROR_TYPE_TLB:
12904fce21cSMahesh Salgaonkar 		if (mlog->sub_err_type & 0x80)
13004fce21cSMahesh Salgaonkar 			addr = mlog->effective_address;
13104fce21cSMahesh Salgaonkar 	default:
13204fce21cSMahesh Salgaonkar 		break;
13304fce21cSMahesh Salgaonkar 	}
13404fce21cSMahesh Salgaonkar 	return be64_to_cpu(addr);
13504fce21cSMahesh Salgaonkar }
1360ebfff14SBenjamin Herrenschmidt 
137d9953105SMichael Ellerman /*
138c9dccf1dSSam Bobroff  * Enable the hotplug interrupt late because processing them may touch other
139c9dccf1dSSam Bobroff  * devices or systems (e.g. hugepages) that have not been initialized at the
140c9dccf1dSSam Bobroff  * subsys stage.
141c9dccf1dSSam Bobroff  */
142c9dccf1dSSam Bobroff int __init init_ras_hotplug_IRQ(void)
143c9dccf1dSSam Bobroff {
144c9dccf1dSSam Bobroff 	struct device_node *np;
145c9dccf1dSSam Bobroff 
146c9dccf1dSSam Bobroff 	/* Hotplug Events */
147c9dccf1dSSam Bobroff 	np = of_find_node_by_path("/event-sources/hot-plug-events");
148c9dccf1dSSam Bobroff 	if (np != NULL) {
149c9dccf1dSSam Bobroff 		if (dlpar_workqueue_init() == 0)
150c9dccf1dSSam Bobroff 			request_event_sources_irqs(np, ras_hotplug_interrupt,
151c9dccf1dSSam Bobroff 						   "RAS_HOTPLUG");
152c9dccf1dSSam Bobroff 		of_node_put(np);
153c9dccf1dSSam Bobroff 	}
154c9dccf1dSSam Bobroff 
155c9dccf1dSSam Bobroff 	return 0;
156c9dccf1dSSam Bobroff }
157c9dccf1dSSam Bobroff machine_late_initcall(pseries, init_ras_hotplug_IRQ);
158c9dccf1dSSam Bobroff 
159c9dccf1dSSam Bobroff /*
160d9953105SMichael Ellerman  * Initialize handlers for the set of interrupts caused by hardware errors
161d9953105SMichael Ellerman  * and power system events.
162d9953105SMichael Ellerman  */
163d9953105SMichael Ellerman static int __init init_ras_IRQ(void)
164d9953105SMichael Ellerman {
165d9953105SMichael Ellerman 	struct device_node *np;
166d9953105SMichael Ellerman 
167d9953105SMichael Ellerman 	ras_check_exception_token = rtas_token("check-exception");
168d9953105SMichael Ellerman 
169d9953105SMichael Ellerman 	/* Internal Errors */
170d9953105SMichael Ellerman 	np = of_find_node_by_path("/event-sources/internal-errors");
171d9953105SMichael Ellerman 	if (np != NULL) {
17232c96f77SMark Nelson 		request_event_sources_irqs(np, ras_error_interrupt,
17332c96f77SMark Nelson 					   "RAS_ERROR");
174d9953105SMichael Ellerman 		of_node_put(np);
175d9953105SMichael Ellerman 	}
176d9953105SMichael Ellerman 
177d9953105SMichael Ellerman 	/* EPOW Events */
178d9953105SMichael Ellerman 	np = of_find_node_by_path("/event-sources/epow-events");
179d9953105SMichael Ellerman 	if (np != NULL) {
18032c96f77SMark Nelson 		request_event_sources_irqs(np, ras_epow_interrupt, "RAS_EPOW");
181d9953105SMichael Ellerman 		of_node_put(np);
182d9953105SMichael Ellerman 	}
183d9953105SMichael Ellerman 
18469ed3324SAnton Blanchard 	return 0;
185d9953105SMichael Ellerman }
1868e83e905SMichael Ellerman machine_subsys_initcall(pseries, init_ras_IRQ);
187d9953105SMichael Ellerman 
18855fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_NORMAL				1
18955fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_ON_UPS				2
19055fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
19155fc0c56SAnton Blanchard #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4
19255fc0c56SAnton Blanchard 
19355fc0c56SAnton Blanchard static void handle_system_shutdown(char event_modifier)
19455fc0c56SAnton Blanchard {
19555fc0c56SAnton Blanchard 	switch (event_modifier) {
19655fc0c56SAnton Blanchard 	case EPOW_SHUTDOWN_NORMAL:
197b4af279aSVipin K Parashar 		pr_emerg("Power off requested\n");
1981b7e0cbeSliguang 		orderly_poweroff(true);
19955fc0c56SAnton Blanchard 		break;
20055fc0c56SAnton Blanchard 
20155fc0c56SAnton Blanchard 	case EPOW_SHUTDOWN_ON_UPS:
202b4af279aSVipin K Parashar 		pr_emerg("Loss of system power detected. System is running on"
203b4af279aSVipin K Parashar 			 " UPS/battery. Check RTAS error log for details\n");
20479872e35SAnshuman Khandual 		orderly_poweroff(true);
20555fc0c56SAnton Blanchard 		break;
20655fc0c56SAnton Blanchard 
20755fc0c56SAnton Blanchard 	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
208b4af279aSVipin K Parashar 		pr_emerg("Loss of system critical functions detected. Check"
209b4af279aSVipin K Parashar 			 " RTAS error log for details\n");
2101b7e0cbeSliguang 		orderly_poweroff(true);
21155fc0c56SAnton Blanchard 		break;
21255fc0c56SAnton Blanchard 
21355fc0c56SAnton Blanchard 	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
214b4af279aSVipin K Parashar 		pr_emerg("High ambient temperature detected. Check RTAS"
215b4af279aSVipin K Parashar 			 " error log for details\n");
2161b7e0cbeSliguang 		orderly_poweroff(true);
21755fc0c56SAnton Blanchard 		break;
21855fc0c56SAnton Blanchard 
21955fc0c56SAnton Blanchard 	default:
220b4af279aSVipin K Parashar 		pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
22155fc0c56SAnton Blanchard 			event_modifier);
22255fc0c56SAnton Blanchard 	}
22355fc0c56SAnton Blanchard }
22455fc0c56SAnton Blanchard 
/* Payload of the EPOW section of a pseries RTAS error log. */
struct epow_errorlog {
	unsigned char sensor_value;	/* low 4 bits: EPOW action code */
	unsigned char event_modifier;	/* low 4 bits: event modifier */
	unsigned char extended_modifier;
	unsigned char reserved;
	unsigned char platform_reason;
};
23255fc0c56SAnton Blanchard 
23355fc0c56SAnton Blanchard #define EPOW_RESET			0
23455fc0c56SAnton Blanchard #define EPOW_WARN_COOLING		1
23555fc0c56SAnton Blanchard #define EPOW_WARN_POWER			2
23655fc0c56SAnton Blanchard #define EPOW_SYSTEM_SHUTDOWN		3
23755fc0c56SAnton Blanchard #define EPOW_SYSTEM_HALT		4
23855fc0c56SAnton Blanchard #define EPOW_MAIN_ENCLOSURE		5
23955fc0c56SAnton Blanchard #define EPOW_POWER_OFF			7
24055fc0c56SAnton Blanchard 
241e51df2c1SAnton Blanchard static void rtas_parse_epow_errlog(struct rtas_error_log *log)
24255fc0c56SAnton Blanchard {
24355fc0c56SAnton Blanchard 	struct pseries_errorlog *pseries_log;
24455fc0c56SAnton Blanchard 	struct epow_errorlog *epow_log;
24555fc0c56SAnton Blanchard 	char action_code;
24655fc0c56SAnton Blanchard 	char modifier;
24755fc0c56SAnton Blanchard 
24855fc0c56SAnton Blanchard 	pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
24955fc0c56SAnton Blanchard 	if (pseries_log == NULL)
25055fc0c56SAnton Blanchard 		return;
25155fc0c56SAnton Blanchard 
25255fc0c56SAnton Blanchard 	epow_log = (struct epow_errorlog *)pseries_log->data;
25355fc0c56SAnton Blanchard 	action_code = epow_log->sensor_value & 0xF;	/* bottom 4 bits */
25455fc0c56SAnton Blanchard 	modifier = epow_log->event_modifier & 0xF;	/* bottom 4 bits */
25555fc0c56SAnton Blanchard 
25655fc0c56SAnton Blanchard 	switch (action_code) {
25755fc0c56SAnton Blanchard 	case EPOW_RESET:
258b4af279aSVipin K Parashar 		if (num_epow_events) {
259b4af279aSVipin K Parashar 			pr_info("Non critical power/cooling issue cleared\n");
260b4af279aSVipin K Parashar 			num_epow_events--;
261b4af279aSVipin K Parashar 		}
26255fc0c56SAnton Blanchard 		break;
26355fc0c56SAnton Blanchard 
26455fc0c56SAnton Blanchard 	case EPOW_WARN_COOLING:
265b4af279aSVipin K Parashar 		pr_info("Non-critical cooling issue detected. Check RTAS error"
266b4af279aSVipin K Parashar 			" log for details\n");
26755fc0c56SAnton Blanchard 		break;
26855fc0c56SAnton Blanchard 
26955fc0c56SAnton Blanchard 	case EPOW_WARN_POWER:
270b4af279aSVipin K Parashar 		pr_info("Non-critical power issue detected. Check RTAS error"
271b4af279aSVipin K Parashar 			" log for details\n");
27255fc0c56SAnton Blanchard 		break;
27355fc0c56SAnton Blanchard 
27455fc0c56SAnton Blanchard 	case EPOW_SYSTEM_SHUTDOWN:
27555fc0c56SAnton Blanchard 		handle_system_shutdown(epow_log->event_modifier);
27655fc0c56SAnton Blanchard 		break;
27755fc0c56SAnton Blanchard 
27855fc0c56SAnton Blanchard 	case EPOW_SYSTEM_HALT:
279b4af279aSVipin K Parashar 		pr_emerg("Critical power/cooling issue detected. Check RTAS"
280b4af279aSVipin K Parashar 			 " error log for details. Powering off.\n");
2811b7e0cbeSliguang 		orderly_poweroff(true);
28255fc0c56SAnton Blanchard 		break;
28355fc0c56SAnton Blanchard 
28455fc0c56SAnton Blanchard 	case EPOW_MAIN_ENCLOSURE:
28555fc0c56SAnton Blanchard 	case EPOW_POWER_OFF:
286b4af279aSVipin K Parashar 		pr_emerg("System about to lose power. Check RTAS error log "
287b4af279aSVipin K Parashar 			 " for details. Powering off immediately.\n");
28855fc0c56SAnton Blanchard 		emergency_sync();
28955fc0c56SAnton Blanchard 		kernel_power_off();
29055fc0c56SAnton Blanchard 		break;
29155fc0c56SAnton Blanchard 
29255fc0c56SAnton Blanchard 	default:
293b4af279aSVipin K Parashar 		pr_err("Unknown power/cooling event (action code  = %d)\n",
29455fc0c56SAnton Blanchard 			action_code);
29555fc0c56SAnton Blanchard 	}
296b4af279aSVipin K Parashar 
297b4af279aSVipin K Parashar 	/* Increment epow events counter variable */
298b4af279aSVipin K Parashar 	if (action_code != EPOW_RESET)
299b4af279aSVipin K Parashar 		num_epow_events++;
30055fc0c56SAnton Blanchard }
30155fc0c56SAnton Blanchard 
302b7d9eb39SJohn Allen static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
303b7d9eb39SJohn Allen {
304b7d9eb39SJohn Allen 	struct pseries_errorlog *pseries_log;
305b7d9eb39SJohn Allen 	struct pseries_hp_errorlog *hp_elog;
306b7d9eb39SJohn Allen 
307b7d9eb39SJohn Allen 	spin_lock(&ras_log_buf_lock);
308b7d9eb39SJohn Allen 
309b7d9eb39SJohn Allen 	rtas_call(ras_check_exception_token, 6, 1, NULL,
310b7d9eb39SJohn Allen 		  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
311b7d9eb39SJohn Allen 		  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
312b7d9eb39SJohn Allen 		  rtas_get_error_log_max());
313b7d9eb39SJohn Allen 
314b7d9eb39SJohn Allen 	pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
315b7d9eb39SJohn Allen 					   PSERIES_ELOG_SECT_ID_HOTPLUG);
316b7d9eb39SJohn Allen 	hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
317b7d9eb39SJohn Allen 
318b7d9eb39SJohn Allen 	/*
319b7d9eb39SJohn Allen 	 * Since PCI hotplug is not currently supported on pseries, put PCI
320b7d9eb39SJohn Allen 	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
321b7d9eb39SJohn Allen 	 */
322b7d9eb39SJohn Allen 	if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
3234c5d87dbSOliver O'Halloran 	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
3244c5d87dbSOliver O'Halloran 	    hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
325fd12527aSNathan Fontenot 		queue_hotplug_event(hp_elog);
326b7d9eb39SJohn Allen 	else
327b7d9eb39SJohn Allen 		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
328b7d9eb39SJohn Allen 
329b7d9eb39SJohn Allen 	spin_unlock(&ras_log_buf_lock);
330b7d9eb39SJohn Allen 	return IRQ_HANDLED;
331b7d9eb39SJohn Allen }
332b7d9eb39SJohn Allen 
33355fc0c56SAnton Blanchard /* Handle environmental and power warning (EPOW) interrupts. */
3347d12e780SDavid Howells static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
335d9953105SMichael Ellerman {
33655fc0c56SAnton Blanchard 	int status;
33755fc0c56SAnton Blanchard 	int state;
338d9953105SMichael Ellerman 	int critical;
339d9953105SMichael Ellerman 
3401c2cb594SThomas Huth 	status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX,
3411c2cb594SThomas Huth 				      &state);
342d9953105SMichael Ellerman 
343d9953105SMichael Ellerman 	if (state > 3)
344d9953105SMichael Ellerman 		critical = 1;		/* Time Critical */
345d9953105SMichael Ellerman 	else
346d9953105SMichael Ellerman 		critical = 0;
347d9953105SMichael Ellerman 
348d9953105SMichael Ellerman 	spin_lock(&ras_log_buf_lock);
349d9953105SMichael Ellerman 
350d9953105SMichael Ellerman 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
351b08e281bSMark Nelson 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
352476eb491SGrant Likely 			   virq_to_hw(irq),
3536f43747fSAnton Blanchard 			   RTAS_EPOW_WARNING,
354d9953105SMichael Ellerman 			   critical, __pa(&ras_log_buf),
355d9953105SMichael Ellerman 				rtas_get_error_log_max());
356d9953105SMichael Ellerman 
357d9953105SMichael Ellerman 	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
358d9953105SMichael Ellerman 
35955fc0c56SAnton Blanchard 	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
36055fc0c56SAnton Blanchard 
361d9953105SMichael Ellerman 	spin_unlock(&ras_log_buf_lock);
362d9953105SMichael Ellerman 	return IRQ_HANDLED;
363d9953105SMichael Ellerman }
364d9953105SMichael Ellerman 
365d9953105SMichael Ellerman /*
366d9953105SMichael Ellerman  * Handle hardware error interrupts.
367d9953105SMichael Ellerman  *
368d9953105SMichael Ellerman  * RTAS check-exception is called to collect data on the exception.  If
369d9953105SMichael Ellerman  * the error is deemed recoverable, we log a warning and return.
370d9953105SMichael Ellerman  * For nonrecoverable errors, an error is logged and we stop all processing
371d9953105SMichael Ellerman  * as quickly as possible in order to prevent propagation of the failure.
372d9953105SMichael Ellerman  */
3737d12e780SDavid Howells static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
374d9953105SMichael Ellerman {
375d9953105SMichael Ellerman 	struct rtas_error_log *rtas_elog;
376cc8b5263SAnton Blanchard 	int status;
377d9953105SMichael Ellerman 	int fatal;
378d9953105SMichael Ellerman 
379d9953105SMichael Ellerman 	spin_lock(&ras_log_buf_lock);
380d9953105SMichael Ellerman 
381d9953105SMichael Ellerman 	status = rtas_call(ras_check_exception_token, 6, 1, NULL,
382b08e281bSMark Nelson 			   RTAS_VECTOR_EXTERNAL_INTERRUPT,
383476eb491SGrant Likely 			   virq_to_hw(irq),
384d9953105SMichael Ellerman 			   RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
385d9953105SMichael Ellerman 			   __pa(&ras_log_buf),
386d9953105SMichael Ellerman 				rtas_get_error_log_max());
387d9953105SMichael Ellerman 
388d9953105SMichael Ellerman 	rtas_elog = (struct rtas_error_log *)ras_log_buf;
389d9953105SMichael Ellerman 
390a08a53eaSGreg Kurz 	if (status == 0 &&
391a08a53eaSGreg Kurz 	    rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC)
392d9953105SMichael Ellerman 		fatal = 1;
393d9953105SMichael Ellerman 	else
394d9953105SMichael Ellerman 		fatal = 0;
395d9953105SMichael Ellerman 
396d9953105SMichael Ellerman 	/* format and print the extended information */
397d9953105SMichael Ellerman 	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
398d9953105SMichael Ellerman 
399d9953105SMichael Ellerman 	if (fatal) {
400b4af279aSVipin K Parashar 		pr_emerg("Fatal hardware error detected. Check RTAS error"
401b4af279aSVipin K Parashar 			 " log for details. Powering off immediately\n");
402cc8b5263SAnton Blanchard 		emergency_sync();
403cc8b5263SAnton Blanchard 		kernel_power_off();
404d9953105SMichael Ellerman 	} else {
405b4af279aSVipin K Parashar 		pr_err("Recoverable hardware error detected\n");
406d9953105SMichael Ellerman 	}
407d9953105SMichael Ellerman 
408d9953105SMichael Ellerman 	spin_unlock(&ras_log_buf_lock);
409d9953105SMichael Ellerman 	return IRQ_HANDLED;
410d9953105SMichael Ellerman }
411d9953105SMichael Ellerman 
412d368514cSAnton Blanchard /*
413d368514cSAnton Blanchard  * Some versions of FWNMI place the buffer inside the 4kB page starting at
414d368514cSAnton Blanchard  * 0x7000. Other versions place it inside the rtas buffer. We check both.
415d368514cSAnton Blanchard  */
416d368514cSAnton Blanchard #define VALID_FWNMI_BUFFER(A) \
417d368514cSAnton Blanchard 	((((A) >= 0x7000) && ((A) < 0x7ff0)) || \
418d368514cSAnton Blanchard 	(((A) >= rtas.base) && ((A) < (rtas.base + rtas.size - 16))))
419d368514cSAnton Blanchard 
42094675cceSMahesh Salgaonkar static inline struct rtas_error_log *fwnmi_get_errlog(void)
42194675cceSMahesh Salgaonkar {
42294675cceSMahesh Salgaonkar 	return (struct rtas_error_log *)local_paca->mce_data_buf;
42394675cceSMahesh Salgaonkar }
42494675cceSMahesh Salgaonkar 
425d368514cSAnton Blanchard /*
426d368514cSAnton Blanchard  * Get the error information for errors coming through the
427d9953105SMichael Ellerman  * FWNMI vectors.  The pt_regs' r3 will be updated to reflect
428d9953105SMichael Ellerman  * the actual r3 if possible, and a ptr to the error log entry
429d9953105SMichael Ellerman  * will be returned if found.
430d9953105SMichael Ellerman  *
43194675cceSMahesh Salgaonkar  * Use one buffer mce_data_buf per cpu to store RTAS error.
432d368514cSAnton Blanchard  *
43394675cceSMahesh Salgaonkar  * The mce_data_buf does not have any locks or protection around it,
434d9953105SMichael Ellerman  * if a second machine check comes in, or a system reset is done
435d9953105SMichael Ellerman  * before we have logged the error, then we will get corruption in the
436d9953105SMichael Ellerman  * error log.  This is preferable over holding off on calling
437d9953105SMichael Ellerman  * ibm,nmi-interlock which would result in us checkstopping if a
438d9953105SMichael Ellerman  * second machine check did come in.
439d9953105SMichael Ellerman  */
440d9953105SMichael Ellerman static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
441d9953105SMichael Ellerman {
442d9953105SMichael Ellerman 	unsigned long *savep;
44394675cceSMahesh Salgaonkar 	struct rtas_error_log *h;
444d9953105SMichael Ellerman 
445ee1dd1e3SMahesh Salgaonkar 	/* Mask top two bits */
446ee1dd1e3SMahesh Salgaonkar 	regs->gpr[3] &= ~(0x3UL << 62);
447ee1dd1e3SMahesh Salgaonkar 
448d368514cSAnton Blanchard 	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
449f0e939aeSAnton Blanchard 		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
450d368514cSAnton Blanchard 		return NULL;
451d9953105SMichael Ellerman 	}
452d368514cSAnton Blanchard 
453d368514cSAnton Blanchard 	savep = __va(regs->gpr[3]);
454cd813e1cSMahesh Salgaonkar 	regs->gpr[3] = be64_to_cpu(savep[0]);	/* restore original r3 */
455d368514cSAnton Blanchard 
456d368514cSAnton Blanchard 	h = (struct rtas_error_log *)&savep[1];
45794675cceSMahesh Salgaonkar 	/* Use the per cpu buffer from paca to store rtas error log */
45894675cceSMahesh Salgaonkar 	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
459a08a53eaSGreg Kurz 	if (!rtas_error_extended(h)) {
46094675cceSMahesh Salgaonkar 		memcpy(local_paca->mce_data_buf, h, sizeof(__u64));
461d368514cSAnton Blanchard 	} else {
462a08a53eaSGreg Kurz 		int len, error_log_length;
463d368514cSAnton Blanchard 
464a08a53eaSGreg Kurz 		error_log_length = 8 + rtas_error_extended_log_length(h);
46574e96bf4SMahesh Salgaonkar 		len = min_t(int, error_log_length, RTAS_ERROR_LOG_MAX);
46694675cceSMahesh Salgaonkar 		memcpy(local_paca->mce_data_buf, h, len);
467d368514cSAnton Blanchard 	}
468d368514cSAnton Blanchard 
46994675cceSMahesh Salgaonkar 	return (struct rtas_error_log *)local_paca->mce_data_buf;
470d9953105SMichael Ellerman }
471d9953105SMichael Ellerman 
472d9953105SMichael Ellerman /* Call this when done with the data returned by FWNMI_get_errinfo.
473d9953105SMichael Ellerman  * It will release the saved data area for other CPUs in the
474d9953105SMichael Ellerman  * partition to receive FWNMI errors.
475d9953105SMichael Ellerman  */
476d9953105SMichael Ellerman static void fwnmi_release_errinfo(void)
477d9953105SMichael Ellerman {
478d9953105SMichael Ellerman 	int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
479d9953105SMichael Ellerman 	if (ret != 0)
480d368514cSAnton Blanchard 		printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", ret);
481d9953105SMichael Ellerman }
482d9953105SMichael Ellerman 
483c902be71SArnd Bergmann int pSeries_system_reset_exception(struct pt_regs *regs)
484d9953105SMichael Ellerman {
485bded0706SNicholas Piggin #ifdef __LITTLE_ENDIAN__
486bded0706SNicholas Piggin 	/*
487bded0706SNicholas Piggin 	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
488bded0706SNicholas Piggin 	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
489bded0706SNicholas Piggin 	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
490bded0706SNicholas Piggin 	 * so clear it. It will be missing MSR_RI so we won't try to recover.
491bded0706SNicholas Piggin 	 */
492bded0706SNicholas Piggin 	if ((be64_to_cpu(regs->msr) &
493bded0706SNicholas Piggin 			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
494bded0706SNicholas Piggin 			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
495bded0706SNicholas Piggin 		regs->nip = be64_to_cpu((__be64)regs->nip);
496bded0706SNicholas Piggin 		regs->msr = 0;
497bded0706SNicholas Piggin 	}
498bded0706SNicholas Piggin #endif
499bded0706SNicholas Piggin 
500d9953105SMichael Ellerman 	if (fwnmi_active) {
501d9953105SMichael Ellerman 		struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
502d9953105SMichael Ellerman 		if (errhdr) {
503d9953105SMichael Ellerman 			/* XXX Should look at FWNMI information */
504d9953105SMichael Ellerman 		}
505d9953105SMichael Ellerman 		fwnmi_release_errinfo();
506d9953105SMichael Ellerman 	}
507102c05e8SNicholas Piggin 
508102c05e8SNicholas Piggin 	if (smp_handle_nmi_ipi(regs))
509102c05e8SNicholas Piggin 		return 1;
510102c05e8SNicholas Piggin 
511c902be71SArnd Bergmann 	return 0; /* need to perform reset */
512d9953105SMichael Ellerman }
513d9953105SMichael Ellerman 
5148f0b8056SMahesh Salgaonkar #define VAL_TO_STRING(ar, val)	\
5158f0b8056SMahesh Salgaonkar 	(((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
5168f0b8056SMahesh Salgaonkar 
5178f0b8056SMahesh Salgaonkar static void pseries_print_mce_info(struct pt_regs *regs,
5188f0b8056SMahesh Salgaonkar 				   struct rtas_error_log *errp)
5198f0b8056SMahesh Salgaonkar {
5208f0b8056SMahesh Salgaonkar 	const char *level, *sevstr;
5218f0b8056SMahesh Salgaonkar 	struct pseries_errorlog *pseries_log;
5228f0b8056SMahesh Salgaonkar 	struct pseries_mc_errorlog *mce_log;
5238f0b8056SMahesh Salgaonkar 	u8 error_type, err_sub_type;
5248f0b8056SMahesh Salgaonkar 	u64 addr;
5258f0b8056SMahesh Salgaonkar 	u8 initiator = rtas_error_initiator(errp);
5268f0b8056SMahesh Salgaonkar 	int disposition = rtas_error_disposition(errp);
5278f0b8056SMahesh Salgaonkar 
5288f0b8056SMahesh Salgaonkar 	static const char * const initiators[] = {
529c9d8dda4SMahesh Salgaonkar 		[0] = "Unknown",
530c9d8dda4SMahesh Salgaonkar 		[1] = "CPU",
531c9d8dda4SMahesh Salgaonkar 		[2] = "PCI",
532c9d8dda4SMahesh Salgaonkar 		[3] = "ISA",
533c9d8dda4SMahesh Salgaonkar 		[4] = "Memory",
534c9d8dda4SMahesh Salgaonkar 		[5] = "Power Mgmt",
5358f0b8056SMahesh Salgaonkar 	};
5368f0b8056SMahesh Salgaonkar 	static const char * const mc_err_types[] = {
537c9d8dda4SMahesh Salgaonkar 		[0] = "UE",
538c9d8dda4SMahesh Salgaonkar 		[1] = "SLB",
539c9d8dda4SMahesh Salgaonkar 		[2] = "ERAT",
540c9d8dda4SMahesh Salgaonkar 		[3] = "Unknown",
541c9d8dda4SMahesh Salgaonkar 		[4] = "TLB",
542c9d8dda4SMahesh Salgaonkar 		[5] = "D-Cache",
543c9d8dda4SMahesh Salgaonkar 		[6] = "Unknown",
544c9d8dda4SMahesh Salgaonkar 		[7] = "I-Cache",
5458f0b8056SMahesh Salgaonkar 	};
5468f0b8056SMahesh Salgaonkar 	static const char * const mc_ue_types[] = {
547c9d8dda4SMahesh Salgaonkar 		[0] = "Indeterminate",
548c9d8dda4SMahesh Salgaonkar 		[1] = "Instruction fetch",
549c9d8dda4SMahesh Salgaonkar 		[2] = "Page table walk ifetch",
550c9d8dda4SMahesh Salgaonkar 		[3] = "Load/Store",
551c9d8dda4SMahesh Salgaonkar 		[4] = "Page table walk Load/Store",
5528f0b8056SMahesh Salgaonkar 	};
5538f0b8056SMahesh Salgaonkar 
5548f0b8056SMahesh Salgaonkar 	/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
5558f0b8056SMahesh Salgaonkar 	static const char * const mc_slb_types[] = {
556c9d8dda4SMahesh Salgaonkar 		[0] = "Parity",
557c9d8dda4SMahesh Salgaonkar 		[1] = "Multihit",
558c9d8dda4SMahesh Salgaonkar 		[2] = "Indeterminate",
5598f0b8056SMahesh Salgaonkar 	};
5608f0b8056SMahesh Salgaonkar 
5618f0b8056SMahesh Salgaonkar 	/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
5628f0b8056SMahesh Salgaonkar 	static const char * const mc_soft_types[] = {
563c9d8dda4SMahesh Salgaonkar 		[0] = "Unknown",
564c9d8dda4SMahesh Salgaonkar 		[1] = "Parity",
565c9d8dda4SMahesh Salgaonkar 		[2] = "Multihit",
566c9d8dda4SMahesh Salgaonkar 		[3] = "Indeterminate",
5678f0b8056SMahesh Salgaonkar 	};
5688f0b8056SMahesh Salgaonkar 
5698f0b8056SMahesh Salgaonkar 	if (!rtas_error_extended(errp)) {
5708f0b8056SMahesh Salgaonkar 		pr_err("Machine check interrupt: Missing extended error log\n");
5718f0b8056SMahesh Salgaonkar 		return;
5728f0b8056SMahesh Salgaonkar 	}
5738f0b8056SMahesh Salgaonkar 
5748f0b8056SMahesh Salgaonkar 	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
5758f0b8056SMahesh Salgaonkar 	if (pseries_log == NULL)
5768f0b8056SMahesh Salgaonkar 		return;
5778f0b8056SMahesh Salgaonkar 
5788f0b8056SMahesh Salgaonkar 	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
5798f0b8056SMahesh Salgaonkar 
5808f0b8056SMahesh Salgaonkar 	error_type = mce_log->error_type;
5818f0b8056SMahesh Salgaonkar 	err_sub_type = rtas_mc_error_sub_type(mce_log);
5828f0b8056SMahesh Salgaonkar 
5838f0b8056SMahesh Salgaonkar 	switch (rtas_error_severity(errp)) {
5848f0b8056SMahesh Salgaonkar 	case RTAS_SEVERITY_NO_ERROR:
5858f0b8056SMahesh Salgaonkar 		level = KERN_INFO;
5868f0b8056SMahesh Salgaonkar 		sevstr = "Harmless";
5878f0b8056SMahesh Salgaonkar 		break;
5888f0b8056SMahesh Salgaonkar 	case RTAS_SEVERITY_WARNING:
5898f0b8056SMahesh Salgaonkar 		level = KERN_WARNING;
5908f0b8056SMahesh Salgaonkar 		sevstr = "";
5918f0b8056SMahesh Salgaonkar 		break;
5928f0b8056SMahesh Salgaonkar 	case RTAS_SEVERITY_ERROR:
5938f0b8056SMahesh Salgaonkar 	case RTAS_SEVERITY_ERROR_SYNC:
5948f0b8056SMahesh Salgaonkar 		level = KERN_ERR;
5958f0b8056SMahesh Salgaonkar 		sevstr = "Severe";
5968f0b8056SMahesh Salgaonkar 		break;
5978f0b8056SMahesh Salgaonkar 	case RTAS_SEVERITY_FATAL:
5988f0b8056SMahesh Salgaonkar 	default:
5998f0b8056SMahesh Salgaonkar 		level = KERN_ERR;
6008f0b8056SMahesh Salgaonkar 		sevstr = "Fatal";
6018f0b8056SMahesh Salgaonkar 		break;
6028f0b8056SMahesh Salgaonkar 	}
6038f0b8056SMahesh Salgaonkar 
604c6d15258SMahesh Salgaonkar #ifdef CONFIG_PPC_BOOK3S_64
605c6d15258SMahesh Salgaonkar 	/* Display faulty slb contents for SLB errors. */
606c6d15258SMahesh Salgaonkar 	if (error_type == MC_ERROR_TYPE_SLB)
607c6d15258SMahesh Salgaonkar 		slb_dump_contents(local_paca->mce_faulty_slbs);
608c6d15258SMahesh Salgaonkar #endif
609c6d15258SMahesh Salgaonkar 
6108f0b8056SMahesh Salgaonkar 	printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
6118f0b8056SMahesh Salgaonkar 	       disposition == RTAS_DISP_FULLY_RECOVERED ?
6128f0b8056SMahesh Salgaonkar 	       "Recovered" : "Not recovered");
6138f0b8056SMahesh Salgaonkar 	if (user_mode(regs)) {
6148f0b8056SMahesh Salgaonkar 		printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
6158f0b8056SMahesh Salgaonkar 		       regs->nip, current->pid, current->comm);
6168f0b8056SMahesh Salgaonkar 	} else {
6178f0b8056SMahesh Salgaonkar 		printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
6188f0b8056SMahesh Salgaonkar 		       (void *)regs->nip);
6198f0b8056SMahesh Salgaonkar 	}
6208f0b8056SMahesh Salgaonkar 	printk("%s  Initiator: %s\n", level,
6218f0b8056SMahesh Salgaonkar 	       VAL_TO_STRING(initiators, initiator));
6228f0b8056SMahesh Salgaonkar 
6238f0b8056SMahesh Salgaonkar 	switch (error_type) {
6248f0b8056SMahesh Salgaonkar 	case MC_ERROR_TYPE_UE:
6258f0b8056SMahesh Salgaonkar 		printk("%s  Error type: %s [%s]\n", level,
6268f0b8056SMahesh Salgaonkar 		       VAL_TO_STRING(mc_err_types, error_type),
6278f0b8056SMahesh Salgaonkar 		       VAL_TO_STRING(mc_ue_types, err_sub_type));
6288f0b8056SMahesh Salgaonkar 		break;
6298f0b8056SMahesh Salgaonkar 	case MC_ERROR_TYPE_SLB:
6308f0b8056SMahesh Salgaonkar 		printk("%s  Error type: %s [%s]\n", level,
6318f0b8056SMahesh Salgaonkar 		       VAL_TO_STRING(mc_err_types, error_type),
6328f0b8056SMahesh Salgaonkar 		       VAL_TO_STRING(mc_slb_types, err_sub_type));
6338f0b8056SMahesh Salgaonkar 		break;
6348f0b8056SMahesh Salgaonkar 	case MC_ERROR_TYPE_ERAT:
6358f0b8056SMahesh Salgaonkar 	case MC_ERROR_TYPE_TLB:
6368f0b8056SMahesh Salgaonkar 		printk("%s  Error type: %s [%s]\n", level,
6378f0b8056SMahesh Salgaonkar 		       VAL_TO_STRING(mc_err_types, error_type),
6388f0b8056SMahesh Salgaonkar 		       VAL_TO_STRING(mc_soft_types, err_sub_type));
6398f0b8056SMahesh Salgaonkar 		break;
6408f0b8056SMahesh Salgaonkar 	default:
6418f0b8056SMahesh Salgaonkar 		printk("%s  Error type: %s\n", level,
6428f0b8056SMahesh Salgaonkar 		       VAL_TO_STRING(mc_err_types, error_type));
6438f0b8056SMahesh Salgaonkar 		break;
6448f0b8056SMahesh Salgaonkar 	}
6458f0b8056SMahesh Salgaonkar 
6468f0b8056SMahesh Salgaonkar 	addr = rtas_mc_get_effective_addr(mce_log);
6478f0b8056SMahesh Salgaonkar 	if (addr)
6488f0b8056SMahesh Salgaonkar 		printk("%s    Effective address: %016llx\n", level, addr);
6498f0b8056SMahesh Salgaonkar }
6508f0b8056SMahesh Salgaonkar 
651a43c1590SMahesh Salgaonkar static int mce_handle_error(struct rtas_error_log *errp)
652a43c1590SMahesh Salgaonkar {
653a43c1590SMahesh Salgaonkar 	struct pseries_errorlog *pseries_log;
654a43c1590SMahesh Salgaonkar 	struct pseries_mc_errorlog *mce_log;
655a43c1590SMahesh Salgaonkar 	int disposition = rtas_error_disposition(errp);
656a43c1590SMahesh Salgaonkar 	u8 error_type;
657a43c1590SMahesh Salgaonkar 
658a43c1590SMahesh Salgaonkar 	if (!rtas_error_extended(errp))
659a43c1590SMahesh Salgaonkar 		goto out;
660a43c1590SMahesh Salgaonkar 
661a43c1590SMahesh Salgaonkar 	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
662a43c1590SMahesh Salgaonkar 	if (pseries_log == NULL)
663a43c1590SMahesh Salgaonkar 		goto out;
664a43c1590SMahesh Salgaonkar 
665a43c1590SMahesh Salgaonkar 	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
666a43c1590SMahesh Salgaonkar 	error_type = mce_log->error_type;
667a43c1590SMahesh Salgaonkar 
668a43c1590SMahesh Salgaonkar #ifdef CONFIG_PPC_BOOK3S_64
669a43c1590SMahesh Salgaonkar 	if (disposition == RTAS_DISP_NOT_RECOVERED) {
670a43c1590SMahesh Salgaonkar 		switch (error_type) {
671a43c1590SMahesh Salgaonkar 		case	MC_ERROR_TYPE_SLB:
672a43c1590SMahesh Salgaonkar 		case	MC_ERROR_TYPE_ERAT:
673c6d15258SMahesh Salgaonkar 			/*
674c6d15258SMahesh Salgaonkar 			 * Store the old slb content in paca before flushing.
675c6d15258SMahesh Salgaonkar 			 * Print this when we go to virtual mode.
676c6d15258SMahesh Salgaonkar 			 * There are chances that we may hit MCE again if there
677c6d15258SMahesh Salgaonkar 			 * is a parity error on the SLB entry we trying to read
678c6d15258SMahesh Salgaonkar 			 * for saving. Hence limit the slb saving to single
679c6d15258SMahesh Salgaonkar 			 * level of recursion.
680c6d15258SMahesh Salgaonkar 			 */
681c6d15258SMahesh Salgaonkar 			if (local_paca->in_mce == 1)
682c6d15258SMahesh Salgaonkar 				slb_save_contents(local_paca->mce_faulty_slbs);
683a43c1590SMahesh Salgaonkar 			flush_and_reload_slb();
684a43c1590SMahesh Salgaonkar 			disposition = RTAS_DISP_FULLY_RECOVERED;
685a43c1590SMahesh Salgaonkar 			rtas_set_disposition_recovered(errp);
686a43c1590SMahesh Salgaonkar 			break;
687a43c1590SMahesh Salgaonkar 		default:
688a43c1590SMahesh Salgaonkar 			break;
689a43c1590SMahesh Salgaonkar 		}
690a43c1590SMahesh Salgaonkar 	}
691a43c1590SMahesh Salgaonkar #endif
692a43c1590SMahesh Salgaonkar 
693a43c1590SMahesh Salgaonkar out:
694a43c1590SMahesh Salgaonkar 	return disposition;
695a43c1590SMahesh Salgaonkar }
696a43c1590SMahesh Salgaonkar 
6977f177f98SGanesh Goudar #ifdef CONFIG_MEMORY_FAILURE
6987f177f98SGanesh Goudar 
/*
 * Per-CPU list of physical addresses reported by UE machine checks.
 * queue_ue_paddr() appends to rtas_ue_paddr[] and bumps rtas_ue_count;
 * pseries_hwpoison_work_fn() consumes entries from the top and decrements
 * the count. At most MAX_MC_EVT addresses are held per CPU.
 */
6997f177f98SGanesh Goudar static DEFINE_PER_CPU(int, rtas_ue_count);
7007f177f98SGanesh Goudar static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
7017f177f98SGanesh Goudar 
/*
 * Bits tested in pseries_mc_errorlog.sub_err_type by
 * pseries_do_memory_failure() to decide whether the log's effective or
 * logical address field is used.
 */
7027f177f98SGanesh Goudar #define UE_EFFECTIVE_ADDR_PROVIDED	0x40
7037f177f98SGanesh Goudar #define UE_LOGICAL_ADDR_PROVIDED	0x20
7047f177f98SGanesh Goudar 
7057f177f98SGanesh Goudar 
7067f177f98SGanesh Goudar static void pseries_hwpoison_work_fn(struct work_struct *work)
7077f177f98SGanesh Goudar {
7087f177f98SGanesh Goudar 	unsigned long paddr;
7097f177f98SGanesh Goudar 	int index;
7107f177f98SGanesh Goudar 
7117f177f98SGanesh Goudar 	while (__this_cpu_read(rtas_ue_count) > 0) {
7127f177f98SGanesh Goudar 		index = __this_cpu_read(rtas_ue_count) - 1;
7137f177f98SGanesh Goudar 		paddr = __this_cpu_read(rtas_ue_paddr[index]);
7147f177f98SGanesh Goudar 		memory_failure(paddr >> PAGE_SHIFT, 0);
7157f177f98SGanesh Goudar 		__this_cpu_dec(rtas_ue_count);
7167f177f98SGanesh Goudar 	}
7177f177f98SGanesh Goudar }
7187f177f98SGanesh Goudar 
7197f177f98SGanesh Goudar static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
7207f177f98SGanesh Goudar 
7217f177f98SGanesh Goudar static void queue_ue_paddr(unsigned long paddr)
7227f177f98SGanesh Goudar {
7237f177f98SGanesh Goudar 	int index;
7247f177f98SGanesh Goudar 
7257f177f98SGanesh Goudar 	index = __this_cpu_inc_return(rtas_ue_count) - 1;
7267f177f98SGanesh Goudar 	if (index >= MAX_MC_EVT) {
7277f177f98SGanesh Goudar 		__this_cpu_dec(rtas_ue_count);
7287f177f98SGanesh Goudar 		return;
7297f177f98SGanesh Goudar 	}
7307f177f98SGanesh Goudar 	this_cpu_write(rtas_ue_paddr[index], paddr);
7317f177f98SGanesh Goudar 	schedule_work(&hwpoison_work);
7327f177f98SGanesh Goudar }
7337f177f98SGanesh Goudar 
7347f177f98SGanesh Goudar static void pseries_do_memory_failure(struct pt_regs *regs,
7357f177f98SGanesh Goudar 				      struct pseries_mc_errorlog *mce_log)
7367f177f98SGanesh Goudar {
7377f177f98SGanesh Goudar 	unsigned long paddr;
7387f177f98SGanesh Goudar 
7397f177f98SGanesh Goudar 	if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
7407f177f98SGanesh Goudar 		paddr = be64_to_cpu(mce_log->logical_address);
7417f177f98SGanesh Goudar 	} else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
7427f177f98SGanesh Goudar 		unsigned long pfn;
7437f177f98SGanesh Goudar 
7447f177f98SGanesh Goudar 		pfn = addr_to_pfn(regs,
7457f177f98SGanesh Goudar 				  be64_to_cpu(mce_log->effective_address));
7467f177f98SGanesh Goudar 		if (pfn == ULONG_MAX)
7477f177f98SGanesh Goudar 			return;
7487f177f98SGanesh Goudar 		paddr = pfn << PAGE_SHIFT;
7497f177f98SGanesh Goudar 	} else {
7507f177f98SGanesh Goudar 		return;
7517f177f98SGanesh Goudar 	}
7527f177f98SGanesh Goudar 	queue_ue_paddr(paddr);
7537f177f98SGanesh Goudar }
7547f177f98SGanesh Goudar 
7557f177f98SGanesh Goudar static void pseries_process_ue(struct pt_regs *regs,
7567f177f98SGanesh Goudar 			       struct rtas_error_log *errp)
7577f177f98SGanesh Goudar {
7587f177f98SGanesh Goudar 	struct pseries_errorlog *pseries_log;
7597f177f98SGanesh Goudar 	struct pseries_mc_errorlog *mce_log;
7607f177f98SGanesh Goudar 
7617f177f98SGanesh Goudar 	if (!rtas_error_extended(errp))
7627f177f98SGanesh Goudar 		return;
7637f177f98SGanesh Goudar 
7647f177f98SGanesh Goudar 	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
7657f177f98SGanesh Goudar 	if (!pseries_log)
7667f177f98SGanesh Goudar 		return;
7677f177f98SGanesh Goudar 
7687f177f98SGanesh Goudar 	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
7697f177f98SGanesh Goudar 
7707f177f98SGanesh Goudar 	if (mce_log->error_type == MC_ERROR_TYPE_UE)
7717f177f98SGanesh Goudar 		pseries_do_memory_failure(regs, mce_log);
7727f177f98SGanesh Goudar }
7737f177f98SGanesh Goudar #else
/* Empty stub used when CONFIG_MEMORY_FAILURE is not set. */
7747f177f98SGanesh Goudar static inline void pseries_process_ue(struct pt_regs *regs,
7757f177f98SGanesh Goudar 				      struct rtas_error_log *errp) { }
7767f177f98SGanesh Goudar #endif /*CONFIG_MEMORY_FAILURE */
7777f177f98SGanesh Goudar 
778d9953105SMichael Ellerman /*
77994675cceSMahesh Salgaonkar  * Process MCE rtas errlog event.
78094675cceSMahesh Salgaonkar  */
78194675cceSMahesh Salgaonkar static void mce_process_errlog_event(struct irq_work *work)
78294675cceSMahesh Salgaonkar {
78394675cceSMahesh Salgaonkar 	struct rtas_error_log *err;
78494675cceSMahesh Salgaonkar 
78594675cceSMahesh Salgaonkar 	err = fwnmi_get_errlog();
78694675cceSMahesh Salgaonkar 	log_error((char *)err, ERR_TYPE_RTAS_LOG, 0);
78794675cceSMahesh Salgaonkar }
78894675cceSMahesh Salgaonkar 
78994675cceSMahesh Salgaonkar /*
790d9953105SMichael Ellerman  * See if we can recover from a machine check exception.
791d9953105SMichael Ellerman  * This is only called on power4 (or above) and only via
792d9953105SMichael Ellerman  * the Firmware Non-Maskable Interrupts (fwnmi) handler
793d9953105SMichael Ellerman  * which provides the error analysis for us.
794d9953105SMichael Ellerman  *
795d9953105SMichael Ellerman  * Return 1 if corrected (or delivered a signal).
796d9953105SMichael Ellerman  * Return 0 if there is nothing we can do.
797d9953105SMichael Ellerman  */
798d9953105SMichael Ellerman static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
799d9953105SMichael Ellerman {
800d47d1d8aSAnton Blanchard 	int recovered = 0;
801a08a53eaSGreg Kurz 	int disposition = rtas_error_disposition(err);
802d9953105SMichael Ellerman 
8038f0b8056SMahesh Salgaonkar 	pseries_print_mce_info(regs, err);
8048f0b8056SMahesh Salgaonkar 
805d47d1d8aSAnton Blanchard 	if (!(regs->msr & MSR_RI)) {
806d47d1d8aSAnton Blanchard 		/* If MSR_RI isn't set, we cannot recover */
8078f0b8056SMahesh Salgaonkar 		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
808d47d1d8aSAnton Blanchard 		recovered = 0;
809d47d1d8aSAnton Blanchard 
810a08a53eaSGreg Kurz 	} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
811d9953105SMichael Ellerman 		/* Platform corrected itself */
812d47d1d8aSAnton Blanchard 		recovered = 1;
813d47d1d8aSAnton Blanchard 
814a08a53eaSGreg Kurz 	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
815d47d1d8aSAnton Blanchard 		/* Platform corrected itself but could be degraded */
816d47d1d8aSAnton Blanchard 		printk(KERN_ERR "MCE: limited recovery, system may "
817d47d1d8aSAnton Blanchard 		       "be degraded\n");
818d47d1d8aSAnton Blanchard 		recovered = 1;
819d47d1d8aSAnton Blanchard 
820d47d1d8aSAnton Blanchard 	} else if (user_mode(regs) && !is_global_init(current) &&
821a08a53eaSGreg Kurz 		   rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) {
822d47d1d8aSAnton Blanchard 
823d47d1d8aSAnton Blanchard 		/*
824d47d1d8aSAnton Blanchard 		 * If we received a synchronous error when in userspace
825d47d1d8aSAnton Blanchard 		 * kill the task. Firmware may report details of the fail
826d47d1d8aSAnton Blanchard 		 * asynchronously, so we can't rely on the target and type
827d47d1d8aSAnton Blanchard 		 * fields being valid here.
828d47d1d8aSAnton Blanchard 		 */
829d47d1d8aSAnton Blanchard 		printk(KERN_ERR "MCE: uncorrectable error, killing task "
830d47d1d8aSAnton Blanchard 		       "%s:%d\n", current->comm, current->pid);
831d47d1d8aSAnton Blanchard 
832d47d1d8aSAnton Blanchard 		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
833d47d1d8aSAnton Blanchard 		recovered = 1;
834d9953105SMichael Ellerman 	}
835d9953105SMichael Ellerman 
8367f177f98SGanesh Goudar 	pseries_process_ue(regs, err);
8377f177f98SGanesh Goudar 
83894675cceSMahesh Salgaonkar 	/* Queue irq work to log this rtas event later. */
83994675cceSMahesh Salgaonkar 	irq_work_queue(&mce_errlog_process_work);
840d9953105SMichael Ellerman 
841d47d1d8aSAnton Blanchard 	return recovered;
842d9953105SMichael Ellerman }
843d9953105SMichael Ellerman 
844d9953105SMichael Ellerman /*
845d9953105SMichael Ellerman  * Handle a machine check.
846d9953105SMichael Ellerman  *
847d9953105SMichael Ellerman  * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
848d9953105SMichael Ellerman  * should be present.  If so the handler which called us tells us if the
849d9953105SMichael Ellerman  * error was recovered (never true if RI=0).
850d9953105SMichael Ellerman  *
851d9953105SMichael Ellerman  * On hardware prior to Power 4 these exceptions were asynchronous which
852d9953105SMichael Ellerman  * means we can't tell exactly where it occurred and so we can't recover.
853d9953105SMichael Ellerman  */
854d9953105SMichael Ellerman int pSeries_machine_check_exception(struct pt_regs *regs)
855d9953105SMichael Ellerman {
856d9953105SMichael Ellerman 	struct rtas_error_log *errp;
857d9953105SMichael Ellerman 
858d9953105SMichael Ellerman 	if (fwnmi_active) {
859d9953105SMichael Ellerman 		fwnmi_release_errinfo();
860a43c1590SMahesh Salgaonkar 		errp = fwnmi_get_errlog();
861d9953105SMichael Ellerman 		if (errp && recover_mce(regs, errp))
862d9953105SMichael Ellerman 			return 1;
863d9953105SMichael Ellerman 	}
864d9953105SMichael Ellerman 
865d9953105SMichael Ellerman 	return 0;
866d9953105SMichael Ellerman }
867a43c1590SMahesh Salgaonkar 
868a43c1590SMahesh Salgaonkar long pseries_machine_check_realmode(struct pt_regs *regs)
869a43c1590SMahesh Salgaonkar {
870a43c1590SMahesh Salgaonkar 	struct rtas_error_log *errp;
871a43c1590SMahesh Salgaonkar 	int disposition;
872a43c1590SMahesh Salgaonkar 
873a43c1590SMahesh Salgaonkar 	if (fwnmi_active) {
874a43c1590SMahesh Salgaonkar 		errp = fwnmi_get_errinfo(regs);
875a43c1590SMahesh Salgaonkar 		/*
876a43c1590SMahesh Salgaonkar 		 * Call to fwnmi_release_errinfo() in real mode causes kernel
877a43c1590SMahesh Salgaonkar 		 * to panic. Hence we will call it as soon as we go into
878a43c1590SMahesh Salgaonkar 		 * virtual mode.
879a43c1590SMahesh Salgaonkar 		 */
880a43c1590SMahesh Salgaonkar 		disposition = mce_handle_error(errp);
881a43c1590SMahesh Salgaonkar 		if (disposition == RTAS_DISP_FULLY_RECOVERED)
882a43c1590SMahesh Salgaonkar 			return 1;
883a43c1590SMahesh Salgaonkar 	}
884a43c1590SMahesh Salgaonkar 
885a43c1590SMahesh Salgaonkar 	return 0;
886a43c1590SMahesh Salgaonkar }
887