xref: /openbmc/linux/drivers/acpi/apei/bert.c (revision 79d0150d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * APEI Boot Error Record Table (BERT) support
4  *
5  * Copyright 2011 Intel Corp.
6  *   Author: Huang Ying <ying.huang@intel.com>
7  *
8  * Under normal circumstances, when a hardware error occurs, the error
9  * handler receives control and processes the error. This gives OSPM a
10  * chance to process the error condition, report it, and optionally attempt
11  * recovery. In some cases, the system is unable to process an error.
12  * For example, system firmware or a management controller may choose to
13  * reset the system or the system might experience an uncontrolled crash
14  * or reset. The boot error source is used to report unhandled errors that
15  * occurred in a previous boot. This mechanism is described in the BERT
16  * table.
17  *
18  * For more information about BERT, please refer to ACPI Specification
19  * version 4.0, section 17.3.1
20  */
21 
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/acpi.h>
26 #include <linux/cper.h>
27 #include <linux/io.h>
28 
29 #include "apei-internal.h"
30 
31 #undef pr_fmt	/* drop any earlier definition before installing the one below */
32 #define pr_fmt(fmt) "BERT: " fmt	/* pastes "BERT: " in front of the format string; presumably picked up by the pr_*() calls in this file */
33 
33 #define ACPI_BERT_PRINT_MAX_RECORDS 5	/* bert_print_all() prints at most this many records; later ones are counted as skipped */
34 #define ACPI_BERT_PRINT_MAX_LEN 1024	/* bert_print_all() only prints records whose length is below this value */
36 
36 static int bert_disable;	/* set to 1 by setup_bert_disable(); when non-zero bert_init() returns without reading the table */
38 
39 /*
40  * Print "all" the error records in the BERT table, but avoid huge spam to
41  * the console if the BIOS included oversize records, or too many records.
42  * Skipping some records here does not lose anything because the full
43  * data is available to user tools in:
44  *	/sys/firmware/acpi/tables/data/BERT
45  */
46 static void __init bert_print_all(struct acpi_bert_region *region,
47 				  unsigned int region_len)
48 {
49 	struct acpi_hest_generic_status *estatus =
50 		(struct acpi_hest_generic_status *)region;
51 	int remain = region_len;
52 	int printed = 0, skipped = 0;
53 	u32 estatus_len;
54 
55 	while (remain >= sizeof(struct acpi_bert_region)) {
56 		estatus_len = cper_estatus_len(estatus);
57 		if (remain < estatus_len) {
58 			pr_err(FW_BUG "Truncated status block (length: %u).\n",
59 			       estatus_len);
60 			break;
61 		}
62 
63 		/* No more error records. */
64 		if (!estatus->block_status)
65 			break;
66 
67 		if (cper_estatus_check(estatus)) {
68 			pr_err(FW_BUG "Invalid error record.\n");
69 			break;
70 		}
71 
72 		if (estatus_len < ACPI_BERT_PRINT_MAX_LEN &&
73 		    printed < ACPI_BERT_PRINT_MAX_RECORDS) {
74 			pr_info_once("Error records from previous boot:\n");
75 			cper_estatus_print(KERN_INFO HW_ERR, estatus);
76 			printed++;
77 		} else {
78 			skipped++;
79 		}
80 
81 		/*
82 		 * Because the boot error source is "one-time polled" type,
83 		 * clear Block Status of current Generic Error Status Block,
84 		 * once it's printed.
85 		 */
86 		estatus->block_status = 0;
87 
88 		estatus = (void *)estatus + estatus_len;
89 		remain -= estatus_len;
90 	}
91 
92 	if (skipped)
93 		pr_info(HW_ERR "Skipped %d error records\n", skipped);
94 
95 	if (printed + skipped)
96 		pr_info("Total records found: %d\n", printed + skipped);
97 }
98 
99 static int __init setup_bert_disable(char *str)
100 {
101 	bert_disable = 1;
102 
103 	return 1;
104 }
105 __setup("bert_disable", setup_bert_disable);
106 
107 static int __init bert_check_table(struct acpi_table_bert *bert_tab)
108 {
109 	if (bert_tab->header.length < sizeof(struct acpi_table_bert) ||
110 	    bert_tab->region_length < sizeof(struct acpi_bert_region))
111 		return -EINVAL;
112 
113 	return 0;
114 }
115 
116 static int __init bert_init(void)
117 {
118 	struct apei_resources bert_resources;
119 	struct acpi_bert_region *boot_error_region;
120 	struct acpi_table_bert *bert_tab;
121 	unsigned int region_len;
122 	acpi_status status;
123 	int rc = 0;
124 
125 	if (acpi_disabled)
126 		return 0;
127 
128 	if (bert_disable) {
129 		pr_info("Boot Error Record Table support is disabled.\n");
130 		return 0;
131 	}
132 
133 	status = acpi_get_table(ACPI_SIG_BERT, 0, (struct acpi_table_header **)&bert_tab);
134 	if (status == AE_NOT_FOUND)
135 		return 0;
136 
137 	if (ACPI_FAILURE(status)) {
138 		pr_err("get table failed, %s.\n", acpi_format_exception(status));
139 		return -EINVAL;
140 	}
141 
142 	rc = bert_check_table(bert_tab);
143 	if (rc) {
144 		pr_err(FW_BUG "table invalid.\n");
145 		goto out_put_bert_tab;
146 	}
147 
148 	region_len = bert_tab->region_length;
149 	apei_resources_init(&bert_resources);
150 	rc = apei_resources_add(&bert_resources, bert_tab->address,
151 				region_len, true);
152 	if (rc)
153 		goto out_put_bert_tab;
154 	rc = apei_resources_request(&bert_resources, "APEI BERT");
155 	if (rc)
156 		goto out_fini;
157 	boot_error_region = ioremap_cache(bert_tab->address, region_len);
158 	if (boot_error_region) {
159 		bert_print_all(boot_error_region, region_len);
160 		iounmap(boot_error_region);
161 	} else {
162 		rc = -ENOMEM;
163 	}
164 
165 	apei_resources_release(&bert_resources);
166 out_fini:
167 	apei_resources_fini(&bert_resources);
168 out_put_bert_tab:
169 	acpi_put_table((struct acpi_table_header *)bert_tab);
170 
171 	return rc;
172 }
173 
174 late_initcall(bert_init);
175