1 /* 2 * UEFI Common Platform Error Record (CPER) support 3 * 4 * Copyright (C) 2010, Intel Corp. 5 * Author: Huang Ying <ying.huang@intel.com> 6 * 7 * CPER is the format used to describe platform hardware error by 8 * various tables, such as ERST, BERT and HEST etc. 9 * 10 * For more information about CPER, please refer to Appendix N of UEFI 11 * Specification version 2.4. 12 * 13 * This program is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU General Public License version 15 * 2 as published by the Free Software Foundation. 16 * 17 * This program is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 * GNU General Public License for more details. 21 * 22 * You should have received a copy of the GNU General Public License 23 * along with this program; if not, write to the Free Software 24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 */ 26 27 #include <linux/kernel.h> 28 #include <linux/module.h> 29 #include <linux/time.h> 30 #include <linux/cper.h> 31 #include <linux/dmi.h> 32 #include <linux/acpi.h> 33 #include <linux/pci.h> 34 #include <linux/aer.h> 35 #include <linux/printk.h> 36 #include <linux/bcd.h> 37 #include <acpi/ghes.h> 38 #include <ras/ras_event.h> 39 40 #define INDENT_SP " " 41 42 static char rcd_decode_str[CPER_REC_LEN]; 43 44 /* 45 * CPER record ID need to be unique even after reboot, because record 46 * ID is used as index for ERST storage, while CPER records from 47 * multiple boot may co-exist in ERST. 48 */ 49 u64 cper_next_record_id(void) 50 { 51 static atomic64_t seq; 52 53 if (!atomic64_read(&seq)) 54 atomic64_set(&seq, ((u64)get_seconds()) << 32); 55 56 return atomic64_inc_return(&seq); 57 } 58 EXPORT_SYMBOL_GPL(cper_next_record_id); 59 60 static const char * const severity_strs[] = { 61 "recoverable", 62 "fatal", 63 "corrected", 64 "info", 65 }; 66 67 const char *cper_severity_str(unsigned int severity) 68 { 69 return severity < ARRAY_SIZE(severity_strs) ? 70 severity_strs[severity] : "unknown"; 71 } 72 EXPORT_SYMBOL_GPL(cper_severity_str); 73 74 /* 75 * cper_print_bits - print strings for set bits 76 * @pfx: prefix for each line, including log level and prefix string 77 * @bits: bit mask 78 * @strs: string array, indexed by bit position 79 * @strs_size: size of the string array: @strs 80 * 81 * For each set bit in @bits, print the corresponding string in @strs. 82 * If the output length is longer than 80, multiple line will be 83 * printed, with @pfx is printed at the beginning of each line. 84 */ 85 void cper_print_bits(const char *pfx, unsigned int bits, 86 const char * const strs[], unsigned int strs_size) 87 { 88 int i, len = 0; 89 const char *str; 90 char buf[84]; 91 92 for (i = 0; i < strs_size; i++) { 93 if (!(bits & (1U << i))) 94 continue; 95 str = strs[i]; 96 if (!str) 97 continue; 98 if (len && len + strlen(str) + 2 > 80) { 99 printk("%s\n", buf); 100 len = 0; 101 } 102 if (!len) 103 len = snprintf(buf, sizeof(buf), "%s%s", pfx, str); 104 else 105 len += snprintf(buf+len, sizeof(buf)-len, ", %s", str); 106 } 107 if (len) 108 printk("%s\n", buf); 109 } 110 111 static const char * const proc_type_strs[] = { 112 "IA32/X64", 113 "IA64", 114 "ARM", 115 }; 116 117 static const char * const proc_isa_strs[] = { 118 "IA32", 119 "IA64", 120 "X64", 121 "ARM A32/T32", 122 "ARM A64", 123 }; 124 125 const char * const cper_proc_error_type_strs[] = { 126 "cache error", 127 "TLB error", 128 "bus error", 129 "micro-architectural error", 130 }; 131 132 static const char * const proc_op_strs[] = { 133 "unknown or generic", 134 "data read", 135 "data write", 136 "instruction execution", 137 }; 138 139 static const char * const proc_flag_strs[] = { 140 "restartable", 141 "precise IP", 142 "overflow", 143 "corrected", 144 }; 145 146 static void cper_print_proc_generic(const char *pfx, 147 const struct cper_sec_proc_generic *proc) 148 { 149 if (proc->validation_bits & CPER_PROC_VALID_TYPE) 150 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, 151 proc->proc_type < ARRAY_SIZE(proc_type_strs) ? 152 proc_type_strs[proc->proc_type] : "unknown"); 153 if (proc->validation_bits & CPER_PROC_VALID_ISA) 154 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, 155 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ? 156 proc_isa_strs[proc->proc_isa] : "unknown"); 157 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { 158 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); 159 cper_print_bits(pfx, proc->proc_error_type, 160 cper_proc_error_type_strs, 161 ARRAY_SIZE(cper_proc_error_type_strs)); 162 } 163 if (proc->validation_bits & CPER_PROC_VALID_OPERATION) 164 printk("%s""operation: %d, %s\n", pfx, proc->operation, 165 proc->operation < ARRAY_SIZE(proc_op_strs) ? 166 proc_op_strs[proc->operation] : "unknown"); 167 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { 168 printk("%s""flags: 0x%02x\n", pfx, proc->flags); 169 cper_print_bits(pfx, proc->flags, proc_flag_strs, 170 ARRAY_SIZE(proc_flag_strs)); 171 } 172 if (proc->validation_bits & CPER_PROC_VALID_LEVEL) 173 printk("%s""level: %d\n", pfx, proc->level); 174 if (proc->validation_bits & CPER_PROC_VALID_VERSION) 175 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version); 176 if (proc->validation_bits & CPER_PROC_VALID_ID) 177 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id); 178 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS) 179 printk("%s""target_address: 0x%016llx\n", 180 pfx, proc->target_addr); 181 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID) 182 printk("%s""requestor_id: 0x%016llx\n", 183 pfx, proc->requestor_id); 184 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID) 185 printk("%s""responder_id: 0x%016llx\n", 186 pfx, proc->responder_id); 187 if (proc->validation_bits & CPER_PROC_VALID_IP) 188 printk("%s""IP: 0x%016llx\n", pfx, proc->ip); 189 } 190 191 static const char * const mem_err_type_strs[] = { 192 "unknown", 193 "no error", 194 "single-bit ECC", 195 "multi-bit ECC", 196 "single-symbol chipkill ECC", 197 "multi-symbol chipkill ECC", 198 "master abort", 199 "target abort", 200 "parity error", 201 "watchdog timeout", 202 "invalid address", 203 "mirror Broken", 204 "memory sparing", 205 "scrub corrected error", 206 "scrub uncorrected error", 207 "physical memory map-out event", 208 }; 209 210 const char *cper_mem_err_type_str(unsigned int etype) 211 { 212 return etype < ARRAY_SIZE(mem_err_type_strs) ? 213 mem_err_type_strs[etype] : "unknown"; 214 } 215 EXPORT_SYMBOL_GPL(cper_mem_err_type_str); 216 217 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) 218 { 219 u32 len, n; 220 221 if (!msg) 222 return 0; 223 224 n = 0; 225 len = CPER_REC_LEN - 1; 226 if (mem->validation_bits & CPER_MEM_VALID_NODE) 227 n += scnprintf(msg + n, len - n, "node: %d ", mem->node); 228 if (mem->validation_bits & CPER_MEM_VALID_CARD) 229 n += scnprintf(msg + n, len - n, "card: %d ", mem->card); 230 if (mem->validation_bits & CPER_MEM_VALID_MODULE) 231 n += scnprintf(msg + n, len - n, "module: %d ", mem->module); 232 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) 233 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank); 234 if (mem->validation_bits & CPER_MEM_VALID_BANK) 235 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank); 236 if (mem->validation_bits & CPER_MEM_VALID_DEVICE) 237 n += scnprintf(msg + n, len - n, "device: %d ", mem->device); 238 if (mem->validation_bits & CPER_MEM_VALID_ROW) 239 n += scnprintf(msg + n, len - n, "row: %d ", mem->row); 240 if (mem->validation_bits & CPER_MEM_VALID_COLUMN) 241 n += scnprintf(msg + n, len - n, "column: %d ", mem->column); 242 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) 243 n += scnprintf(msg + n, len - n, "bit_position: %d ", 244 mem->bit_pos); 245 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) 246 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ", 247 mem->requestor_id); 248 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) 249 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ", 250 mem->responder_id); 251 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) 252 scnprintf(msg + n, len - n, "target_id: 0x%016llx ", 253 mem->target_id); 254 255 msg[n] = '\0'; 256 return n; 257 } 258 259 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) 260 { 261 u32 len, n; 262 const char *bank = NULL, *device = NULL; 263 264 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) 265 return 0; 266 267 n = 0; 268 len = CPER_REC_LEN - 1; 269 dmi_memdev_name(mem->mem_dev_handle, &bank, &device); 270 if (bank && device) 271 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); 272 else 273 n = snprintf(msg, len, 274 "DIMM location: not present. DMI handle: 0x%.4x ", 275 mem->mem_dev_handle); 276 277 msg[n] = '\0'; 278 return n; 279 } 280 281 void cper_mem_err_pack(const struct cper_sec_mem_err *mem, 282 struct cper_mem_err_compact *cmem) 283 { 284 cmem->validation_bits = mem->validation_bits; 285 cmem->node = mem->node; 286 cmem->card = mem->card; 287 cmem->module = mem->module; 288 cmem->bank = mem->bank; 289 cmem->device = mem->device; 290 cmem->row = mem->row; 291 cmem->column = mem->column; 292 cmem->bit_pos = mem->bit_pos; 293 cmem->requestor_id = mem->requestor_id; 294 cmem->responder_id = mem->responder_id; 295 cmem->target_id = mem->target_id; 296 cmem->rank = mem->rank; 297 cmem->mem_array_handle = mem->mem_array_handle; 298 cmem->mem_dev_handle = mem->mem_dev_handle; 299 } 300 301 const char *cper_mem_err_unpack(struct trace_seq *p, 302 struct cper_mem_err_compact *cmem) 303 { 304 const char *ret = trace_seq_buffer_ptr(p); 305 306 if (cper_mem_err_location(cmem, rcd_decode_str)) 307 trace_seq_printf(p, "%s", rcd_decode_str); 308 if (cper_dimm_err_location(cmem, rcd_decode_str)) 309 trace_seq_printf(p, "%s", rcd_decode_str); 310 trace_seq_putc(p, '\0'); 311 312 return ret; 313 } 314 315 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, 316 int len) 317 { 318 struct cper_mem_err_compact cmem; 319 320 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ 321 if (len == sizeof(struct cper_sec_mem_err_old) && 322 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) { 323 pr_err(FW_WARN "valid bits set for fields beyond structure\n"); 324 return; 325 } 326 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) 327 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); 328 if (mem->validation_bits & CPER_MEM_VALID_PA) 329 printk("%s""physical_address: 0x%016llx\n", 330 pfx, mem->physical_addr); 331 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) 332 printk("%s""physical_address_mask: 0x%016llx\n", 333 pfx, mem->physical_addr_mask); 334 cper_mem_err_pack(mem, &cmem); 335 if (cper_mem_err_location(&cmem, rcd_decode_str)) 336 printk("%s%s\n", pfx, rcd_decode_str); 337 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { 338 u8 etype = mem->error_type; 339 printk("%s""error_type: %d, %s\n", pfx, etype, 340 cper_mem_err_type_str(etype)); 341 } 342 if (cper_dimm_err_location(&cmem, rcd_decode_str)) 343 printk("%s%s\n", pfx, rcd_decode_str); 344 } 345 346 static const char * const pcie_port_type_strs[] = { 347 "PCIe end point", 348 "legacy PCI end point", 349 "unknown", 350 "unknown", 351 "root port", 352 "upstream switch port", 353 "downstream switch port", 354 "PCIe to PCI/PCI-X bridge", 355 "PCI/PCI-X to PCIe bridge", 356 "root complex integrated endpoint device", 357 "root complex event collector", 358 }; 359 360 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, 361 const struct acpi_hest_generic_data *gdata) 362 { 363 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 364 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 365 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ? 366 pcie_port_type_strs[pcie->port_type] : "unknown"); 367 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) 368 printk("%s""version: %d.%d\n", pfx, 369 pcie->version.major, pcie->version.minor); 370 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS) 371 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx, 372 pcie->command, pcie->status); 373 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) { 374 const __u8 *p; 375 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx, 376 pcie->device_id.segment, pcie->device_id.bus, 377 pcie->device_id.device, pcie->device_id.function); 378 printk("%s""slot: %d\n", pfx, 379 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT); 380 printk("%s""secondary_bus: 0x%02x\n", pfx, 381 pcie->device_id.secondary_bus); 382 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, 383 pcie->device_id.vendor_id, pcie->device_id.device_id); 384 p = pcie->device_id.class_code; 385 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); 386 } 387 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) 388 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, 389 pcie->serial_number.lower, pcie->serial_number.upper); 390 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS) 391 printk( 392 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", 393 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 394 } 395 396 static void cper_print_tstamp(const char *pfx, 397 struct acpi_hest_generic_data_v300 *gdata) 398 { 399 __u8 hour, min, sec, day, mon, year, century, *timestamp; 400 401 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) { 402 timestamp = (__u8 *)&(gdata->time_stamp); 403 sec = bcd2bin(timestamp[0]); 404 min = bcd2bin(timestamp[1]); 405 hour = bcd2bin(timestamp[2]); 406 day = bcd2bin(timestamp[4]); 407 mon = bcd2bin(timestamp[5]); 408 year = bcd2bin(timestamp[6]); 409 century = bcd2bin(timestamp[7]); 410 411 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx, 412 (timestamp[3] & 0x1 ? "precise " : "imprecise "), 413 century, year, mon, day, hour, min, sec); 414 } 415 } 416 417 static void 418 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, 419 int sec_no) 420 { 421 guid_t *sec_type = (guid_t *)gdata->section_type; 422 __u16 severity; 423 char newpfx[64]; 424 425 if (acpi_hest_get_version(gdata) >= 3) 426 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata); 427 428 severity = gdata->error_severity; 429 printk("%s""Error %d, type: %s\n", pfx, sec_no, 430 cper_severity_str(severity)); 431 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) 432 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id); 433 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) 434 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); 435 436 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); 437 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) { 438 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata); 439 440 printk("%s""section_type: general processor error\n", newpfx); 441 if (gdata->error_data_length >= sizeof(*proc_err)) 442 cper_print_proc_generic(newpfx, proc_err); 443 else 444 goto err_section_too_small; 445 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { 446 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); 447 448 printk("%s""section_type: memory error\n", newpfx); 449 if (gdata->error_data_length >= 450 sizeof(struct cper_sec_mem_err_old)) 451 cper_print_mem(newpfx, mem_err, 452 gdata->error_data_length); 453 else 454 goto err_section_too_small; 455 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { 456 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata); 457 458 printk("%s""section_type: PCIe error\n", newpfx); 459 if (gdata->error_data_length >= sizeof(*pcie)) 460 cper_print_pcie(newpfx, pcie, gdata); 461 else 462 goto err_section_too_small; 463 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM) 464 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) { 465 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata); 466 467 printk("%ssection_type: ARM processor error\n", newpfx); 468 if (gdata->error_data_length >= sizeof(*arm_err)) 469 cper_print_proc_arm(newpfx, arm_err); 470 else 471 goto err_section_too_small; 472 #endif 473 } else { 474 const void *err = acpi_hest_get_payload(gdata); 475 476 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type); 477 printk("%ssection length: %#x\n", newpfx, 478 gdata->error_data_length); 479 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err, 480 gdata->error_data_length, true); 481 } 482 483 return; 484 485 err_section_too_small: 486 pr_err(FW_WARN "error section length is too small\n"); 487 } 488 489 void cper_estatus_print(const char *pfx, 490 const struct acpi_hest_generic_status *estatus) 491 { 492 struct acpi_hest_generic_data *gdata; 493 int sec_no = 0; 494 char newpfx[64]; 495 __u16 severity; 496 497 severity = estatus->error_severity; 498 if (severity == CPER_SEV_CORRECTED) 499 printk("%s%s\n", pfx, 500 "It has been corrected by h/w " 501 "and requires no further action"); 502 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity)); 503 snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); 504 505 apei_estatus_for_each_section(estatus, gdata) { 506 cper_estatus_print_section(newpfx, gdata, sec_no); 507 sec_no++; 508 } 509 } 510 EXPORT_SYMBOL_GPL(cper_estatus_print); 511 512 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus) 513 { 514 if (estatus->data_length && 515 estatus->data_length < sizeof(struct acpi_hest_generic_data)) 516 return -EINVAL; 517 if (estatus->raw_data_length && 518 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length) 519 return -EINVAL; 520 521 return 0; 522 } 523 EXPORT_SYMBOL_GPL(cper_estatus_check_header); 524 525 int cper_estatus_check(const struct acpi_hest_generic_status *estatus) 526 { 527 struct acpi_hest_generic_data *gdata; 528 unsigned int data_len, gedata_len; 529 int rc; 530 531 rc = cper_estatus_check_header(estatus); 532 if (rc) 533 return rc; 534 data_len = estatus->data_length; 535 536 apei_estatus_for_each_section(estatus, gdata) { 537 gedata_len = acpi_hest_get_error_length(gdata); 538 if (gedata_len > data_len - acpi_hest_get_size(gdata)) 539 return -EINVAL; 540 data_len -= acpi_hest_get_record_size(gdata); 541 } 542 if (data_len) 543 return -EINVAL; 544 545 return 0; 546 } 547 EXPORT_SYMBOL_GPL(cper_estatus_check); 548