1 /* 2 * UEFI Common Platform Error Record (CPER) support 3 * 4 * Copyright (C) 2010, Intel Corp. 5 * Author: Huang Ying <ying.huang@intel.com> 6 * 7 * CPER is the format used to describe platform hardware error by 8 * various tables, such as ERST, BERT and HEST etc. 9 * 10 * For more information about CPER, please refer to Appendix N of UEFI 11 * Specification version 2.4. 12 * 13 * This program is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU General Public License version 15 * 2 as published by the Free Software Foundation. 16 * 17 * This program is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 * GNU General Public License for more details. 21 * 22 * You should have received a copy of the GNU General Public License 23 * along with this program; if not, write to the Free Software 24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 */ 26 27 #include <linux/kernel.h> 28 #include <linux/module.h> 29 #include <linux/time.h> 30 #include <linux/cper.h> 31 #include <linux/dmi.h> 32 #include <linux/acpi.h> 33 #include <linux/pci.h> 34 #include <linux/aer.h> 35 #include <linux/printk.h> 36 #include <linux/bcd.h> 37 #include <acpi/ghes.h> 38 #include <ras/ras_event.h> 39 40 static char rcd_decode_str[CPER_REC_LEN]; 41 42 /* 43 * CPER record ID need to be unique even after reboot, because record 44 * ID is used as index for ERST storage, while CPER records from 45 * multiple boot may co-exist in ERST. 46 */ 47 u64 cper_next_record_id(void) 48 { 49 static atomic64_t seq; 50 51 if (!atomic64_read(&seq)) 52 atomic64_set(&seq, ((u64)get_seconds()) << 32); 53 54 return atomic64_inc_return(&seq); 55 } 56 EXPORT_SYMBOL_GPL(cper_next_record_id); 57 58 static const char * const severity_strs[] = { 59 "recoverable", 60 "fatal", 61 "corrected", 62 "info", 63 }; 64 65 const char *cper_severity_str(unsigned int severity) 66 { 67 return severity < ARRAY_SIZE(severity_strs) ? 68 severity_strs[severity] : "unknown"; 69 } 70 EXPORT_SYMBOL_GPL(cper_severity_str); 71 72 /* 73 * cper_print_bits - print strings for set bits 74 * @pfx: prefix for each line, including log level and prefix string 75 * @bits: bit mask 76 * @strs: string array, indexed by bit position 77 * @strs_size: size of the string array: @strs 78 * 79 * For each set bit in @bits, print the corresponding string in @strs. 80 * If the output length is longer than 80, multiple line will be 81 * printed, with @pfx is printed at the beginning of each line. 82 */ 83 void cper_print_bits(const char *pfx, unsigned int bits, 84 const char * const strs[], unsigned int strs_size) 85 { 86 int i, len = 0; 87 const char *str; 88 char buf[84]; 89 90 for (i = 0; i < strs_size; i++) { 91 if (!(bits & (1U << i))) 92 continue; 93 str = strs[i]; 94 if (!str) 95 continue; 96 if (len && len + strlen(str) + 2 > 80) { 97 printk("%s\n", buf); 98 len = 0; 99 } 100 if (!len) 101 len = snprintf(buf, sizeof(buf), "%s%s", pfx, str); 102 else 103 len += snprintf(buf+len, sizeof(buf)-len, ", %s", str); 104 } 105 if (len) 106 printk("%s\n", buf); 107 } 108 109 static const char * const proc_type_strs[] = { 110 "IA32/X64", 111 "IA64", 112 "ARM", 113 }; 114 115 static const char * const proc_isa_strs[] = { 116 "IA32", 117 "IA64", 118 "X64", 119 "ARM A32/T32", 120 "ARM A64", 121 }; 122 123 const char * const cper_proc_error_type_strs[] = { 124 "cache error", 125 "TLB error", 126 "bus error", 127 "micro-architectural error", 128 }; 129 130 static const char * const proc_op_strs[] = { 131 "unknown or generic", 132 "data read", 133 "data write", 134 "instruction execution", 135 }; 136 137 static const char * const proc_flag_strs[] = { 138 "restartable", 139 "precise IP", 140 "overflow", 141 "corrected", 142 }; 143 144 static void cper_print_proc_generic(const char *pfx, 145 const struct cper_sec_proc_generic *proc) 146 { 147 if (proc->validation_bits & CPER_PROC_VALID_TYPE) 148 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, 149 proc->proc_type < ARRAY_SIZE(proc_type_strs) ? 150 proc_type_strs[proc->proc_type] : "unknown"); 151 if (proc->validation_bits & CPER_PROC_VALID_ISA) 152 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, 153 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ? 154 proc_isa_strs[proc->proc_isa] : "unknown"); 155 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { 156 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); 157 cper_print_bits(pfx, proc->proc_error_type, 158 cper_proc_error_type_strs, 159 ARRAY_SIZE(cper_proc_error_type_strs)); 160 } 161 if (proc->validation_bits & CPER_PROC_VALID_OPERATION) 162 printk("%s""operation: %d, %s\n", pfx, proc->operation, 163 proc->operation < ARRAY_SIZE(proc_op_strs) ? 164 proc_op_strs[proc->operation] : "unknown"); 165 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { 166 printk("%s""flags: 0x%02x\n", pfx, proc->flags); 167 cper_print_bits(pfx, proc->flags, proc_flag_strs, 168 ARRAY_SIZE(proc_flag_strs)); 169 } 170 if (proc->validation_bits & CPER_PROC_VALID_LEVEL) 171 printk("%s""level: %d\n", pfx, proc->level); 172 if (proc->validation_bits & CPER_PROC_VALID_VERSION) 173 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version); 174 if (proc->validation_bits & CPER_PROC_VALID_ID) 175 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id); 176 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS) 177 printk("%s""target_address: 0x%016llx\n", 178 pfx, proc->target_addr); 179 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID) 180 printk("%s""requestor_id: 0x%016llx\n", 181 pfx, proc->requestor_id); 182 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID) 183 printk("%s""responder_id: 0x%016llx\n", 184 pfx, proc->responder_id); 185 if (proc->validation_bits & CPER_PROC_VALID_IP) 186 printk("%s""IP: 0x%016llx\n", pfx, proc->ip); 187 } 188 189 static const char * const mem_err_type_strs[] = { 190 "unknown", 191 "no error", 192 "single-bit ECC", 193 "multi-bit ECC", 194 "single-symbol chipkill ECC", 195 "multi-symbol chipkill ECC", 196 "master abort", 197 "target abort", 198 "parity error", 199 "watchdog timeout", 200 "invalid address", 201 "mirror Broken", 202 "memory sparing", 203 "scrub corrected error", 204 "scrub uncorrected error", 205 "physical memory map-out event", 206 }; 207 208 const char *cper_mem_err_type_str(unsigned int etype) 209 { 210 return etype < ARRAY_SIZE(mem_err_type_strs) ? 211 mem_err_type_strs[etype] : "unknown"; 212 } 213 EXPORT_SYMBOL_GPL(cper_mem_err_type_str); 214 215 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) 216 { 217 u32 len, n; 218 219 if (!msg) 220 return 0; 221 222 n = 0; 223 len = CPER_REC_LEN - 1; 224 if (mem->validation_bits & CPER_MEM_VALID_NODE) 225 n += scnprintf(msg + n, len - n, "node: %d ", mem->node); 226 if (mem->validation_bits & CPER_MEM_VALID_CARD) 227 n += scnprintf(msg + n, len - n, "card: %d ", mem->card); 228 if (mem->validation_bits & CPER_MEM_VALID_MODULE) 229 n += scnprintf(msg + n, len - n, "module: %d ", mem->module); 230 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) 231 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank); 232 if (mem->validation_bits & CPER_MEM_VALID_BANK) 233 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank); 234 if (mem->validation_bits & CPER_MEM_VALID_DEVICE) 235 n += scnprintf(msg + n, len - n, "device: %d ", mem->device); 236 if (mem->validation_bits & CPER_MEM_VALID_ROW) 237 n += scnprintf(msg + n, len - n, "row: %d ", mem->row); 238 if (mem->validation_bits & CPER_MEM_VALID_COLUMN) 239 n += scnprintf(msg + n, len - n, "column: %d ", mem->column); 240 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) 241 n += scnprintf(msg + n, len - n, "bit_position: %d ", 242 mem->bit_pos); 243 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) 244 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ", 245 mem->requestor_id); 246 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) 247 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ", 248 mem->responder_id); 249 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) 250 scnprintf(msg + n, len - n, "target_id: 0x%016llx ", 251 mem->target_id); 252 253 msg[n] = '\0'; 254 return n; 255 } 256 257 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) 258 { 259 u32 len, n; 260 const char *bank = NULL, *device = NULL; 261 262 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) 263 return 0; 264 265 n = 0; 266 len = CPER_REC_LEN - 1; 267 dmi_memdev_name(mem->mem_dev_handle, &bank, &device); 268 if (bank && device) 269 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); 270 else 271 n = snprintf(msg, len, 272 "DIMM location: not present. DMI handle: 0x%.4x ", 273 mem->mem_dev_handle); 274 275 msg[n] = '\0'; 276 return n; 277 } 278 279 void cper_mem_err_pack(const struct cper_sec_mem_err *mem, 280 struct cper_mem_err_compact *cmem) 281 { 282 cmem->validation_bits = mem->validation_bits; 283 cmem->node = mem->node; 284 cmem->card = mem->card; 285 cmem->module = mem->module; 286 cmem->bank = mem->bank; 287 cmem->device = mem->device; 288 cmem->row = mem->row; 289 cmem->column = mem->column; 290 cmem->bit_pos = mem->bit_pos; 291 cmem->requestor_id = mem->requestor_id; 292 cmem->responder_id = mem->responder_id; 293 cmem->target_id = mem->target_id; 294 cmem->rank = mem->rank; 295 cmem->mem_array_handle = mem->mem_array_handle; 296 cmem->mem_dev_handle = mem->mem_dev_handle; 297 } 298 299 const char *cper_mem_err_unpack(struct trace_seq *p, 300 struct cper_mem_err_compact *cmem) 301 { 302 const char *ret = trace_seq_buffer_ptr(p); 303 304 if (cper_mem_err_location(cmem, rcd_decode_str)) 305 trace_seq_printf(p, "%s", rcd_decode_str); 306 if (cper_dimm_err_location(cmem, rcd_decode_str)) 307 trace_seq_printf(p, "%s", rcd_decode_str); 308 trace_seq_putc(p, '\0'); 309 310 return ret; 311 } 312 313 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, 314 int len) 315 { 316 struct cper_mem_err_compact cmem; 317 318 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ 319 if (len == sizeof(struct cper_sec_mem_err_old) && 320 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) { 321 pr_err(FW_WARN "valid bits set for fields beyond structure\n"); 322 return; 323 } 324 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) 325 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); 326 if (mem->validation_bits & CPER_MEM_VALID_PA) 327 printk("%s""physical_address: 0x%016llx\n", 328 pfx, mem->physical_addr); 329 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) 330 printk("%s""physical_address_mask: 0x%016llx\n", 331 pfx, mem->physical_addr_mask); 332 cper_mem_err_pack(mem, &cmem); 333 if (cper_mem_err_location(&cmem, rcd_decode_str)) 334 printk("%s%s\n", pfx, rcd_decode_str); 335 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { 336 u8 etype = mem->error_type; 337 printk("%s""error_type: %d, %s\n", pfx, etype, 338 cper_mem_err_type_str(etype)); 339 } 340 if (cper_dimm_err_location(&cmem, rcd_decode_str)) 341 printk("%s%s\n", pfx, rcd_decode_str); 342 } 343 344 static const char * const pcie_port_type_strs[] = { 345 "PCIe end point", 346 "legacy PCI end point", 347 "unknown", 348 "unknown", 349 "root port", 350 "upstream switch port", 351 "downstream switch port", 352 "PCIe to PCI/PCI-X bridge", 353 "PCI/PCI-X to PCIe bridge", 354 "root complex integrated endpoint device", 355 "root complex event collector", 356 }; 357 358 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, 359 const struct acpi_hest_generic_data *gdata) 360 { 361 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 362 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 363 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ? 364 pcie_port_type_strs[pcie->port_type] : "unknown"); 365 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) 366 printk("%s""version: %d.%d\n", pfx, 367 pcie->version.major, pcie->version.minor); 368 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS) 369 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx, 370 pcie->command, pcie->status); 371 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) { 372 const __u8 *p; 373 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx, 374 pcie->device_id.segment, pcie->device_id.bus, 375 pcie->device_id.device, pcie->device_id.function); 376 printk("%s""slot: %d\n", pfx, 377 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT); 378 printk("%s""secondary_bus: 0x%02x\n", pfx, 379 pcie->device_id.secondary_bus); 380 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, 381 pcie->device_id.vendor_id, pcie->device_id.device_id); 382 p = pcie->device_id.class_code; 383 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); 384 } 385 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) 386 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, 387 pcie->serial_number.lower, pcie->serial_number.upper); 388 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS) 389 printk( 390 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", 391 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 392 } 393 394 static void cper_print_tstamp(const char *pfx, 395 struct acpi_hest_generic_data_v300 *gdata) 396 { 397 __u8 hour, min, sec, day, mon, year, century, *timestamp; 398 399 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) { 400 timestamp = (__u8 *)&(gdata->time_stamp); 401 sec = bcd2bin(timestamp[0]); 402 min = bcd2bin(timestamp[1]); 403 hour = bcd2bin(timestamp[2]); 404 day = bcd2bin(timestamp[4]); 405 mon = bcd2bin(timestamp[5]); 406 year = bcd2bin(timestamp[6]); 407 century = bcd2bin(timestamp[7]); 408 409 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx, 410 (timestamp[3] & 0x1 ? "precise " : "imprecise "), 411 century, year, mon, day, hour, min, sec); 412 } 413 } 414 415 static void 416 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, 417 int sec_no) 418 { 419 guid_t *sec_type = (guid_t *)gdata->section_type; 420 __u16 severity; 421 char newpfx[64]; 422 423 if (acpi_hest_get_version(gdata) >= 3) 424 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata); 425 426 severity = gdata->error_severity; 427 printk("%s""Error %d, type: %s\n", pfx, sec_no, 428 cper_severity_str(severity)); 429 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) 430 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id); 431 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) 432 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); 433 434 snprintf(newpfx, sizeof(newpfx), "%s ", pfx); 435 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) { 436 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata); 437 438 printk("%s""section_type: general processor error\n", newpfx); 439 if (gdata->error_data_length >= sizeof(*proc_err)) 440 cper_print_proc_generic(newpfx, proc_err); 441 else 442 goto err_section_too_small; 443 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { 444 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); 445 446 printk("%s""section_type: memory error\n", newpfx); 447 if (gdata->error_data_length >= 448 sizeof(struct cper_sec_mem_err_old)) 449 cper_print_mem(newpfx, mem_err, 450 gdata->error_data_length); 451 else 452 goto err_section_too_small; 453 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { 454 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata); 455 456 printk("%s""section_type: PCIe error\n", newpfx); 457 if (gdata->error_data_length >= sizeof(*pcie)) 458 cper_print_pcie(newpfx, pcie, gdata); 459 else 460 goto err_section_too_small; 461 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM) 462 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) { 463 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata); 464 465 printk("%ssection_type: ARM processor error\n", newpfx); 466 if (gdata->error_data_length >= sizeof(*arm_err)) 467 cper_print_proc_arm(newpfx, arm_err); 468 else 469 goto err_section_too_small; 470 #endif 471 #if defined(CONFIG_UEFI_CPER_X86) 472 } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) { 473 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata); 474 475 printk("%ssection_type: IA32/X64 processor error\n", newpfx); 476 if (gdata->error_data_length >= sizeof(*ia_err)) 477 cper_print_proc_ia(newpfx, ia_err); 478 else 479 goto err_section_too_small; 480 #endif 481 } else { 482 const void *err = acpi_hest_get_payload(gdata); 483 484 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type); 485 printk("%ssection length: %#x\n", newpfx, 486 gdata->error_data_length); 487 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err, 488 gdata->error_data_length, true); 489 } 490 491 return; 492 493 err_section_too_small: 494 pr_err(FW_WARN "error section length is too small\n"); 495 } 496 497 void cper_estatus_print(const char *pfx, 498 const struct acpi_hest_generic_status *estatus) 499 { 500 struct acpi_hest_generic_data *gdata; 501 int sec_no = 0; 502 char newpfx[64]; 503 __u16 severity; 504 505 severity = estatus->error_severity; 506 if (severity == CPER_SEV_CORRECTED) 507 printk("%s%s\n", pfx, 508 "It has been corrected by h/w " 509 "and requires no further action"); 510 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity)); 511 snprintf(newpfx, sizeof(newpfx), "%s ", pfx); 512 513 apei_estatus_for_each_section(estatus, gdata) { 514 cper_estatus_print_section(newpfx, gdata, sec_no); 515 sec_no++; 516 } 517 } 518 EXPORT_SYMBOL_GPL(cper_estatus_print); 519 520 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus) 521 { 522 if (estatus->data_length && 523 estatus->data_length < sizeof(struct acpi_hest_generic_data)) 524 return -EINVAL; 525 if (estatus->raw_data_length && 526 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length) 527 return -EINVAL; 528 529 return 0; 530 } 531 EXPORT_SYMBOL_GPL(cper_estatus_check_header); 532 533 int cper_estatus_check(const struct acpi_hest_generic_status *estatus) 534 { 535 struct acpi_hest_generic_data *gdata; 536 unsigned int data_len, gedata_len; 537 int rc; 538 539 rc = cper_estatus_check_header(estatus); 540 if (rc) 541 return rc; 542 data_len = estatus->data_length; 543 544 apei_estatus_for_each_section(estatus, gdata) { 545 gedata_len = acpi_hest_get_error_length(gdata); 546 if (gedata_len > data_len - acpi_hest_get_size(gdata)) 547 return -EINVAL; 548 data_len -= acpi_hest_get_record_size(gdata); 549 } 550 if (data_len) 551 return -EINVAL; 552 553 return 0; 554 } 555 EXPORT_SYMBOL_GPL(cper_estatus_check); 556