1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * UEFI Common Platform Error Record (CPER) support 4 * 5 * Copyright (C) 2010, Intel Corp. 6 * Author: Huang Ying <ying.huang@intel.com> 7 * 8 * CPER is the format used to describe platform hardware error by 9 * various tables, such as ERST, BERT and HEST etc. 10 * 11 * For more information about CPER, please refer to Appendix N of UEFI 12 * Specification version 2.4. 13 */ 14 15 #include <linux/kernel.h> 16 #include <linux/module.h> 17 #include <linux/time.h> 18 #include <linux/cper.h> 19 #include <linux/dmi.h> 20 #include <linux/acpi.h> 21 #include <linux/pci.h> 22 #include <linux/aer.h> 23 #include <linux/printk.h> 24 #include <linux/bcd.h> 25 #include <acpi/ghes.h> 26 #include <ras/ras_event.h> 27 28 static char rcd_decode_str[CPER_REC_LEN]; 29 30 /* 31 * CPER record ID need to be unique even after reboot, because record 32 * ID is used as index for ERST storage, while CPER records from 33 * multiple boot may co-exist in ERST. 34 */ 35 u64 cper_next_record_id(void) 36 { 37 static atomic64_t seq; 38 39 if (!atomic64_read(&seq)) { 40 time64_t time = ktime_get_real_seconds(); 41 42 /* 43 * This code is unlikely to still be needed in year 2106, 44 * but just in case, let's use a few more bits for timestamps 45 * after y2038 to be sure they keep increasing monotonically 46 * for the next few hundred years... 47 */ 48 if (time < 0x80000000) 49 atomic64_set(&seq, (ktime_get_real_seconds()) << 32); 50 else 51 atomic64_set(&seq, 0x8000000000000000ull | 52 ktime_get_real_seconds() << 24); 53 } 54 55 return atomic64_inc_return(&seq); 56 } 57 EXPORT_SYMBOL_GPL(cper_next_record_id); 58 59 static const char * const severity_strs[] = { 60 "recoverable", 61 "fatal", 62 "corrected", 63 "info", 64 }; 65 66 const char *cper_severity_str(unsigned int severity) 67 { 68 return severity < ARRAY_SIZE(severity_strs) ? 69 severity_strs[severity] : "unknown"; 70 } 71 EXPORT_SYMBOL_GPL(cper_severity_str); 72 73 /* 74 * cper_print_bits - print strings for set bits 75 * @pfx: prefix for each line, including log level and prefix string 76 * @bits: bit mask 77 * @strs: string array, indexed by bit position 78 * @strs_size: size of the string array: @strs 79 * 80 * For each set bit in @bits, print the corresponding string in @strs. 81 * If the output length is longer than 80, multiple line will be 82 * printed, with @pfx is printed at the beginning of each line. 83 */ 84 void cper_print_bits(const char *pfx, unsigned int bits, 85 const char * const strs[], unsigned int strs_size) 86 { 87 int i, len = 0; 88 const char *str; 89 char buf[84]; 90 91 for (i = 0; i < strs_size; i++) { 92 if (!(bits & (1U << i))) 93 continue; 94 str = strs[i]; 95 if (!str) 96 continue; 97 if (len && len + strlen(str) + 2 > 80) { 98 printk("%s\n", buf); 99 len = 0; 100 } 101 if (!len) 102 len = snprintf(buf, sizeof(buf), "%s%s", pfx, str); 103 else 104 len += snprintf(buf+len, sizeof(buf)-len, ", %s", str); 105 } 106 if (len) 107 printk("%s\n", buf); 108 } 109 110 static const char * const proc_type_strs[] = { 111 "IA32/X64", 112 "IA64", 113 "ARM", 114 }; 115 116 static const char * const proc_isa_strs[] = { 117 "IA32", 118 "IA64", 119 "X64", 120 "ARM A32/T32", 121 "ARM A64", 122 }; 123 124 const char * const cper_proc_error_type_strs[] = { 125 "cache error", 126 "TLB error", 127 "bus error", 128 "micro-architectural error", 129 }; 130 131 static const char * const proc_op_strs[] = { 132 "unknown or generic", 133 "data read", 134 "data write", 135 "instruction execution", 136 }; 137 138 static const char * const proc_flag_strs[] = { 139 "restartable", 140 "precise IP", 141 "overflow", 142 "corrected", 143 }; 144 145 static void cper_print_proc_generic(const char *pfx, 146 const struct cper_sec_proc_generic *proc) 147 { 148 if (proc->validation_bits & CPER_PROC_VALID_TYPE) 149 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, 150 proc->proc_type < ARRAY_SIZE(proc_type_strs) ? 151 proc_type_strs[proc->proc_type] : "unknown"); 152 if (proc->validation_bits & CPER_PROC_VALID_ISA) 153 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, 154 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ? 155 proc_isa_strs[proc->proc_isa] : "unknown"); 156 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { 157 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); 158 cper_print_bits(pfx, proc->proc_error_type, 159 cper_proc_error_type_strs, 160 ARRAY_SIZE(cper_proc_error_type_strs)); 161 } 162 if (proc->validation_bits & CPER_PROC_VALID_OPERATION) 163 printk("%s""operation: %d, %s\n", pfx, proc->operation, 164 proc->operation < ARRAY_SIZE(proc_op_strs) ? 165 proc_op_strs[proc->operation] : "unknown"); 166 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { 167 printk("%s""flags: 0x%02x\n", pfx, proc->flags); 168 cper_print_bits(pfx, proc->flags, proc_flag_strs, 169 ARRAY_SIZE(proc_flag_strs)); 170 } 171 if (proc->validation_bits & CPER_PROC_VALID_LEVEL) 172 printk("%s""level: %d\n", pfx, proc->level); 173 if (proc->validation_bits & CPER_PROC_VALID_VERSION) 174 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version); 175 if (proc->validation_bits & CPER_PROC_VALID_ID) 176 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id); 177 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS) 178 printk("%s""target_address: 0x%016llx\n", 179 pfx, proc->target_addr); 180 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID) 181 printk("%s""requestor_id: 0x%016llx\n", 182 pfx, proc->requestor_id); 183 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID) 184 printk("%s""responder_id: 0x%016llx\n", 185 pfx, proc->responder_id); 186 if (proc->validation_bits & CPER_PROC_VALID_IP) 187 printk("%s""IP: 0x%016llx\n", pfx, proc->ip); 188 } 189 190 static const char * const mem_err_type_strs[] = { 191 "unknown", 192 "no error", 193 "single-bit ECC", 194 "multi-bit ECC", 195 "single-symbol chipkill ECC", 196 "multi-symbol chipkill ECC", 197 "master abort", 198 "target abort", 199 "parity error", 200 "watchdog timeout", 201 "invalid address", 202 "mirror Broken", 203 "memory sparing", 204 "scrub corrected error", 205 "scrub uncorrected error", 206 "physical memory map-out event", 207 }; 208 209 const char *cper_mem_err_type_str(unsigned int etype) 210 { 211 return etype < ARRAY_SIZE(mem_err_type_strs) ? 212 mem_err_type_strs[etype] : "unknown"; 213 } 214 EXPORT_SYMBOL_GPL(cper_mem_err_type_str); 215 216 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) 217 { 218 u32 len, n; 219 220 if (!msg) 221 return 0; 222 223 n = 0; 224 len = CPER_REC_LEN - 1; 225 if (mem->validation_bits & CPER_MEM_VALID_NODE) 226 n += scnprintf(msg + n, len - n, "node: %d ", mem->node); 227 if (mem->validation_bits & CPER_MEM_VALID_CARD) 228 n += scnprintf(msg + n, len - n, "card: %d ", mem->card); 229 if (mem->validation_bits & CPER_MEM_VALID_MODULE) 230 n += scnprintf(msg + n, len - n, "module: %d ", mem->module); 231 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) 232 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank); 233 if (mem->validation_bits & CPER_MEM_VALID_BANK) 234 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank); 235 if (mem->validation_bits & CPER_MEM_VALID_DEVICE) 236 n += scnprintf(msg + n, len - n, "device: %d ", mem->device); 237 if (mem->validation_bits & CPER_MEM_VALID_ROW) 238 n += scnprintf(msg + n, len - n, "row: %d ", mem->row); 239 if (mem->validation_bits & CPER_MEM_VALID_COLUMN) 240 n += scnprintf(msg + n, len - n, "column: %d ", mem->column); 241 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) 242 n += scnprintf(msg + n, len - n, "bit_position: %d ", 243 mem->bit_pos); 244 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) 245 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ", 246 mem->requestor_id); 247 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) 248 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ", 249 mem->responder_id); 250 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) 251 scnprintf(msg + n, len - n, "target_id: 0x%016llx ", 252 mem->target_id); 253 254 msg[n] = '\0'; 255 return n; 256 } 257 258 static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) 259 { 260 u32 len, n; 261 const char *bank = NULL, *device = NULL; 262 263 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) 264 return 0; 265 266 n = 0; 267 len = CPER_REC_LEN - 1; 268 dmi_memdev_name(mem->mem_dev_handle, &bank, &device); 269 if (bank && device) 270 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); 271 else 272 n = snprintf(msg, len, 273 "DIMM location: not present. DMI handle: 0x%.4x ", 274 mem->mem_dev_handle); 275 276 msg[n] = '\0'; 277 return n; 278 } 279 280 void cper_mem_err_pack(const struct cper_sec_mem_err *mem, 281 struct cper_mem_err_compact *cmem) 282 { 283 cmem->validation_bits = mem->validation_bits; 284 cmem->node = mem->node; 285 cmem->card = mem->card; 286 cmem->module = mem->module; 287 cmem->bank = mem->bank; 288 cmem->device = mem->device; 289 cmem->row = mem->row; 290 cmem->column = mem->column; 291 cmem->bit_pos = mem->bit_pos; 292 cmem->requestor_id = mem->requestor_id; 293 cmem->responder_id = mem->responder_id; 294 cmem->target_id = mem->target_id; 295 cmem->rank = mem->rank; 296 cmem->mem_array_handle = mem->mem_array_handle; 297 cmem->mem_dev_handle = mem->mem_dev_handle; 298 } 299 300 const char *cper_mem_err_unpack(struct trace_seq *p, 301 struct cper_mem_err_compact *cmem) 302 { 303 const char *ret = trace_seq_buffer_ptr(p); 304 305 if (cper_mem_err_location(cmem, rcd_decode_str)) 306 trace_seq_printf(p, "%s", rcd_decode_str); 307 if (cper_dimm_err_location(cmem, rcd_decode_str)) 308 trace_seq_printf(p, "%s", rcd_decode_str); 309 trace_seq_putc(p, '\0'); 310 311 return ret; 312 } 313 314 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, 315 int len) 316 { 317 struct cper_mem_err_compact cmem; 318 319 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ 320 if (len == sizeof(struct cper_sec_mem_err_old) && 321 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) { 322 pr_err(FW_WARN "valid bits set for fields beyond structure\n"); 323 return; 324 } 325 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) 326 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); 327 if (mem->validation_bits & CPER_MEM_VALID_PA) 328 printk("%s""physical_address: 0x%016llx\n", 329 pfx, mem->physical_addr); 330 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) 331 printk("%s""physical_address_mask: 0x%016llx\n", 332 pfx, mem->physical_addr_mask); 333 cper_mem_err_pack(mem, &cmem); 334 if (cper_mem_err_location(&cmem, rcd_decode_str)) 335 printk("%s%s\n", pfx, rcd_decode_str); 336 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { 337 u8 etype = mem->error_type; 338 printk("%s""error_type: %d, %s\n", pfx, etype, 339 cper_mem_err_type_str(etype)); 340 } 341 if (cper_dimm_err_location(&cmem, rcd_decode_str)) 342 printk("%s%s\n", pfx, rcd_decode_str); 343 } 344 345 static const char * const pcie_port_type_strs[] = { 346 "PCIe end point", 347 "legacy PCI end point", 348 "unknown", 349 "unknown", 350 "root port", 351 "upstream switch port", 352 "downstream switch port", 353 "PCIe to PCI/PCI-X bridge", 354 "PCI/PCI-X to PCIe bridge", 355 "root complex integrated endpoint device", 356 "root complex event collector", 357 }; 358 359 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, 360 const struct acpi_hest_generic_data *gdata) 361 { 362 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) 363 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, 364 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ? 365 pcie_port_type_strs[pcie->port_type] : "unknown"); 366 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) 367 printk("%s""version: %d.%d\n", pfx, 368 pcie->version.major, pcie->version.minor); 369 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS) 370 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx, 371 pcie->command, pcie->status); 372 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) { 373 const __u8 *p; 374 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx, 375 pcie->device_id.segment, pcie->device_id.bus, 376 pcie->device_id.device, pcie->device_id.function); 377 printk("%s""slot: %d\n", pfx, 378 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT); 379 printk("%s""secondary_bus: 0x%02x\n", pfx, 380 pcie->device_id.secondary_bus); 381 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, 382 pcie->device_id.vendor_id, pcie->device_id.device_id); 383 p = pcie->device_id.class_code; 384 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); 385 } 386 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) 387 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, 388 pcie->serial_number.lower, pcie->serial_number.upper); 389 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS) 390 printk( 391 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", 392 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 393 } 394 395 static void cper_print_tstamp(const char *pfx, 396 struct acpi_hest_generic_data_v300 *gdata) 397 { 398 __u8 hour, min, sec, day, mon, year, century, *timestamp; 399 400 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) { 401 timestamp = (__u8 *)&(gdata->time_stamp); 402 sec = bcd2bin(timestamp[0]); 403 min = bcd2bin(timestamp[1]); 404 hour = bcd2bin(timestamp[2]); 405 day = bcd2bin(timestamp[4]); 406 mon = bcd2bin(timestamp[5]); 407 year = bcd2bin(timestamp[6]); 408 century = bcd2bin(timestamp[7]); 409 410 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx, 411 (timestamp[3] & 0x1 ? "precise " : "imprecise "), 412 century, year, mon, day, hour, min, sec); 413 } 414 } 415 416 static void 417 cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, 418 int sec_no) 419 { 420 guid_t *sec_type = (guid_t *)gdata->section_type; 421 __u16 severity; 422 char newpfx[64]; 423 424 if (acpi_hest_get_version(gdata) >= 3) 425 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata); 426 427 severity = gdata->error_severity; 428 printk("%s""Error %d, type: %s\n", pfx, sec_no, 429 cper_severity_str(severity)); 430 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) 431 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id); 432 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) 433 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); 434 435 snprintf(newpfx, sizeof(newpfx), "%s ", pfx); 436 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) { 437 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata); 438 439 printk("%s""section_type: general processor error\n", newpfx); 440 if (gdata->error_data_length >= sizeof(*proc_err)) 441 cper_print_proc_generic(newpfx, proc_err); 442 else 443 goto err_section_too_small; 444 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { 445 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); 446 447 printk("%s""section_type: memory error\n", newpfx); 448 if (gdata->error_data_length >= 449 sizeof(struct cper_sec_mem_err_old)) 450 cper_print_mem(newpfx, mem_err, 451 gdata->error_data_length); 452 else 453 goto err_section_too_small; 454 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { 455 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata); 456 457 printk("%s""section_type: PCIe error\n", newpfx); 458 if (gdata->error_data_length >= sizeof(*pcie)) 459 cper_print_pcie(newpfx, pcie, gdata); 460 else 461 goto err_section_too_small; 462 #if defined(CONFIG_ARM64) || defined(CONFIG_ARM) 463 } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { 464 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata); 465 466 printk("%ssection_type: ARM processor error\n", newpfx); 467 if (gdata->error_data_length >= sizeof(*arm_err)) 468 cper_print_proc_arm(newpfx, arm_err); 469 else 470 goto err_section_too_small; 471 #endif 472 #if defined(CONFIG_UEFI_CPER_X86) 473 } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) { 474 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata); 475 476 printk("%ssection_type: IA32/X64 processor error\n", newpfx); 477 if (gdata->error_data_length >= sizeof(*ia_err)) 478 cper_print_proc_ia(newpfx, ia_err); 479 else 480 goto err_section_too_small; 481 #endif 482 } else { 483 const void *err = acpi_hest_get_payload(gdata); 484 485 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type); 486 printk("%ssection length: %#x\n", newpfx, 487 gdata->error_data_length); 488 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err, 489 gdata->error_data_length, true); 490 } 491 492 return; 493 494 err_section_too_small: 495 pr_err(FW_WARN "error section length is too small\n"); 496 } 497 498 void cper_estatus_print(const char *pfx, 499 const struct acpi_hest_generic_status *estatus) 500 { 501 struct acpi_hest_generic_data *gdata; 502 int sec_no = 0; 503 char newpfx[64]; 504 __u16 severity; 505 506 severity = estatus->error_severity; 507 if (severity == CPER_SEV_CORRECTED) 508 printk("%s%s\n", pfx, 509 "It has been corrected by h/w " 510 "and requires no further action"); 511 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity)); 512 snprintf(newpfx, sizeof(newpfx), "%s ", pfx); 513 514 apei_estatus_for_each_section(estatus, gdata) { 515 cper_estatus_print_section(newpfx, gdata, sec_no); 516 sec_no++; 517 } 518 } 519 EXPORT_SYMBOL_GPL(cper_estatus_print); 520 521 int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus) 522 { 523 if (estatus->data_length && 524 estatus->data_length < sizeof(struct acpi_hest_generic_data)) 525 return -EINVAL; 526 if (estatus->raw_data_length && 527 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length) 528 return -EINVAL; 529 530 return 0; 531 } 532 EXPORT_SYMBOL_GPL(cper_estatus_check_header); 533 534 int cper_estatus_check(const struct acpi_hest_generic_status *estatus) 535 { 536 struct acpi_hest_generic_data *gdata; 537 unsigned int data_len, record_size; 538 int rc; 539 540 rc = cper_estatus_check_header(estatus); 541 if (rc) 542 return rc; 543 544 data_len = estatus->data_length; 545 546 apei_estatus_for_each_section(estatus, gdata) { 547 if (sizeof(struct acpi_hest_generic_data) > data_len) 548 return -EINVAL; 549 550 record_size = acpi_hest_get_record_size(gdata); 551 if (record_size > data_len) 552 return -EINVAL; 553 554 data_len -= record_size; 555 } 556 if (data_len) 557 return -EINVAL; 558 559 return 0; 560 } 561 EXPORT_SYMBOL_GPL(cper_estatus_check); 562