1 /* 2 * Copyright (c) 2018 Intel Corporation. 3 * Copyright (c) 2018-present Facebook. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 #include <boost/algorithm/string/join.hpp> 19 #include <boost/container/flat_map.hpp> 20 #include <ipmid/api.hpp> 21 #include <nlohmann/json.hpp> 22 #include <phosphor-logging/log.hpp> 23 #include <sdbusplus/message/types.hpp> 24 #include <sdbusplus/timer.hpp> 25 #include <storagecommands.hpp> 26 27 #include <fstream> 28 #include <iostream> 29 #include <sstream> 30 #include <thread> 31 32 enum class MemErrType 33 { 34 memTrainErr = 0, 35 memPmicErr = 7 36 }; 37 38 enum class PostEvtType 39 { 40 pxeBootFail = 0, 41 httpBootFail = 6, 42 getCertFail = 7, 43 amdAblFail = 10 44 }; 45 46 enum class PcieEvtType 47 { 48 dpc = 0 49 }; 50 51 enum class MemEvtType 52 { 53 ppr = 0, 54 adddc = 5, 55 noDimm = 7 56 }; 57 58 //---------------------------------------------------------------------- 59 // Platform specific functions for storing app data 60 //---------------------------------------------------------------------- 61 62 static std::string byteToStr(uint8_t byte) 63 { 64 std::stringstream ss; 65 66 ss << std::hex << std::uppercase << std::setfill('0'); 67 ss << std::setw(2) << (int)byte; 68 69 return ss.str(); 70 } 71 72 static void toHexStr(std::vector<uint8_t>& bytes, std::string& hexStr) 73 { 74 std::stringstream stream; 75 stream << std::hex << std::uppercase << std::setfill('0'); 76 for (const uint8_t byte 
: bytes) 77 { 78 stream << std::setw(2) << static_cast<int>(byte); 79 } 80 hexStr = stream.str(); 81 } 82 83 static int fromHexStr(const std::string hexStr, std::vector<uint8_t>& data) 84 { 85 for (unsigned int i = 0; i < hexStr.size(); i += 2) 86 { 87 try 88 { 89 data.push_back(static_cast<uint8_t>( 90 std::stoul(hexStr.substr(i, 2), nullptr, 16))); 91 } 92 catch (const std::invalid_argument& e) 93 { 94 phosphor::logging::log<phosphor::logging::level::ERR>(e.what()); 95 return -1; 96 } 97 catch (const std::out_of_range& e) 98 { 99 phosphor::logging::log<phosphor::logging::level::ERR>(e.what()); 100 return -1; 101 } 102 } 103 return 0; 104 } 105 106 namespace fb_oem::ipmi::sel 107 { 108 109 class SELData 110 { 111 private: 112 nlohmann::json selDataObj; 113 114 void flush() 115 { 116 std::ofstream file(SEL_JSON_DATA_FILE); 117 file << selDataObj; 118 file.close(); 119 } 120 121 void init() 122 { 123 selDataObj[KEY_SEL_VER] = 0x51; 124 selDataObj[KEY_SEL_COUNT] = 0; 125 selDataObj[KEY_ADD_TIME] = 0xFFFFFFFF; 126 selDataObj[KEY_ERASE_TIME] = 0xFFFFFFFF; 127 selDataObj[KEY_OPER_SUPP] = 0x02; 128 /* Spec indicates that more than 64kB is free */ 129 selDataObj[KEY_FREE_SPACE] = 0xFFFF; 130 } 131 132 void writeEmptyJson() 133 { 134 selDataObj = nlohmann::json::object(); // Create an empty JSON object 135 std::ofstream outFile(SEL_JSON_DATA_FILE); 136 if (outFile) 137 { 138 // Write empty JSON object to the file 139 outFile << selDataObj.dump(4); 140 outFile.close(); 141 } 142 else 143 { 144 lg2::info("Failed to create SEL JSON file with empty JSON."); 145 } 146 } 147 148 public: 149 SELData() 150 { 151 /* Get App data stored in json file */ 152 std::ifstream file(SEL_JSON_DATA_FILE); 153 if (file) 154 { 155 try 156 { 157 file >> selDataObj; 158 } 159 catch (const nlohmann::json::parse_error& e) 160 { 161 lg2::error("Error parsing SEL JSON file: {ERROR}", "ERROR", e); 162 writeEmptyJson(); 163 init(); // Initialize to default values 164 } 165 file.close(); 166 } 167 else 
168 { 169 lg2::info("Failed to open SEL JSON file."); 170 writeEmptyJson(); 171 init(); 172 } 173 174 /* Initialize SelData object if no entries. */ 175 if (selDataObj.find(KEY_SEL_COUNT) == selDataObj.end()) 176 { 177 init(); 178 } 179 } 180 181 int clear() 182 { 183 /* Clear the complete Sel Json object */ 184 selDataObj.clear(); 185 /* Reinitialize it with basic data */ 186 init(); 187 /* Save the erase time */ 188 struct timespec selTime = {}; 189 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0) 190 { 191 return -1; 192 } 193 selDataObj[KEY_ERASE_TIME] = selTime.tv_sec; 194 flush(); 195 return 0; 196 } 197 198 uint32_t getCount() 199 { 200 return selDataObj[KEY_SEL_COUNT]; 201 } 202 203 void getInfo(GetSELInfoData& info) 204 { 205 info.selVersion = selDataObj[KEY_SEL_VER]; 206 info.entries = selDataObj[KEY_SEL_COUNT]; 207 info.freeSpace = selDataObj[KEY_FREE_SPACE]; 208 info.addTimeStamp = selDataObj[KEY_ADD_TIME]; 209 info.eraseTimeStamp = selDataObj[KEY_ERASE_TIME]; 210 info.operationSupport = selDataObj[KEY_OPER_SUPP]; 211 } 212 213 int getEntry(uint32_t index, std::string& rawStr) 214 { 215 std::stringstream ss; 216 ss << std::hex; 217 ss << std::setw(2) << std::setfill('0') << index; 218 219 /* Check or the requested SEL Entry, if record is available */ 220 if (selDataObj.find(ss.str()) == selDataObj.end()) 221 { 222 return -1; 223 } 224 225 rawStr = selDataObj[ss.str()][KEY_SEL_ENTRY_RAW]; 226 return 0; 227 } 228 229 int addEntry(std::string keyStr) 230 { 231 struct timespec selTime = {}; 232 233 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0) 234 { 235 return -1; 236 } 237 238 selDataObj[KEY_ADD_TIME] = selTime.tv_sec; 239 240 int selCount = selDataObj[KEY_SEL_COUNT]; 241 selDataObj[KEY_SEL_COUNT] = ++selCount; 242 243 std::stringstream ss; 244 ss << std::hex; 245 ss << std::setw(2) << std::setfill('0') << selCount; 246 247 selDataObj[ss.str()][KEY_SEL_ENTRY_RAW] = keyStr; 248 flush(); 249 return selCount; 250 } 251 }; 252 253 /* 254 * A Function to 
parse common SEL message, a helper function
 * for parseStdSel.
 *
 * Note that this function __CANNOT__ be overridden.
 * To add board specific routine, please override parseStdSel.
 */

/*Used by decoding ME event*/
std::vector<std::string> nmDomName = {
    "Entire Platform",          "CPU Subsystem",
    "Memory Subsystem",         "HW Protection",
    "High Power I/O subsystem", "Unknown"};

/* Bounds-checked lookup into nmDomName: the domain index is a 4-bit field
 * (0-15) but the table has fewer entries, so anything past the end maps to
 * the final "Unknown" entry instead of reading out of bounds. */
static const std::string& nmDomainName(uint8_t domIdx)
{
    return (domIdx < nmDomName.size()) ? nmDomName[domIdx] : nmDomName.back();
}

/*
 * Sensor decoders. Unless stated otherwise each receives a pointer to the
 * three event-data bytes (data[0..2] == eventData1..3, see parseSelHelper)
 * and writes a human-readable description into errLog.
 */

/* Default log message for unknown type */
static void logDefault(uint8_t*, std::string& errLog)
{
    errLog = "Unknown";
}

/* SYSTEM_EVENT: only the 0xE5 time-change event is decoded. */
static void logSysEvent(uint8_t* data, std::string& errLog)
{
    if (data[0] == 0xE5)
    {
        errLog = "Cause of Time change - ";
        switch (data[2])
        {
            case 0x00:
                errLog += "NTP";
                break;
            case 0x01:
                errLog += "Host RTL";
                break;
            case 0x02:
                errLog += "Set SEL time cmd";
                break;
            case 0x03:
                errLog += "Set SEL time UTC offset cmd";
                break;
            default:
                errLog += "Unknown";
        }

        if (data[1] == 0x00)
        {
            errLog += " - First Time";
        }
        else if (data[1] == 0x80)
        {
            errLog += " - Second Time";
        }
    }
    else
    {
        errLog = "Unknown";
    }
}

/* THERM_THRESH_EVT */
static void logThermalEvent(uint8_t* data, std::string& errLog)
{
    if (data[0] == 0x1)
    {
        errLog = "Limit Exceeded";
    }
    else
    {
        errLog = "Unknown";
    }
}

/* CRITICAL_IRQ */
static void logCritIrq(uint8_t* data, std::string& errLog)
{
    if (data[0] == 0x0)
    {
        errLog = "NMI / Diagnostic Interrupt";
    }
    else if (data[0] == 0x03)
    {
        errLog = "Software NMI";
    }
    else
    {
        errLog = "Unknown";
    }

    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
}

/* POST_ERROR: data[0] bits [7:6] select how data[1..2] are interpreted
 * (0x3: IPMI post code, 0x2: 16-bit OEM post code, data[2] high byte). */
static void logPostErr(uint8_t* data, std::string& errLog)
{
    if ((data[0] & 0x0F) == 0x0)
    {
        errLog = "System Firmware Error";
    }
    else
    {
        errLog = "Unknown";
    }

    const uint8_t codeKind = (data[0] >> 6) & 0x03;
    if (codeKind == 0x3)
    {
        // TODO: Need to implement IPMI spec based Post Code
        errLog += ", IPMI Post Code";
    }
    else if (codeKind == 0x2)
    {
        errLog += ", OEM Post Code 0x" + byteToStr(data[2]) +
                  byteToStr(data[1]);

        switch ((data[2] << 8) | data[1])
        {
            case 0xA105:
                errLog += ", BMC Failed (No Response)";
                break;
            case 0xA106:
                errLog += ", BMC Failed (Self Test Fail)";
                break;
            case 0xA10A:
                errLog += ", System Firmware Corruption Detected";
                break;
            case 0xA10B:
                errLog += ", TPM Self-Test FAIL Detected";
                break;
        }
    }
}

/* MACHINE_CHK_ERR: data[0] low nibble selects uncorrected (0x0B) vs
 * corrected (0x0C); data[1] bits [6:5] select the severity text. */
static void logMchChkErr(uint8_t* data, std::string& errLog)
{
    /* TODO: Call add_cri_sel for CRITICAL_IRQ */
    switch (data[0] & 0x0F)
    {
        case 0x0B:
            switch ((data[1] >> 5) & 0x03)
            {
                case 0x00:
                    errLog = "Uncorrected Recoverable Error";
                    break;
                case 0x01:
                    errLog = "Uncorrected Thread Fatal Error";
                    break;
                case 0x02:
                    errLog = "Uncorrected System Fatal Error";
                    break;
                default:
                    errLog = "Unknown";
            }
            break;
        case 0x0C:
            switch ((data[1] >> 5) & 0x03)
            {
                case 0x00:
                    errLog = "Correctable Error";
                    break;
                case 0x01:
                    errLog = "Deferred Error";
                    break;
                default:
                    errLog = "Unknown";
            }
            break;
        default:
            errLog = "Unknown";
    }

    /* NOTE(review): the bank number prints the whole of data[1], including
     * the severity bits decoded above - confirm whether a mask is wanted. */
    errLog += ", Machine Check bank Number " + std::to_string(data[1]) +
              ", CPU " + std::to_string(data[2] >> 5) + ", Core " +
              std::to_string(data[2] & 0x1F);
}

/* PCIE_ERR: data[0] low nibble is the error kind; data[1..2] are either the
 * device/function and bus numbers or a 16-bit ID, depending on the kind. */
static void logPcieErr(uint8_t* data, std::string& errLog)
{
    std::stringstream tmp1, tmp2;
    tmp1 << std::hex << std::uppercase << std::setfill('0');
    tmp2 << std::hex << std::uppercase << std::setfill('0');
    tmp1 << " (Bus " << std::setw(2) << static_cast<int>(data[2]) << " / Dev "
         << std::setw(2) << static_cast<int>(data[1] >> 3) << " / Fun "
         << std::setw(2) << static_cast<int>(data[1] & 0x7) << ")";

    switch (data[0] & 0xF)
    {
        case 0x4:
            errLog = "PCI PERR" + tmp1.str();
            break;
        case 0x5:
            errLog = "PCI SERR" + tmp1.str();
            break;
        case 0x7:
            errLog = "Correctable" + tmp1.str();
            break;
        case 0x8:
            errLog = "Uncorrectable" + tmp1.str();
            break;
        case 0xA:
            errLog = "Bus Fatal" + tmp1.str();
            break;
        case 0xD:
        {
            const uint32_t venId = static_cast<uint32_t>(data[1]) << 8 |
                                   static_cast<uint32_t>(data[2]);
            tmp2 << "Vendor ID: 0x" << std::setw(4) << venId;
            errLog = tmp2.str();
        }
        break;
        case 0xE:
        {
            const uint32_t devId = static_cast<uint32_t>(data[1]) << 8 |
                                   static_cast<uint32_t>(data[2]);
            tmp2 << "Device ID: 0x" << std::setw(4) << devId;
            errLog = tmp2.str();
        }
        break;
        case 0xF:
            tmp2 << "Error ID from downstream: 0x" << std::setw(2)
                 << static_cast<int>(data[1]) << std::setw(2)
                 << static_cast<int>(data[2]);
            errLog = tmp2.str();
            break;
        default:
            errLog = "Unknown";
    }
}

/* IIO_ERR */
static void logIioErr(uint8_t* data, std::string& errLog)
{
    const std::vector<std::string> tmpStr = {
        "IRP0", "IRP1", " IIO-Core", "VT-d", "Intel Quick Data",
        "Misc", " DMA", "ITC",       "OTC",  "CI"};

    if ((data[0] & 0xF) == 0)
    {
        /* Assign rather than append: every other decoder overwrites errLog,
         * and appending would keep whatever the caller's string held. */
        errLog = "CPU " + std::to_string(data[2] >> 5) + ", Error ID 0x" +
                 byteToStr(data[1]) + " - ";

        if ((data[2] & 0xF) <= 0x9)
        {
            errLog += tmpStr[(data[2] & 0xF)];
        }
        else
        {
            errLog += "Reserved";
        }
    }
    else
    {
        errLog = "Unknown";
    }
}

/* Memory ECC / error-logging-disabled decoder.
 *
 * Unlike the other decoders, dataPtr points at the sensor type byte:
 * dataPtr[0] is the sensor type, dataPtr[1] the sensor number and
 * dataPtr[3..5] the three event-data bytes.
 */
[[maybe_unused]] static void logMemErr(uint8_t* dataPtr, std::string& errLog)
{
    uint8_t snrType = dataPtr[0];
    uint8_t snrNum = dataPtr[1];
    uint8_t* data = &(dataPtr[3]);

    /* TODO: add pal_add_cri_sel */

    if (snrNum == memoryEccError)
    {
        /* SEL from MEMORY_ECC_ERR Sensor */
        switch (data[0] & 0x0F)
        {
            case 0x0:
                if (snrType == 0x0C)
                {
                    errLog = "Correctable";
                }
                else if (snrType == 0x10)
                {
                    errLog = "Correctable ECC error Logging Disabled";
                }
                else
                {
                    /* Previously errLog was left as passed in by the caller
                     * for any other sensor type. */
                    errLog = "Unknown";
                }
                break;
            case 0x1:
                errLog = "Uncorrectable";
                break;
            case 0x5:
                errLog = "Correctable ECC error Logging Limit Disabled";
                break;
            default:
                errLog = "Unknown";
        }
    }
    else if (snrNum == memoryErrLogDIS)
    {
        // SEL from MEMORY_ERR_LOG_DIS Sensor
        if ((data[0] & 0x0F) == 0x0)
        {
            errLog = "Correctable Memory Error Logging Disabled";
        }
        else
        {
            errLog = "Unknown";
        }
    }
    else
    {
        errLog = "Unknown";
        return;
    }

    /* Common routine for both MEM_ECC_ERR and MEMORY_ERR_LOG_DIS */

    errLog += " (DIMM " + byteToStr(data[2]) + ") Logical Rank " +
              std::to_string(data[1] & 0x03);

    /* DIMM number (data[2]):
     * Bit[7:5]: Socket number  (Range: 0-7)
     * Bit[4:3]: Channel number (Range: 0-3)
     * Bit[2:0]: DIMM number    (Range: 0-7)
     */

    /* TODO: Verify these bits */
    std::string cpuStr = "CPU# " + std::to_string((data[2] & 0xE0) >> 5);
    std::string chStr = "CHN# " + std::to_string((data[2] & 0x18) >> 3);
    std::string dimmStr = "DIMM#" + std::to_string(data[2] & 0x7);

    switch ((data[1] & 0xC) >> 2)
    {
        case 0x0:
        {
            /* All Info Valid */
            /* NOTE(review): these masks differ from the bit layout documented
             * above; they are unused until critical SEL logging is added. */
            [[maybe_unused]] uint8_t chnNum = (data[2] & 0x1C) >> 2;
            [[maybe_unused]] uint8_t dimmNum = data[2] & 0x3;

            /* TODO: If critical SEL logging is available, do it */
            if (snrType == 0x0C)
            {
                if ((data[0] & 0x0F) == 0x0)
                {
                    /* TODO: add_cri_sel */
                    /* "DIMM"+ 'A'+ chnNum + dimmNum + " ECC err,FRU:1"
                     */
                }
                else if ((data[0] & 0x0F) == 0x1)
                {
                    /* TODO: add_cri_sel */
                    /* "DIMM"+ 'A'+ chnNum + dimmNum + " UECC err,FRU:1"
                     */
                }
            }
            /* Continue to parse the error into a string. All Info Valid
             */
            errLog += " (" + cpuStr + ", " + chStr + ", " + dimmStr + ")";
        }

        break;
        case 0x1:

            /* DIMM info not valid */
            errLog += " (" + cpuStr + ", " + chStr + ")";
            break;
        case 0x2:

            /* CHN info not valid */
            errLog += " (" + cpuStr + ", " + dimmStr + ")";
            break;
        case 0x3:

            /* CPU info not valid */
            errLog += " (" + chStr + ", " + dimmStr + ")";
            break;
    }
}

/* PWR_ERR */
static void logPwrErr(uint8_t* data, std::string& errLog)
{
    if (data[0] == 0x1)
    {
        errLog = "SYS_PWROK failure";
        /* Also try logging to Critical log file, if available */
        /* "SYS_PWROK failure,FRU:1" */
    }
    else if (data[0] == 0x2)
    {
        errLog = "PCH_PWROK failure";
        /* Also try logging to Critical log file, if available */
        /* "PCH_PWROK failure,FRU:1" */
    }
    else
    {
        errLog = "Unknown";
    }
}

/* CATERR_A / CATERR_B */
static void logCatErr(uint8_t* data, std::string& errLog)
{
    if (data[0] == 0x0)
    {
        errLog = "IERR/CATERR";
        /* Also try logging to Critical log file, if available */
        /* "IERR,FRU:1 */
    }
    else if (data[0] == 0xB)
    {
        errLog = "MCERR/CATERR";
        /* Also try logging to Critical log file, if available */
        /* "MCERR,FRU:1 */
    }
    else
    {
        errLog = "Unknown";
    }
}

/* CPU_DIMM_HOT: matches only the exact 3-byte pattern 01 FF FF. */
static void logDimmHot(uint8_t* data, std::string& errLog)
{
    if ((data[0] << 16 | data[1] << 8 | data[2]) == 0x01FFFF)
    {
        errLog = "SOC MEMHOT";
    }
    else
    {
        errLog = "Unknown";
        /* Also try logging to Critical log file, if available */
        /* ""CPU_DIMM_HOT %s,FRU:1" */
    }
}

/* SOFTWARE_NMI: matches only the exact 3-byte pattern 03 FF FF. */
static void logSwNMI(uint8_t* data, std::string& errLog)
{
    if ((data[0] << 16 | data[1] << 8 | data[2]) == 0x03FFFF)
    {
        errLog = "Software NMI";
    }
    else
    {
        errLog = "Unknown SW NMI";
    }
}

/* CPU0_THERM_STATUS / CPU1_THERM_STATUS */
static void logCPUThermalSts(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0x0:
            errLog = "CPU Critical Temperature";
            break;
        case 0x1:
            errLog = "PROCHOT#";
            break;
        case 0x2:
            errLog = "TCC Activation";
            break;
        default:
            errLog = "Unknown";
    }
}

/* ME_POWER_STATE */
static void logMEPwrState(uint8_t* data, std::string& errLog)
{
    switch (data[0])
    {
        case 0:
            errLog = "RUNNING";
            break;
        case 2:
            errLog = "POWER_OFF";
            break;
        default:
            errLog = "Unknown[" + std::to_string(data[0]) + "]";
            break;
    }
}

/* SPS_FW_HEALTH: data[0] low nibble selects the event class; for class 0,
 * data[1] indexes the message table below. */
static void logSPSFwHealth(uint8_t* data, std::string& errLog)
{
    if ((data[0] & 0x0F) == 0x00)
    {
        const std::vector<std::string> tmpStr = {
            "Recovery GPIO forced",
            "Image execution failed",
            "Flash erase error",
            "Flash state information",
            "Internal error",
            "BMC did not respond",
            "Direct Flash update",
            "Manufacturing error",
            "Automatic Restore to Factory Presets",
            "Firmware Exception",
            "Flash Wear-Out Protection Warning",
            "Unknown",
            "Unknown",
            "DMI interface error",
            "MCTP interface error",
            "Auto-configuration finished",
            "Unsupported Segment Defined Feature",
            "Unknown",
            "CPU Debug Capability Disabled",
            "UMA operation error"};

        if (data[1] < tmpStr.size())
        {
            errLog = tmpStr[data[1]];
        }
        else
        {
            errLog = "Unknown";
        }
    }
    else if ((data[0] & 0x0F) == 0x01)
    {
        errLog = "SMBus link failure";
    }
    else
    {
        errLog = "Unknown";
    }
}

/* NM_EXCEPTION_A */
static void logNmExcA(uint8_t* data, std::string& errLog)
{
    /*NM4.0 #550710, Revision 1.95, and turn to p.155*/
    if (data[0] == 0xA8)
    {
        errLog = "Policy Correction Time Exceeded";
    }
    else
    {
        errLog = "Unknown";
    }
}

/* PCH_THERM_THRESHOLD: data[0] low nibble indexes the threshold name;
 * data[1] and data[2] are printed as current and threshold values. */
static void logPCHThermal(uint8_t* data, std::string& errLog)
{
    const std::vector<std::string> thresEvtName = {
        "Lower Non-critical",
        "Unknown",
        "Lower Critical",
        "Unknown",
        "Lower Non-recoverable",
        "Unknown",
        "Unknown",
        "Upper Non-critical",
        "Unknown",
        "Upper Critical",
        "Unknown",
        "Upper Non-recoverable"};

    const uint8_t evtIdx = data[0] & 0x0f;
    if (evtIdx < thresEvtName.size())
    {
        errLog = thresEvtName[evtIdx];
    }
    else
    {
        errLog = "Unknown";
    }

    errLog += ", curr_val: " + std::to_string(data[1]) +
              " C, thresh_val: " + std::to_string(data[2]) + " C";
}

/* NM_HEALTH: data[1] low nibble is the domain index, high nibble the error
 * type index; data[2] is printed as the raw error byte. */
static void logNmHealth(uint8_t* data, std::string& errLog)
{
    const std::vector<std::string> nmErrType = {
        "Unknown",
        "Unknown",
        "Unknown",
        "Unknown",
        "Unknown",
        "Unknown",
        "Unknown",
        "Extended Telemetry Device Reading Failure",
        "Outlet Temperature Reading Failure",
        "Volumetric Airflow Reading Failure",
        "Policy Misconfiguration",
        "Power Sensor Reading Failure",
        "Inlet Temperature Reading Failure",
        "Host Communication Error",
        "Real-time Clock Synchronization Failure",
        "Platform Shutdown Initiated by Intel NM Policy",
        "Unknown"};
    uint8_t nmTypeIdx = (data[0] & 0xf);
    uint8_t domIdx = (data[1] & 0xf);
    uint8_t errIdx = ((data[1] >> 4) & 0xf);

    if (nmTypeIdx == 2)
    {
        errLog = "SensorIntelNM";
    }
    else
    {
        errLog = "Unknown";
    }

    /* nmDomainName() guards the lookup: domIdx can be 0-15 but nmDomName is
     * shorter, which used to read past the end of the vector. */
    errLog += ", Domain:" + nmDomainName(domIdx) + ", ErrType:" +
              nmErrType[errIdx] + ", Err:0x" + byteToStr(data[2]);
}

/* NM_CAPABILITIES */
static void logNmCap(uint8_t* data, std::string& errLog)
{
    const std::vector<std::string> nmCapStsStr = {"Not Available", "Available"};
    if (data[0] & 0x7) // BIT1=policy, BIT2=monitoring, BIT3=pwr
                       // limit and the others are reserved
    {
        errLog = "PolicyInterface:" + nmCapStsStr[BIT(data[0], 0)] +
                 ",Monitoring:" + nmCapStsStr[BIT(data[0], 1)] +
                 ",PowerLimit:" + nmCapStsStr[BIT(data[0], 2)];
    }
    else
    {
        errLog = "Unknown";
    }
}

/* NM_THRESHOLD */
static void logNmThreshold(uint8_t* data, std::string& errLog)
{
    uint8_t thresNum = (data[0] & 0x3);
    uint8_t domIdx = (data[1] & 0xf);
    uint8_t polId = data[2];
    uint8_t polEvtIdx = BIT(data[0], 3);
    const std::vector<std::string> polEvtStr = {
        "Threshold Exceeded", "Policy Correction Time Exceeded"};

    /* nmDomainName() guards the lookup (see logNmHealth). */
    errLog = "Threshold Number:" + std::to_string(thresNum) + "-" +
             polEvtStr[polEvtIdx] + ", Domain:" + nmDomainName(domIdx) +
             ", PolicyID:0x" + byteToStr(polId);
}

/* PWR_THRESH_EVT */
static void logPwrThreshold(uint8_t* data, std::string& errLog)
{
    if (data[0] == 0x00)
    {
        errLog = "Limit Not Exceeded";
    }
    else if (data[0] == 0x01)
    {
        errLog = "Limit Exceeded";
    }
    else
    {
        errLog = "Unknown";
    }
}

/* MSMI */
static void logMSMI(uint8_t* data, std::string& errLog)
{
    if (data[0] == 0x0)
    {
        errLog = "IERR/MSMI";
    }
    else if (data[0] == 0x0B)
    {
        errLog = "MCERR/MSMI";
    }
    else
    {
        errLog = "Unknown";
    }
}

/* HPR_WARNING: when data[2] is 0x01, data[1] is a duration in minutes
 * (0xFF is reported as "Infinite Time"). */
static void logHprWarn(uint8_t* data, std::string& errLog)
{
    if (data[2] == 0x01)
    {
        if (data[1] == 0xFF)
        {
            errLog = "Infinite Time";
        }
        else
        {
            errLog = std::to_string(data[1]) + " minutes";
        }
    }
    else
    {
        errLog = "Unknown";
    }
}

/* Sensor number -> (sensor name, decoder). */
static const boost::container::flat_map<
    uint8_t,
    std::pair<std::string, std::function<void(uint8_t*, std::string&)>>>
    sensorNameTable = {
        {0xE9, {"SYSTEM_EVENT", logSysEvent}},
        {0x7D, {"THERM_THRESH_EVT", logThermalEvent}},
        {0xAA, {"BUTTON", logDefault}},
        {0xAB, {"POWER_STATE", logDefault}},
        {0xEA, {"CRITICAL_IRQ", logCritIrq}},
        {0x2B, {"POST_ERROR", logPostErr}},
        {0x40, {"MACHINE_CHK_ERR", logMchChkErr}},
        {0x41, {"PCIE_ERR", logPcieErr}},
        {0x43, {"IIO_ERR", logIioErr}},
        {0X63, {"MEMORY_ECC_ERR", logDefault}},
        {0X87, {"MEMORY_ERR_LOG_DIS", logDefault}},
        {0X51, {"PROCHOT_EXT", logDefault}},
        {0X56, {"PWR_ERR", logPwrErr}},
        {0xE6, {"CATERR_A", logCatErr}},
        {0xEB, {"CATERR_B", logCatErr}},
        {0xB3, {"CPU_DIMM_HOT", logDimmHot}},
        {0x90, {"SOFTWARE_NMI", logSwNMI}},
        {0x1C, {"CPU0_THERM_STATUS", logCPUThermalSts}},
        {0x1D, {"CPU1_THERM_STATUS", logCPUThermalSts}},
        {0x16, {"ME_POWER_STATE", logMEPwrState}},
        {0x17, {"SPS_FW_HEALTH", logSPSFwHealth}},
        {0x18, {"NM_EXCEPTION_A", logNmExcA}},
        {0x08, {"PCH_THERM_THRESHOLD", logPCHThermal}},
        {0x19, {"NM_HEALTH", logNmHealth}},
        {0x1A, {"NM_CAPABILITIES", logNmCap}},
        {0x1B, {"NM_THRESHOLD", logNmThreshold}},
        {0x3B, {"PWR_THRESH_EVT", logPwrThreshold}},
        {0xE7, {"MSMI", logMSMI}},
        {0xC5, {"HPR_WARNING", logHprWarn}}};

/* Decode a standard SEL record through sensorNameTable.
 *
 * OS_BOOT (sensor type 0x1F) records are decoded inline and returned without
 * an assertion suffix. Every other known sensor is dispatched to its decoder
 * and then suffixed with " Assertion" / " Deassertion".
 */
static void parseSelHelper(StdSELEntry* data, std::string& errStr)
{
    /* Check if sensor type is OS_BOOT (0x1f) */
    if (data->sensorType == 0x1F)
    {
        /* OS_BOOT used by OS */
        switch (data->eventData1 & 0xF)
        {
            case 0x07:
                errStr = "Base OS/Hypervisor Installation started";
                break;
            case 0x08:
                errStr = "Base OS/Hypervisor Installation completed";
                break;
            case 0x09:
                errStr = "Base OS/Hypervisor Installation aborted";
                break;
            case 0x0A:
                errStr = "Base OS/Hypervisor Installation failed";
                break;
            default:
                errStr = "Unknown";
        }
        return;
    }

    auto findSensorName = sensorNameTable.find(data->sensorNum);
    if (findSensorName == sensorNameTable.end())
    {
        errStr = "Unknown";
        return;
    }

    switch (data->sensorNum)
    {
        /* logMemErr function needs data from sensor type */
        case memoryEccError:
        case memoryErrLogDIS:
            findSensorName->second.second(&(data->sensorType), errStr);
            break;
        /* Other sensor function needs only event data for parsing */
        default:
            findSensorName->second.second(&(data->eventData1), errStr);
    }

    /* Bit 7 of the Event Dir / Event Type byte marks a deassertion. This
     * used to test eventData3, which disagreed with parseStdSel and with
     * the IPMI SEL event record layout. */
    if (((data->eventType & 0x80) >> 7) == 0)
    {
        errStr += " Assertion";
    }
    else
    {
        errStr += " Deassertion";
    }
}

/* Append the physical DIMM location decoded from eventData2/eventData3:
 * node = eventData3[7:5], channel ("Card") = eventData3[4:3],
 * DIMM ("Module") = eventData3[2:0], rank = eventData2[1:0]. */
static void parseDimmPhyloc(StdSELEntry* data, std::string& errStr)
{
    // Log when " All info available"
    uint8_t chNum = (data->eventData3 & 0x18) >> 3;
    uint8_t dimmNum = data->eventData3 & 0x7;
    uint8_t rankNum = data->eventData2 & 0x03;
    uint8_t nodeNum = (data->eventData3 & 0xE0) >> 5;

    const char* location = "DIMM Unknown";
    if (dimmNum == 0)
    {
        switch (chNum)
        {
            case 3:
                location = "DIMM A0";
                break;
            case 2:
                location = "DIMM B0";
                break;
            /* NOTE(review): chNum is a 2-bit field (0-3), so the two cases
             * below can never match; kept as-is pending confirmation of the
             * intended channel mapping. */
            case 4:
                location = "DIMM C0 ";
                break;
            case 5:
                location = "DIMM D0";
                break;
            default:
                break;
        }
    }

    errStr += " Node: " + std::to_string(nodeNum) + "," +
              " Card: " + std::to_string(chNum) + "," +
              " Module: " + std::to_string(dimmNum) + "," +
              " Rank Number: " + std::to_string(rankNum) + "," +
              " Location: " + location;
}

/* Decode a standard SEL record.
 *
 * Memory ECC and memory-error-logging-disabled sensors are decoded here
 * (including DIMM location and assertion suffix); every other sensor is
 * delegated to parseSelHelper.
 */
static void parseStdSel(StdSELEntry* data, std::string& errStr)
{
    /* TODO: add pal_add_cri_sel */
    switch (data->sensorNum)
    {
        case memoryEccError:
            switch (data->eventData1 & 0x0F)
            {
                case 0x00:
                    errStr = "Correctable";
                    parseDimmPhyloc(data, errStr);
                    break;
                case 0x01:
                    errStr = "Uncorrectable";
                    parseDimmPhyloc(data, errStr);
                    break;
                case 0x02:
                    errStr = "Parity";
                    break;
                case 0x05:
                    errStr = "Correctable ECC error Logging Limit Reached";
                    break;
                default:
                    errStr = "Unknown";
            }
            break;
        case memoryErrLogDIS:
            if ((data->eventData1 & 0x0F) == 0)
            {
                errStr = "Correctable Memory Error Logging Disabled";
            }
            else
            {
                errStr = "Unknown";
            }
            break;
        default:
            parseSelHelper(data, errStr);
            return;
    }

    errStr += " (DIMM " + std::to_string(data->eventData3) + ")";
    errStr += " Logical Rank " + std::to_string(data->eventData2 & 0x03);

    switch ((data->eventData2 & 0x0C) >> 2)
    {
        case 0x00:
            // Ignore when " All info available"
            break;
        case 0x01:
            errStr += " DIMM info not valid";
            break;
        case 0x02:
            errStr += " CHN info not valid";
            break;
        case 0x03:
            errStr += " CPU info not valid";
            break;
        default:
            errStr += " Unknown";
    }

    if (((data->eventType & 0x80) >> 7) == 0)
    {
        errStr += " Assertion";
    }
    else
    {
        errStr += " Deassertion";
    }

    return;
}

/* Decode a timestamped OEM SEL record (record types 0xC0, 0xC2, 0xC3).
 * Any other record type yields an empty string. */
static void parseOemSel(TsOemSELEntry* data, std::string& errStr)
{
    std::stringstream tmpStream;
    tmpStream << std::hex << std::uppercase << std::setfill('0');

    switch (data->recordType)
    {
        case 0xC0:
            tmpStream << "VID:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[1]) << std::setw(2)
                      << static_cast<int>(data->oemData[0]) << " DID:0x"
                      << std::setw(2) << static_cast<int>(data->oemData[3])
                      << std::setw(2) << static_cast<int>(data->oemData[2])
                      << " Slot:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[4]) << " Error ID:0x"
                      << std::setw(2) << static_cast<int>(data->oemData[5]);
            break;
        case 0xC2:
            tmpStream << "Extra info:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[1]) << " MSCOD:0x"
                      << std::setw(2) << static_cast<int>(data->oemData[3])
                      << std::setw(2) << static_cast<int>(data->oemData[2])
                      << " MCACOD:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[5]) << std::setw(2)
                      << static_cast<int>(data->oemData[4]);
            break;
        case 0xC3:
        {
            const int bank = (data->oemData[1] & 0xf0) >> 4;
            const int col = ((data->oemData[1] & 0x0f) << 8) |
                            data->oemData[2];

            tmpStream << "Fail Device:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[0]) << " Bank:0x"
                      << std::setw(2) << bank << " Column:0x" << std::setw(2)
                      << col << " Failed Row:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[3]) << std::setw(2)
                      << static_cast<int>(data->oemData[4]) << std::setw(2)
                      << static_cast<int>(data->oemData[5]);
            break;
        }
    }

    errStr = tmpStream.str();

    return;
}

/* Build the "DIMM Slot Location: ..." text used by unified SEL records.
 *
 * socket: bits [5:4] are the sled number, bits [3:0] the socket number.
 * channel/slot: both 0xFF means "unknown"; otherwise only the low nibble of
 * each is used. The DIMM label is a letter chosen by socket * 2 + slot,
 * followed by the channel number, or "NA" when that index has no letter.
 */
static std::string dimmLocationStr(uint8_t socket, uint8_t channel,
                                   uint8_t slot)
{
    uint8_t sled = (socket >> 4) & 0x3;

    socket &= 0xf;
    if (channel == 0xFF && slot == 0xFF)
    {
        return std::format(
            "DIMM Slot Location: Sled {:02}/Socket {:02}, Channel unknown"
            ", Slot unknown, DIMM unknown",
            sled, socket);
    }
    else
    {
        channel &= 0xf;
        slot &= 0xf;
        const char label[] = {'A', 'C', 'B', 'D'};
        uint8_t idx = socket * 2 + slot;
        return std::format("DIMM Slot Location: Sled {:02}/Socket {:02}"
                           ", Channel {:02}, Slot {:02} DIMM {}",
                           sled, socket, channel, slot,
                           (idx < sizeof(label))
                               ? label[idx] + std::to_string(channel)
                               : "NA");
    }
}

/* Decode a non-timestamped OEM "unified SEL" record.
 *
 * oemData[0] is the general-info byte whose low nibble selects the error
 * class; the remaining bytes are interpreted per class. Table lookups are
 * clamped so that out-of-range codes map to the final "Reserved" entry.
 * Unrecognized classes are reported with the raw 13 payload bytes in hex.
 */
static void parseOemUnifiedSel(NtsOemSELEntry* data, std::string& errStr)
{
    /* The lookup tables never change, so build them once. */
    static const std::vector<std::string> dimmErr = {
        "Memory training failure",
        "Memory correctable error",
        "Memory uncorrectable error",
        "Memory correctable error (Patrol scrub)",
        "Memory uncorrectable error (Patrol scrub)",
        "Memory Parity Error (PCC=0)",
        "Memory Parity Error (PCC=1)",
        "Memory PMIC Error",
        "CXL Memory training error",
        "Reserved"};
    static const std::vector<std::string> postEvent = {
        "System PXE boot fail",
        "CMOS/NVRAM configuration cleared",
        "TPM Self-Test Fail",
        "Boot Drive failure",
        "Data Drive failure",
        "Received invalid boot order request from BMC",
        "System HTTP boot fail",
        "BIOS fails to get the certificate from BMC",
        "Password cleared by jumper",
        "DXE FV check failure",
        "AMD ABL failure",
        "Reserved"};
    static const std::vector<std::string> certErr = {
        "No certificate at BMC", "IPMI transaction fail",
        "Certificate data corrupted", "Reserved"};
    static const std::vector<std::string> pcieEvent = {
        "PCIe DPC Event",
        "PCIe LER Event",
        "PCIe Link Retraining and Recovery",
        "PCIe Link CRC Error Check and Retry",
        "PCIe Corrupt Data Containment",
        "PCIe Express ECRC",
        "Reserved"};
    static const std::vector<std::string> memEvent = {
        "Memory PPR event",
        "Memory Correctable Error logging limit reached",
        "Memory disable/map-out for FRB",
        "Memory SDDC",
        "Memory Address range/Partial mirroring",
        "Memory ADDDC",
        "Memory SMBus hang recovery",
        "No DIMM in System",
        "Reserved"};
    static const std::vector<std::string> memPprTime = {
        "Boot time", "Autonomous", "Run time", "Reserved"};
    static const std::vector<std::string> memPpr = {
        "PPR success", "PPR fail", "PPR request", "Reserved"};
    static const std::vector<std::string> memAdddc = {
        "Bank VLS", "r-Bank VLS + re-buddy", "r-Bank VLS + Rank VLS",
        "r-Rank VLS + re-buddy", "Reserved"};
    static const std::vector<std::string> pprEvent = {
        "PPR disable", "Soft PPR", "Hard PPR", "Reserved"};

    uint8_t* ptr = data->oemData;
    uint8_t eventType = ptr[5] & 0xf;
    int genInfo = ptr[0];
    int errType = genInfo & 0x0f;

    std::stringstream tmpStream;

    switch (errType)
    {
        case unifiedPcieErr:
            tmpStream << std::format(
                "GeneralInfo: x86/PCIeErr(0x{:02X})"
                ", Bus {:02X}/Dev {:02X}/Fun {:02X}, TotalErrID1Cnt: 0x{:04X}"
                ", ErrID2: 0x{:02X}, ErrID1: 0x{:02X}",
                genInfo, ptr[8], ptr[7] >> 3, ptr[7] & 0x7,
                (ptr[10] << 8) | ptr[9], ptr[11], ptr[12]);
            break;
        case unifiedMemErr:
            eventType = ptr[9] & 0xf;
            tmpStream << std::format(
                "GeneralInfo: MemErr(0x{:02X}), {}, DIMM Failure Event: {}",
                genInfo, dimmLocationStr(ptr[5], ptr[6], ptr[7]),
                dimmErr[std::min(eventType,
                                 static_cast<uint8_t>(dimmErr.size() - 1))]);

            if (static_cast<MemErrType>(eventType) == MemErrType::memTrainErr ||
                static_cast<MemErrType>(eventType) == MemErrType::memPmicErr)
            {
                /* Bit 7 of ptr[9] selects a 16-bit minor code (printed with
                 * 4 digits) instead of an 8-bit one (2 digits). */
                bool amd = ptr[9] & 0x80;
                tmpStream << std::format(
                    ", Major Code: 0x{:02X}, Minor Code: 0x{:0{}X}", ptr[10],
                    amd ? (ptr[12] << 8 | ptr[11]) : ptr[11], amd ? 4 : 2);
            }
            break;
        case unifiedIioErr:
            tmpStream << std::format(
                "GeneralInfo: IIOErr(0x{:02X})"
                ", IIO Port Location: Sled {:02}/Socket {:02}, Stack 0x{:02X}"
                ", Error Type: 0x{:02X}, Error Severity: 0x{:02X}"
                ", Error ID: 0x{:02X}",
                genInfo, (ptr[5] >> 4) & 0x3, ptr[5] & 0xf, ptr[6], ptr[10],
                ptr[11] & 0xf, ptr[12]);
            break;
        case unifiedPostEvt:
            tmpStream << std::format(
                "GeneralInfo: POST(0x{:02X}), POST Failure Event: {}", genInfo,
                postEvent[std::min(
                    eventType, static_cast<uint8_t>(postEvent.size() - 1))]);

            switch (static_cast<PostEvtType>(eventType))
            {
                case PostEvtType::pxeBootFail:
                case PostEvtType::httpBootFail:
                {
                    uint8_t failType = ptr[10] & 0xf;
                    tmpStream
                        << std::format(", Fail Type: {}, Error Code: 0x{:02X}",
                                       (failType == 4 || failType == 6)
                                           ? std::format("IPv{} fail", failType)
                                           : std::format("0x{:02X}", ptr[10]),
                                       ptr[11]);
                    break;
                }
                case PostEvtType::getCertFail:
                    tmpStream << std::format(
                        ", Failure Detail: {}",
                        certErr[std::min(
                            ptr[9], static_cast<uint8_t>(certErr.size() - 1))]);
                    break;
                case PostEvtType::amdAblFail:
                    tmpStream << std::format(", ABL Error Code: 0x{:04X}",
                                             (ptr[12] << 8) | ptr[11]);
                    break;
            }
            break;
        case unifiedPcieEvt:
            tmpStream << std::format(
                "GeneralInfo: PCIeEvent(0x{:02X}), PCIe Failure Event: {}",
                genInfo,
                pcieEvent[std::min(
                    eventType, static_cast<uint8_t>(pcieEvent.size() - 1))]);

            if (static_cast<PcieEvtType>(eventType) == PcieEvtType::dpc)
            {
                tmpStream << std::format(
                    ", Status: 0x{:04X}, Source ID: 0x{:04X}",
                    (ptr[8] << 8) | ptr[7], (ptr[10] << 8) | ptr[9]);
            }
            break;
        case unifiedMemEvt:
            eventType = ptr[9] & 0xf;
            tmpStream
                << std::format("GeneralInfo: MemEvent(0x{:02X})", genInfo)
                << (static_cast<MemEvtType>(eventType) != MemEvtType::noDimm
                        ? std::format(", {}",
                                      dimmLocationStr(ptr[5], ptr[6], ptr[7]))
                        : "")
                << ", DIMM Failure Event: ";

            switch (static_cast<MemEvtType>(eventType))
            {
                case MemEvtType::ppr:
                    tmpStream << std::format("{} {}",
                                             memPprTime[(ptr[10] >> 2) & 0x3],
                                             memPpr[ptr[10] & 0x3]);
                    break;
                case MemEvtType::adddc:
                    tmpStream << std::format(
                        "{} {}",
                        memEvent[std::min(eventType,
                                          static_cast<uint8_t>(
                                              memEvent.size() - 1))],
                        memAdddc[std::min(
                            static_cast<uint8_t>(ptr[11] & 0xf),
                            static_cast<uint8_t>(memAdddc.size() - 1))]);
                    break;
                default:
                    tmpStream << std::format(
                        "{}", memEvent[std::min(
                                  eventType,
                                  static_cast<uint8_t>(memEvent.size() - 1))]);
                    break;
            }
            break;
        case unifiedBootGuard:
            tmpStream << std::format(
                "GeneralInfo: Boot Guard ACM Failure Events(0x{:02X})"
                ", Error Class: 0x{:02X}, Error Code: 0x{:02X}",
                genInfo, ptr[9], ptr[10]);
            break;
        case unifiedPprEvt:
            tmpStream << std::format(
                "GeneralInfo: PPREvent(0x{:02X}), {}"
                ", DIMM Info: {:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}",
                genInfo,
                pprEvent[std::min(eventType,
                                  static_cast<uint8_t>(pprEvent.size() - 1))],
                ptr[6], ptr[7], ptr[8], ptr[9], ptr[10], ptr[11], ptr[12]);
            break;
        default:
        {
            std::vector<uint8_t> oemData(ptr, ptr + 13);
            std::string oemDataStr;
            toHexStr(oemData, oemDataStr);
            tmpStream << std::format("Undefined Error Type(0x{:02X}), Raw: {}",
                                     errType, oemDataStr);
            break;
        }
    }

    errStr = tmpStream.str();

    return;
}

static void parseSelData(uint8_t fruId, std::vector<uint8_t>& reqData,
                         std::string& msgLog)
{
    /* Get record type */
    int recType = reqData[2];
    std::string errType, errLog;

    uint8_t* ptr = NULL;

    std::stringstream recTypeStream;
    recTypeStream << std::hex << std::uppercase <<
std::setfill('0') 1423 << std::setw(2) << recType; 1424 1425 msgLog = "SEL Entry: FRU: " + std::to_string(fruId) + ", Record: "; 1426 1427 if (recType == stdErrType) 1428 { 1429 StdSELEntry* data = reinterpret_cast<StdSELEntry*>(&reqData[0]); 1430 std::string sensorName; 1431 1432 errType = stdErr; 1433 if (data->sensorType == 0x1F) 1434 { 1435 sensorName = "OS"; 1436 } 1437 else 1438 { 1439 auto findSensorName = sensorNameTable.find(data->sensorNum); 1440 if (findSensorName == sensorNameTable.end()) 1441 { 1442 sensorName = "Unknown"; 1443 } 1444 else 1445 { 1446 sensorName = findSensorName->second.first; 1447 } 1448 } 1449 1450 parseStdSel(data, errLog); 1451 ptr = &(data->eventData1); 1452 std::vector<uint8_t> evtData(ptr, ptr + 3); 1453 std::string eventData; 1454 toHexStr(evtData, eventData); 1455 1456 std::stringstream senNumStream; 1457 senNumStream << std::hex << std::uppercase << std::setfill('0') 1458 << std::setw(2) << (int)(data->sensorNum); 1459 1460 msgLog += errType + " (0x" + recTypeStream.str() + 1461 "), Sensor: " + sensorName + " (0x" + senNumStream.str() + 1462 "), Event Data: (" + eventData + ") " + errLog; 1463 } 1464 else if ((recType >= oemTSErrTypeMin) && (recType <= oemTSErrTypeMax)) 1465 { 1466 /* timestamped OEM SEL records */ 1467 TsOemSELEntry* data = reinterpret_cast<TsOemSELEntry*>(&reqData[0]); 1468 ptr = data->mfrId; 1469 std::vector<uint8_t> mfrIdData(ptr, ptr + 3); 1470 std::string mfrIdStr; 1471 toHexStr(mfrIdData, mfrIdStr); 1472 1473 ptr = data->oemData; 1474 std::vector<uint8_t> oemData(ptr, ptr + 6); 1475 std::string oemDataStr; 1476 toHexStr(oemData, oemDataStr); 1477 1478 errType = oemTSErr; 1479 parseOemSel(data, errLog); 1480 1481 msgLog += errType + " (0x" + recTypeStream.str() + "), MFG ID: " + 1482 mfrIdStr + ", OEM Data: (" + oemDataStr + ") " + errLog; 1483 } 1484 else if (recType == fbUniErrType) 1485 { 1486 NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]); 1487 errType = fbUniSELErr; 1488 
parseOemUnifiedSel(data, errLog); 1489 msgLog += errType + " (0x" + recTypeStream.str() + "), " + errLog; 1490 } 1491 else if ((recType >= oemNTSErrTypeMin) && (recType <= oemNTSErrTypeMax)) 1492 { 1493 /* Non timestamped OEM SEL records */ 1494 NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]); 1495 errType = oemNTSErr; 1496 1497 ptr = data->oemData; 1498 std::vector<uint8_t> oemData(ptr, ptr + 13); 1499 std::string oemDataStr; 1500 toHexStr(oemData, oemDataStr); 1501 1502 parseOemSel((TsOemSELEntry*)data, errLog); 1503 msgLog += errType + " (0x" + recTypeStream.str() + "), OEM Data: (" + 1504 oemDataStr + ") " + errLog; 1505 } 1506 else 1507 { 1508 errType = unknownErr; 1509 toHexStr(reqData, errLog); 1510 msgLog += errType + " (0x" + recTypeStream.str() + 1511 ") RawData: " + errLog; 1512 } 1513 } 1514 1515 } // namespace fb_oem::ipmi::sel 1516 1517 namespace ipmi 1518 { 1519 1520 namespace storage 1521 { 1522 1523 static void registerSELFunctions() __attribute__((constructor)); 1524 static fb_oem::ipmi::sel::SELData selObj __attribute__((init_priority(101))); 1525 1526 ipmi::RspType<uint8_t, // SEL version 1527 uint16_t, // SEL entry count 1528 uint16_t, // free space 1529 uint32_t, // last add timestamp 1530 uint32_t, // last erase timestamp 1531 uint8_t> // operation support 1532 ipmiStorageGetSELInfo() 1533 { 1534 fb_oem::ipmi::sel::GetSELInfoData info; 1535 1536 selObj.getInfo(info); 1537 return ipmi::responseSuccess(info.selVersion, info.entries, info.freeSpace, 1538 info.addTimeStamp, info.eraseTimeStamp, 1539 info.operationSupport); 1540 } 1541 1542 ipmi::RspType<uint16_t, std::vector<uint8_t>> 1543 ipmiStorageGetSELEntry(std::vector<uint8_t> data) 1544 { 1545 if (data.size() != sizeof(fb_oem::ipmi::sel::GetSELEntryRequest)) 1546 { 1547 return ipmi::responseReqDataLenInvalid(); 1548 } 1549 1550 fb_oem::ipmi::sel::GetSELEntryRequest* reqData = 1551 reinterpret_cast<fb_oem::ipmi::sel::GetSELEntryRequest*>(&data[0]); 1552 1553 if 
(reqData->reservID != 0) 1554 { 1555 if (!checkSELReservation(reqData->reservID)) 1556 { 1557 return ipmi::responseInvalidReservationId(); 1558 } 1559 } 1560 1561 uint16_t selCnt = selObj.getCount(); 1562 if (selCnt == 0) 1563 { 1564 return ipmi::responseSensorInvalid(); 1565 } 1566 1567 /* If it is asked for first entry */ 1568 if (reqData->recordID == fb_oem::ipmi::sel::firstEntry) 1569 { 1570 /* First Entry (0x0000) as per Spec */ 1571 reqData->recordID = 1; 1572 } 1573 else if (reqData->recordID == fb_oem::ipmi::sel::lastEntry) 1574 { 1575 /* Last entry (0xFFFF) as per Spec */ 1576 reqData->recordID = selCnt; 1577 } 1578 1579 std::string ipmiRaw; 1580 1581 if (selObj.getEntry(reqData->recordID, ipmiRaw) < 0) 1582 { 1583 return ipmi::responseSensorInvalid(); 1584 } 1585 1586 std::vector<uint8_t> recDataBytes; 1587 if (fromHexStr(ipmiRaw, recDataBytes) < 0) 1588 { 1589 return ipmi::responseUnspecifiedError(); 1590 } 1591 1592 /* Identify the next SEL record ID. If recordID is same as 1593 * total SeL count then next id should be last entry else 1594 * it should be incremented by 1 to current RecordID 1595 */ 1596 uint16_t nextRecord; 1597 if (reqData->recordID == selCnt) 1598 { 1599 nextRecord = fb_oem::ipmi::sel::lastEntry; 1600 } 1601 else 1602 { 1603 nextRecord = reqData->recordID + 1; 1604 } 1605 1606 if (reqData->readLen == fb_oem::ipmi::sel::entireRecord) 1607 { 1608 return ipmi::responseSuccess(nextRecord, recDataBytes); 1609 } 1610 else 1611 { 1612 if (reqData->offset >= fb_oem::ipmi::sel::selRecordSize || 1613 reqData->readLen > fb_oem::ipmi::sel::selRecordSize) 1614 { 1615 return ipmi::responseUnspecifiedError(); 1616 } 1617 std::vector<uint8_t> recPartData; 1618 1619 auto diff = fb_oem::ipmi::sel::selRecordSize - reqData->offset; 1620 auto readLength = std::min(diff, static_cast<int>(reqData->readLen)); 1621 1622 for (int i = 0; i < readLength; i++) 1623 { 1624 recPartData.push_back(recDataBytes[i + reqData->offset]); 1625 } 1626 return 
ipmi::responseSuccess(nextRecord, recPartData); 1627 } 1628 } 1629 1630 // Retry function to log the SEL entry message and make D-Bus call 1631 bool logWithRetry( 1632 const std::string& journalMsg, const std::string& messageID, 1633 const std::string& logErr, const std::string& severity, 1634 const std::map<std::string, std::string>& ad, int maxRetries = 10, 1635 std::chrono::milliseconds waitTimeMs = std::chrono::milliseconds(100)) 1636 { 1637 // Attempt to log the SEL entry message 1638 lg2::info( 1639 "SEL Entry Added: {IPMI_RAW}, IPMISEL_MESSAGE_ID={MESSAGE_ID}, IPMISEL_MESSAGE_ARGS={LOG_ERR}", 1640 "IPMI_RAW", journalMsg, "MESSAGE_ID", messageID, "LOG_ERR", logErr); 1641 1642 int attempts = 0; 1643 while (attempts < maxRetries) 1644 { 1645 // Create D-Bus call 1646 auto bus = sdbusplus::bus::new_default(); 1647 auto reqMsg = bus.new_method_call( 1648 "xyz.openbmc_project.Logging", "/xyz/openbmc_project/logging", 1649 "xyz.openbmc_project.Logging.Create", "Create"); 1650 reqMsg.append(logErr, severity, ad); 1651 1652 try 1653 { 1654 // Attempt to make the D-Bus call 1655 bus.call(reqMsg); 1656 return true; // D-Bus call successful, exit the loop 1657 } 1658 catch (sdbusplus::exception_t& e) 1659 { 1660 lg2::error("D-Bus call failed: {ERROR}", "ERROR", e); 1661 } 1662 1663 // Wait before retrying 1664 std::this_thread::sleep_for(std::chrono::milliseconds(waitTimeMs)); 1665 attempts++; 1666 } 1667 1668 return false; // Failed after max retries 1669 } 1670 1671 // Main function to add SEL entry 1672 ipmi::RspType<uint16_t> 1673 ipmiStorageAddSELEntry(ipmi::Context::ptr ctx, std::vector<uint8_t> data) 1674 { 1675 /* Per the IPMI spec, need to cancel any reservation when a 1676 * SEL entry is added 1677 */ 1678 cancelSELReservation(); 1679 1680 if (data.size() != fb_oem::ipmi::sel::selRecordSize) 1681 { 1682 return ipmi::responseReqDataLenInvalid(); 1683 } 1684 1685 std::string ipmiRaw, logErr; 1686 toHexStr(data, ipmiRaw); 1687 1688 /* Parse sel data and get an 
error log to be filed */ 1689 fb_oem::ipmi::sel::parseSelData((ctx->hostIdx + 1), data, logErr); 1690 1691 static const std::string openBMCMessageRegistryVersion("0.1"); 1692 std::string messageID = 1693 "OpenBMC." + openBMCMessageRegistryVersion + ".SELEntryAdded"; 1694 1695 /* Log the Raw SEL message to the journal */ 1696 std::string journalMsg = "SEL Entry Added: " + ipmiRaw; 1697 1698 std::map<std::string, std::string> ad; 1699 std::string severity = "xyz.openbmc_project.Logging.Entry.Level.Critical"; 1700 ad.emplace("IPMI_RAW", ipmiRaw); 1701 1702 // Launch the logging thread 1703 std::thread([=]() { 1704 bool success = 1705 logWithRetry(journalMsg, messageID, logErr, severity, ad); 1706 if (!success) 1707 { 1708 lg2::error("Failed to log SEL entry added event after retries."); 1709 } 1710 }).detach(); 1711 1712 int responseID = selObj.addEntry(ipmiRaw.c_str()); 1713 if (responseID < 0) 1714 { 1715 return ipmi::responseUnspecifiedError(); 1716 } 1717 return ipmi::responseSuccess(static_cast<uint16_t>(responseID)); 1718 } 1719 1720 ipmi::RspType<uint8_t> ipmiStorageClearSEL(uint16_t reservationID, 1721 const std::array<uint8_t, 3>& clr, 1722 uint8_t eraseOperation) 1723 { 1724 if (!checkSELReservation(reservationID)) 1725 { 1726 return ipmi::responseInvalidReservationId(); 1727 } 1728 1729 static constexpr std::array<uint8_t, 3> clrExpected = {'C', 'L', 'R'}; 1730 if (clr != clrExpected) 1731 { 1732 return ipmi::responseInvalidFieldRequest(); 1733 } 1734 1735 /* If there is no sel then return erase complete */ 1736 if (selObj.getCount() == 0) 1737 { 1738 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete); 1739 } 1740 1741 /* Erasure status cannot be fetched, so always return erasure 1742 * status as `erase completed`. 
1743 */ 1744 if (eraseOperation == fb_oem::ipmi::sel::getEraseStatus) 1745 { 1746 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete); 1747 } 1748 1749 /* Check that initiate erase is correct */ 1750 if (eraseOperation != fb_oem::ipmi::sel::initiateErase) 1751 { 1752 return ipmi::responseInvalidFieldRequest(); 1753 } 1754 1755 /* Per the IPMI spec, need to cancel any reservation when the 1756 * SEL is cleared 1757 */ 1758 cancelSELReservation(); 1759 1760 /* Clear the complete Sel Json object */ 1761 if (selObj.clear() < 0) 1762 { 1763 return ipmi::responseUnspecifiedError(); 1764 } 1765 1766 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete); 1767 } 1768 1769 ipmi::RspType<uint32_t> ipmiStorageGetSELTime() 1770 { 1771 struct timespec selTime = {}; 1772 1773 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0) 1774 { 1775 return ipmi::responseUnspecifiedError(); 1776 } 1777 1778 return ipmi::responseSuccess(selTime.tv_sec); 1779 } 1780 1781 ipmi::RspType<> ipmiStorageSetSELTime(uint32_t) 1782 { 1783 // Set SEL Time is not supported 1784 return ipmi::responseInvalidCommand(); 1785 } 1786 1787 ipmi::RspType<uint16_t> ipmiStorageGetSELTimeUtcOffset() 1788 { 1789 /* TODO: For now, the SEL time stamp is based on UTC time, 1790 * so return 0x0000 as offset. 
Might need to change once 1791 * supporting zones in SEL time stamps 1792 */ 1793 1794 uint16_t utcOffset = 0x0000; 1795 return ipmi::responseSuccess(utcOffset); 1796 } 1797 1798 void registerSELFunctions() 1799 { 1800 // <Get SEL Info> 1801 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1802 ipmi::storage::cmdGetSelInfo, ipmi::Privilege::User, 1803 ipmiStorageGetSELInfo); 1804 1805 // <Get SEL Entry> 1806 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1807 ipmi::storage::cmdGetSelEntry, ipmi::Privilege::User, 1808 ipmiStorageGetSELEntry); 1809 1810 // <Add SEL Entry> 1811 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1812 ipmi::storage::cmdAddSelEntry, 1813 ipmi::Privilege::Operator, ipmiStorageAddSELEntry); 1814 1815 // <Clear SEL> 1816 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1817 ipmi::storage::cmdClearSel, ipmi::Privilege::Operator, 1818 ipmiStorageClearSEL); 1819 1820 // <Get SEL Time> 1821 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1822 ipmi::storage::cmdGetSelTime, ipmi::Privilege::User, 1823 ipmiStorageGetSELTime); 1824 1825 // <Set SEL Time> 1826 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1827 ipmi::storage::cmdSetSelTime, 1828 ipmi::Privilege::Operator, ipmiStorageSetSELTime); 1829 1830 // <Get SEL Time UTC Offset> 1831 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1832 ipmi::storage::cmdGetSelTimeUtcOffset, 1833 ipmi::Privilege::User, 1834 ipmiStorageGetSELTimeUtcOffset); 1835 1836 return; 1837 } 1838 1839 } // namespace storage 1840 } // namespace ipmi 1841