/*
 * Copyright (c) 2018 Intel Corporation.
 * Copyright (c) 2018-present Facebook.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <boost/algorithm/string/join.hpp>
#include <boost/container/flat_map.hpp>
#include <ipmid/api.hpp>
#include <nlohmann/json.hpp>
#include <phosphor-logging/log.hpp>
#include <sdbusplus/message/types.hpp>
#include <sdbusplus/timer.hpp>
#include <storagecommands.hpp>

#include <algorithm>
#include <cstdint>
#include <ctime>
#include <format>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

/* Unified-SEL memory error sub-types that carry major/minor codes. */
enum class MemErrType
{
    memTrainErr = 0,
    memPmicErr = 7
};

/* Unified-SEL POST event sub-types that carry extra detail bytes. */
enum class PostEvtType
{
    pxeBootFail = 0,
    httpBootFail = 6,
    getCertFail = 7,
    amdAblFail = 10
};

/* Unified-SEL PCIe event sub-types that carry extra detail bytes. */
enum class PcieEvtType
{
    dpc = 0
};

/* Unified-SEL memory event sub-types that need dedicated formatting. */
enum class MemEvtType
{
    ppr = 0,
    adddc = 5,
    noDimm = 7
};

//----------------------------------------------------------------------
// Platform specific functions for storing app data
//----------------------------------------------------------------------

/* Format one byte as exactly two upper-case hexadecimal digits. */
static std::string byteToStr(uint8_t byte)
{
    std::stringstream ss;

    ss << std::hex << std::uppercase << std::setfill('0') << std::setw(2)
       << static_cast<int>(byte);

    return ss.str();
}

/*
 * Convert a byte sequence to an upper-case hex string, two digits per byte.
 * hexStr is overwritten with the result (empty for an empty input).
 */
static void toHexStr(const std::vector<uint8_t>& bytes, std::string& hexStr)
{
    std::stringstream stream;
    stream << std::hex << std::uppercase << std::setfill('0');
    for (const uint8_t byte : bytes)
    {
        stream << std::setw(2) << static_cast<int>(byte);
    }
    hexStr = stream.str();
}

/*
 * Parse a hex string two characters at a time and append the decoded bytes
 * to data. A trailing single character is parsed on its own.
 *
 * Returns 0 on success, -1 if a chunk is not valid hex. Bytes decoded before
 * the failing chunk remain appended to data.
 */
static int fromHexStr(const std::string& hexStr, std::vector<uint8_t>& data)
{
    for (std::string::size_type i = 0; i < hexStr.size(); i += 2)
    {
        try
        {
            data.push_back(static_cast<uint8_t>(
                std::stoul(hexStr.substr(i, 2), nullptr, 16)));
        }
        catch (const std::invalid_argument& e)
        {
            phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
            return -1;
        }
        catch (const std::out_of_range& e)
        {
            phosphor::logging::log<phosphor::logging::level::ERR>(e.what());
            return -1;
        }
    }
    return 0;
}

namespace fb_oem::ipmi::sel
{

/*
 * In-memory view of the SEL store, persisted as JSON in SEL_JSON_DATA_FILE.
 * Header fields live under the KEY_* keys; each record is stored under its
 * record number rendered as (at least two-digit) lower-case hex.
 */
class SELData
{
  private:
    nlohmann::json selDataObj;

    /* Write the current JSON object to SEL_JSON_DATA_FILE. */
    void flush()
    {
        std::ofstream file(SEL_JSON_DATA_FILE);
        if (!file)
        {
            /* Keep the in-memory state; just report that nothing was saved */
            lg2::error("Failed to open SEL JSON file for writing.");
            return;
        }
        file << selDataObj;
    }

    /* Populate the SEL header fields with their default values. */
    void init()
    {
        selDataObj[KEY_SEL_VER] = 0x51;
        selDataObj[KEY_SEL_COUNT] = 0;
        selDataObj[KEY_ADD_TIME] = 0xFFFFFFFF;
        selDataObj[KEY_ERASE_TIME] = 0xFFFFFFFF;
        selDataObj[KEY_OPER_SUPP] = 0x02;
        /* Spec indicates that more than 64kB is free */
        selDataObj[KEY_FREE_SPACE] = 0xFFFF;
    }

    /* Reset to an empty JSON object and write it to SEL_JSON_DATA_FILE. */
    void writeEmptyJson()
    {
        selDataObj = nlohmann::json::object(); // Create an empty JSON object
        std::ofstream outFile(SEL_JSON_DATA_FILE);
        if (outFile)
        {
            // Write empty JSON object to the file
            outFile << selDataObj.dump(4);
            outFile.close();
        }
        else
        {
            lg2::info("Failed to create SEL JSON file with empty JSON.");
        }
    }

  public:
    /*
     * Load the SEL store from SEL_JSON_DATA_FILE. A missing or unparsable
     * file is replaced by an empty store with default header fields.
     */
    SELData()
    {
        /* Get App data stored in json file */
        std::ifstream file(SEL_JSON_DATA_FILE);
        if (file)
        {
            try
            {
                file >> selDataObj;
            }
            catch (const nlohmann::json::parse_error& e)
            {
                lg2::error("Error parsing SEL JSON file: {ERROR}", "ERROR", e);
                writeEmptyJson();
                init(); // Initialize to default values
            }
        }
        else
        {
            lg2::info("Failed to open SEL JSON file.");
            writeEmptyJson();
            init();
        }

        /* Valid JSON that is not an object (array, number, ...) cannot be
         * indexed by key; start over from an empty object in that case. */
        if (!selDataObj.is_object())
        {
            selDataObj = nlohmann::json::object();
        }

        /* Initialize SelData object if no entries. */
        if (selDataObj.find(KEY_SEL_COUNT) == selDataObj.end())
        {
            init();
        }
    }

    /*
     * Erase every record, restore default header fields, record the erase
     * time and persist. Returns 0 on success, -1 if the clock cannot be read
     * (in which case nothing is written to disk).
     */
    int clear()
    {
        /* Clear the complete Sel Json object */
        selDataObj.clear();
        /* Reinitialize it with basic data */
        init();
        /* Save the erase time */
        struct timespec selTime = {};
        if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
        {
            return -1;
        }
        selDataObj[KEY_ERASE_TIME] = selTime.tv_sec;
        flush();
        return 0;
    }

    /* Number of records currently stored. */
    uint32_t getCount()
    {
        return selDataObj[KEY_SEL_COUNT];
    }

    /* Copy the SEL header fields into info. */
    void getInfo(GetSELInfoData& info)
    {
        info.selVersion = selDataObj[KEY_SEL_VER];
        info.entries = selDataObj[KEY_SEL_COUNT];
        info.freeSpace = selDataObj[KEY_FREE_SPACE];
        info.addTimeStamp = selDataObj[KEY_ADD_TIME];
        info.eraseTimeStamp = selDataObj[KEY_ERASE_TIME];
        info.operationSupport = selDataObj[KEY_OPER_SUPP];
    }

    /*
     * Fetch the raw string of record number index into rawStr.
     * Returns 0 on success, -1 if no such record exists.
     */
    int getEntry(uint32_t index, std::string& rawStr)
    {
        std::stringstream ss;
        ss << std::hex;
        ss << std::setw(2) << std::setfill('0') << index;
        const std::string key = ss.str();

        /* Check for the requested SEL Entry, if record is available */
        if (selDataObj.find(key) == selDataObj.end())
        {
            return -1;
        }

        rawStr = selDataObj[key][KEY_SEL_ENTRY_RAW];
        return 0;
    }

    /*
     * Append a record whose raw content is keyStr, update the add timestamp
     * and record count, and persist. Returns the new record number, or -1 if
     * the clock cannot be read.
     */
    int addEntry(std::string keyStr)
    {
        struct timespec selTime = {};

        if (clock_gettime(CLOCK_REALTIME, &selTime) < 0)
        {
            return -1;
        }

        selDataObj[KEY_ADD_TIME] = selTime.tv_sec;

        int selCount = selDataObj[KEY_SEL_COUNT];
        selDataObj[KEY_SEL_COUNT] = ++selCount;

        std::stringstream ss;
        ss << std::hex;
        ss << std::setw(2) << std::setfill('0') << selCount;

        selDataObj[ss.str()][KEY_SEL_ENTRY_RAW] = keyStr;
        flush();
        return selCount;
    }
};

/*
 * A Function to parse common
SEL message, a helper function 254 * for parseStdSel. 255 * 256 * Note that this function __CANNOT__ be overridden. 257 * To add board specific routine, please override parseStdSel. 258 */ 259 260 /*Used by decoding ME event*/ 261 std::vector<std::string> nmDomName = { 262 "Entire Platform", "CPU Subsystem", 263 "Memory Subsystem", "HW Protection", 264 "High Power I/O subsystem", "Unknown"}; 265 266 /* Default log message for unknown type */ 267 static void logDefault(uint8_t*, std::string& errLog) 268 { 269 errLog = "Unknown"; 270 } 271 272 static void logSysEvent(uint8_t* data, std::string& errLog) 273 { 274 if (data[0] == 0xE5) 275 { 276 errLog = "Cause of Time change - "; 277 switch (data[2]) 278 { 279 case 0x00: 280 errLog += "NTP"; 281 break; 282 case 0x01: 283 errLog += "Host RTL"; 284 break; 285 case 0x02: 286 errLog += "Set SEL time cmd"; 287 break; 288 case 0x03: 289 errLog += "Set SEL time UTC offset cmd"; 290 break; 291 default: 292 errLog += "Unknown"; 293 } 294 295 if (data[1] == 0x00) 296 errLog += " - First Time"; 297 else if (data[1] == 0x80) 298 errLog += " - Second Time"; 299 } 300 else 301 { 302 errLog = "Unknown"; 303 } 304 } 305 306 static void logThermalEvent(uint8_t* data, std::string& errLog) 307 { 308 if (data[0] == 0x1) 309 { 310 errLog = "Limit Exceeded"; 311 } 312 else 313 { 314 errLog = "Unknown"; 315 } 316 } 317 318 static void logCritIrq(uint8_t* data, std::string& errLog) 319 { 320 if (data[0] == 0x0) 321 { 322 errLog = "NMI / Diagnostic Interrupt"; 323 } 324 else if (data[0] == 0x03) 325 { 326 errLog = "Software NMI"; 327 } 328 else 329 { 330 errLog = "Unknown"; 331 } 332 333 /* TODO: Call add_cri_sel for CRITICAL_IRQ */ 334 } 335 336 static void logPostErr(uint8_t* data, std::string& errLog) 337 { 338 if ((data[0] & 0x0F) == 0x0) 339 { 340 errLog = "System Firmware Error"; 341 } 342 else 343 { 344 errLog = "Unknown"; 345 } 346 347 if (((data[0] >> 6) & 0x03) == 0x3) 348 { 349 // TODO: Need to implement IPMI spec based Post Code 
350 errLog += ", IPMI Post Code"; 351 } 352 else if (((data[0] >> 6) & 0x03) == 0x2) 353 { 354 errLog += ", OEM Post Code 0x" + byteToStr(data[2]) + 355 byteToStr(data[1]); 356 357 switch ((data[2] << 8) | data[1]) 358 { 359 case 0xA105: 360 errLog += ", BMC Failed (No Response)"; 361 break; 362 case 0xA106: 363 errLog += ", BMC Failed (Self Test Fail)"; 364 break; 365 case 0xA10A: 366 errLog += ", System Firmware Corruption Detected"; 367 break; 368 case 0xA10B: 369 errLog += ", TPM Self-Test FAIL Detected"; 370 } 371 } 372 } 373 374 static void logMchChkErr(uint8_t* data, std::string& errLog) 375 { 376 /* TODO: Call add_cri_sel for CRITICAL_IRQ */ 377 switch (data[0] & 0x0F) 378 { 379 case 0x0B: 380 switch ((data[1] >> 5) & 0x03) 381 { 382 case 0x00: 383 errLog = "Uncorrected Recoverable Error"; 384 break; 385 case 0x01: 386 errLog = "Uncorrected Thread Fatal Error"; 387 break; 388 case 0x02: 389 errLog = "Uncorrected System Fatal Error"; 390 break; 391 default: 392 errLog = "Unknown"; 393 } 394 break; 395 case 0x0C: 396 switch ((data[1] >> 5) & 0x03) 397 { 398 case 0x00: 399 errLog = "Correctable Error"; 400 break; 401 case 0x01: 402 errLog = "Deferred Error"; 403 break; 404 default: 405 errLog = "Unknown"; 406 } 407 break; 408 default: 409 errLog = "Unknown"; 410 } 411 412 errLog += ", Machine Check bank Number " + std::to_string(data[1]) + 413 ", CPU " + std::to_string(data[2] >> 5) + ", Core " + 414 std::to_string(data[2] & 0x1F); 415 } 416 417 static void logPcieErr(uint8_t* data, std::string& errLog) 418 { 419 std::stringstream tmp1, tmp2; 420 tmp1 << std::hex << std::uppercase << std::setfill('0'); 421 tmp2 << std::hex << std::uppercase << std::setfill('0'); 422 tmp1 << " (Bus " << std::setw(2) << (int)(data[2]) << " / Dev " 423 << std::setw(2) << (int)(data[1] >> 3) << " / Fun " << std::setw(2) 424 << (int)(data[1] & 0x7) << ")"; 425 426 switch (data[0] & 0xF) 427 { 428 case 0x4: 429 errLog = "PCI PERR" + tmp1.str(); 430 break; 431 case 0x5: 432 errLog = 
"PCI SERR" + tmp1.str(); 433 break; 434 case 0x7: 435 errLog = "Correctable" + tmp1.str(); 436 break; 437 case 0x8: 438 errLog = "Uncorrectable" + tmp1.str(); 439 break; 440 case 0xA: 441 errLog = "Bus Fatal" + tmp1.str(); 442 break; 443 case 0xD: 444 { 445 uint32_t venId = (uint32_t)data[1] << 8 | (uint32_t)data[2]; 446 tmp2 << "Vendor ID: 0x" << std::setw(4) << venId; 447 errLog = tmp2.str(); 448 } 449 break; 450 case 0xE: 451 { 452 uint32_t devId = (uint32_t)data[1] << 8 | (uint32_t)data[2]; 453 tmp2 << "Device ID: 0x" << std::setw(4) << devId; 454 errLog = tmp2.str(); 455 } 456 break; 457 case 0xF: 458 tmp2 << "Error ID from downstream: 0x" << std::setw(2) 459 << (int)(data[1]) << std::setw(2) << (int)(data[2]); 460 errLog = tmp2.str(); 461 break; 462 default: 463 errLog = "Unknown"; 464 } 465 } 466 467 static void logIioErr(uint8_t* data, std::string& errLog) 468 { 469 std::vector<std::string> tmpStr = { 470 "IRP0", "IRP1", " IIO-Core", "VT-d", "Intel Quick Data", 471 "Misc", " DMA", "ITC", "OTC", "CI"}; 472 473 if ((data[0] & 0xF) == 0) 474 { 475 errLog += "CPU " + std::to_string(data[2] >> 5) + ", Error ID 0x" + 476 byteToStr(data[1]) + " - "; 477 478 if ((data[2] & 0xF) <= 0x9) 479 { 480 errLog += tmpStr[(data[2] & 0xF)]; 481 } 482 else 483 { 484 errLog += "Reserved"; 485 } 486 } 487 else 488 { 489 errLog = "Unknown"; 490 } 491 } 492 493 [[maybe_unused]] static void logMemErr(uint8_t* dataPtr, std::string& errLog) 494 { 495 uint8_t snrType = dataPtr[0]; 496 uint8_t snrNum = dataPtr[1]; 497 uint8_t* data = &(dataPtr[3]); 498 499 /* TODO: add pal_add_cri_sel */ 500 501 if (snrNum == memoryEccError) 502 { 503 /* SEL from MEMORY_ECC_ERR Sensor */ 504 switch (data[0] & 0x0F) 505 { 506 case 0x0: 507 if (snrType == 0x0C) 508 { 509 errLog = "Correctable"; 510 } 511 else if (snrType == 0x10) 512 { 513 errLog = "Correctable ECC error Logging Disabled"; 514 } 515 break; 516 case 0x1: 517 errLog = "Uncorrectable"; 518 break; 519 case 0x5: 520 errLog = "Correctable ECC 
error Logging Limit Disabled"; 521 break; 522 default: 523 errLog = "Unknown"; 524 } 525 } 526 else if (snrNum == memoryErrLogDIS) 527 { 528 // SEL from MEMORY_ERR_LOG_DIS Sensor 529 if ((data[0] & 0x0F) == 0x0) 530 { 531 errLog = "Correctable Memory Error Logging Disabled"; 532 } 533 else 534 { 535 errLog = "Unknown"; 536 } 537 } 538 else 539 { 540 errLog = "Unknown"; 541 return; 542 } 543 544 /* Common routine for both MEM_ECC_ERR and MEMORY_ERR_LOG_DIS */ 545 546 errLog += " (DIMM " + byteToStr(data[2]) + ") Logical Rank " + 547 std::to_string(data[1] & 0x03); 548 549 /* DIMM number (data[2]): 550 * Bit[7:5]: Socket number (Range: 0-7) 551 * Bit[4:3]: Channel number (Range: 0-3) 552 * Bit[2:0]: DIMM number (Range: 0-7) 553 */ 554 555 /* TODO: Verify these bits */ 556 std::string cpuStr = "CPU# " + std::to_string((data[2] & 0xE0) >> 5); 557 std::string chStr = "CHN# " + std::to_string((data[2] & 0x18) >> 3); 558 std::string dimmStr = "DIMM#" + std::to_string(data[2] & 0x7); 559 560 switch ((data[1] & 0xC) >> 2) 561 { 562 case 0x0: 563 { 564 /* All Info Valid */ 565 [[maybe_unused]] uint8_t chnNum = (data[2] & 0x1C) >> 2; 566 [[maybe_unused]] uint8_t dimmNum = data[2] & 0x3; 567 568 /* TODO: If critical SEL logging is available, do it */ 569 if (snrType == 0x0C) 570 { 571 if ((data[0] & 0x0F) == 0x0) 572 { 573 /* TODO: add_cri_sel */ 574 /* "DIMM"+ 'A'+ chnNum + dimmNum + " ECC err,FRU:1" 575 */ 576 } 577 else if ((data[0] & 0x0F) == 0x1) 578 { 579 /* TODO: add_cri_sel */ 580 /* "DIMM"+ 'A'+ chnNum + dimmNum + " UECC err,FRU:1" 581 */ 582 } 583 } 584 /* Continue to parse the error into a string. 
All Info Valid 585 */ 586 errLog += " (" + cpuStr + ", " + chStr + ", " + dimmStr + ")"; 587 } 588 589 break; 590 case 0x1: 591 592 /* DIMM info not valid */ 593 errLog += " (" + cpuStr + ", " + chStr + ")"; 594 break; 595 case 0x2: 596 597 /* CHN info not valid */ 598 errLog += " (" + cpuStr + ", " + dimmStr + ")"; 599 break; 600 case 0x3: 601 602 /* CPU info not valid */ 603 errLog += " (" + chStr + ", " + dimmStr + ")"; 604 break; 605 } 606 } 607 608 static void logPwrErr(uint8_t* data, std::string& errLog) 609 { 610 if (data[0] == 0x1) 611 { 612 errLog = "SYS_PWROK failure"; 613 /* Also try logging to Critical log file, if available */ 614 /* "SYS_PWROK failure,FRU:1" */ 615 } 616 else if (data[0] == 0x2) 617 { 618 errLog = "PCH_PWROK failure"; 619 /* Also try logging to Critical log file, if available */ 620 /* "PCH_PWROK failure,FRU:1" */ 621 } 622 else 623 { 624 errLog = "Unknown"; 625 } 626 } 627 628 static void logCatErr(uint8_t* data, std::string& errLog) 629 { 630 if (data[0] == 0x0) 631 { 632 errLog = "IERR/CATERR"; 633 /* Also try logging to Critical log file, if available */ 634 /* "IERR,FRU:1 */ 635 } 636 else if (data[0] == 0xB) 637 { 638 errLog = "MCERR/CATERR"; 639 /* Also try logging to Critical log file, if available */ 640 /* "MCERR,FRU:1 */ 641 } 642 else 643 { 644 errLog = "Unknown"; 645 } 646 } 647 648 static void logDimmHot(uint8_t* data, std::string& errLog) 649 { 650 if ((data[0] << 16 | data[1] << 8 | data[2]) == 0x01FFFF) 651 { 652 errLog = "SOC MEMHOT"; 653 } 654 else 655 { 656 errLog = "Unknown"; 657 /* Also try logging to Critical log file, if available */ 658 /* ""CPU_DIMM_HOT %s,FRU:1" */ 659 } 660 } 661 662 static void logSwNMI(uint8_t* data, std::string& errLog) 663 { 664 if ((data[0] << 16 | data[1] << 8 | data[2]) == 0x03FFFF) 665 { 666 errLog = "Software NMI"; 667 } 668 else 669 { 670 errLog = "Unknown SW NMI"; 671 } 672 } 673 674 static void logCPUThermalSts(uint8_t* data, std::string& errLog) 675 { 676 switch (data[0]) 677 { 
678 case 0x0: 679 errLog = "CPU Critical Temperature"; 680 break; 681 case 0x1: 682 errLog = "PROCHOT#"; 683 break; 684 case 0x2: 685 errLog = "TCC Activation"; 686 break; 687 default: 688 errLog = "Unknown"; 689 } 690 } 691 692 static void logMEPwrState(uint8_t* data, std::string& errLog) 693 { 694 switch (data[0]) 695 { 696 case 0: 697 errLog = "RUNNING"; 698 break; 699 case 2: 700 errLog = "POWER_OFF"; 701 break; 702 default: 703 errLog = "Unknown[" + std::to_string(data[0]) + "]"; 704 break; 705 } 706 } 707 708 static void logSPSFwHealth(uint8_t* data, std::string& errLog) 709 { 710 if ((data[0] & 0x0F) == 0x00) 711 { 712 const std::vector<std::string> tmpStr = { 713 "Recovery GPIO forced", 714 "Image execution failed", 715 "Flash erase error", 716 "Flash state information", 717 "Internal error", 718 "BMC did not respond", 719 "Direct Flash update", 720 "Manufacturing error", 721 "Automatic Restore to Factory Presets", 722 "Firmware Exception", 723 "Flash Wear-Out Protection Warning", 724 "Unknown", 725 "Unknown", 726 "DMI interface error", 727 "MCTP interface error", 728 "Auto-configuration finished", 729 "Unsupported Segment Defined Feature", 730 "Unknown", 731 "CPU Debug Capability Disabled", 732 "UMA operation error"}; 733 734 if (data[1] < 0x14) 735 { 736 errLog = tmpStr[data[1]]; 737 } 738 else 739 { 740 errLog = "Unknown"; 741 } 742 } 743 else if ((data[0] & 0x0F) == 0x01) 744 { 745 errLog = "SMBus link failure"; 746 } 747 else 748 { 749 errLog = "Unknown"; 750 } 751 } 752 753 static void logNmExcA(uint8_t* data, std::string& errLog) 754 { 755 /*NM4.0 #550710, Revision 1.95, and turn to p.155*/ 756 if (data[0] == 0xA8) 757 { 758 errLog = "Policy Correction Time Exceeded"; 759 } 760 else 761 { 762 errLog = "Unknown"; 763 } 764 } 765 766 static void logPCHThermal(uint8_t* data, std::string& errLog) 767 { 768 const std::vector<std::string> thresEvtName = { 769 "Lower Non-critical", 770 "Unknown", 771 "Lower Critical", 772 "Unknown", 773 "Lower 
Non-recoverable", 774 "Unknown", 775 "Unknown", 776 "Upper Non-critical", 777 "Unknown", 778 "Upper Critical", 779 "Unknown", 780 "Upper Non-recoverable"}; 781 782 if ((data[0] & 0x0f) < 12) 783 { 784 errLog = thresEvtName[(data[0] & 0x0f)]; 785 } 786 else 787 { 788 errLog = "Unknown"; 789 } 790 791 errLog += ", curr_val: " + std::to_string(data[1]) + 792 " C, thresh_val: " + std::to_string(data[2]) + " C"; 793 } 794 795 static void logNmHealth(uint8_t* data, std::string& errLog) 796 { 797 std::vector<std::string> nmErrType = { 798 "Unknown", 799 "Unknown", 800 "Unknown", 801 "Unknown", 802 "Unknown", 803 "Unknown", 804 "Unknown", 805 "Extended Telemetry Device Reading Failure", 806 "Outlet Temperature Reading Failure", 807 "Volumetric Airflow Reading Failure", 808 "Policy Misconfiguration", 809 "Power Sensor Reading Failure", 810 "Inlet Temperature Reading Failure", 811 "Host Communication Error", 812 "Real-time Clock Synchronization Failure", 813 "Platform Shutdown Initiated by Intel NM Policy", 814 "Unknown"}; 815 uint8_t nmTypeIdx = (data[0] & 0xf); 816 uint8_t domIdx = (data[1] & 0xf); 817 uint8_t errIdx = ((data[1] >> 4) & 0xf); 818 819 if (nmTypeIdx == 2) 820 { 821 errLog = "SensorIntelNM"; 822 } 823 else 824 { 825 errLog = "Unknown"; 826 } 827 828 errLog += ", Domain:" + nmDomName[domIdx] + ", ErrType:" + 829 nmErrType[errIdx] + ", Err:0x" + byteToStr(data[2]); 830 } 831 832 static void logNmCap(uint8_t* data, std::string& errLog) 833 { 834 const std::vector<std::string> nmCapStsStr = {"Not Available", "Available"}; 835 if (data[0] & 0x7) // BIT1=policy, BIT2=monitoring, BIT3=pwr 836 // limit and the others are reserved 837 { 838 errLog = "PolicyInterface:" + nmCapStsStr[BIT(data[0], 0)] + 839 ",Monitoring:" + nmCapStsStr[BIT(data[0], 1)] + 840 ",PowerLimit:" + nmCapStsStr[BIT(data[0], 2)]; 841 } 842 else 843 { 844 errLog = "Unknown"; 845 } 846 } 847 848 static void logNmThreshold(uint8_t* data, std::string& errLog) 849 { 850 uint8_t thresNum = (data[0] & 
0x3); 851 uint8_t domIdx = (data[1] & 0xf); 852 uint8_t polId = data[2]; 853 uint8_t polEvtIdx = BIT(data[0], 3); 854 const std::vector<std::string> polEvtStr = { 855 "Threshold Exceeded", "Policy Correction Time Exceeded"}; 856 857 errLog = "Threshold Number:" + std::to_string(thresNum) + "-" + 858 polEvtStr[polEvtIdx] + ", Domain:" + nmDomName[domIdx] + 859 ", PolicyID:0x" + byteToStr(polId); 860 } 861 862 static void logPwrThreshold(uint8_t* data, std::string& errLog) 863 { 864 if (data[0] == 0x00) 865 { 866 errLog = "Limit Not Exceeded"; 867 } 868 else if (data[0] == 0x01) 869 { 870 errLog = "Limit Exceeded"; 871 } 872 else 873 { 874 errLog = "Unknown"; 875 } 876 } 877 878 static void logMSMI(uint8_t* data, std::string& errLog) 879 { 880 if (data[0] == 0x0) 881 { 882 errLog = "IERR/MSMI"; 883 } 884 else if (data[0] == 0x0B) 885 { 886 errLog = "MCERR/MSMI"; 887 } 888 else 889 { 890 errLog = "Unknown"; 891 } 892 } 893 894 static void logHprWarn(uint8_t* data, std::string& errLog) 895 { 896 if (data[2] == 0x01) 897 { 898 if (data[1] == 0xFF) 899 { 900 errLog = "Infinite Time"; 901 } 902 else 903 { 904 errLog = std::to_string(data[1]) + " minutes"; 905 } 906 } 907 else 908 { 909 errLog = "Unknown"; 910 } 911 } 912 913 static const boost::container::flat_map< 914 uint8_t, 915 std::pair<std::string, std::function<void(uint8_t*, std::string&)>>> 916 sensorNameTable = { 917 {0xE9, {"SYSTEM_EVENT", logSysEvent}}, 918 {0x7D, {"THERM_THRESH_EVT", logThermalEvent}}, 919 {0xAA, {"BUTTON", logDefault}}, 920 {0xAB, {"POWER_STATE", logDefault}}, 921 {0xEA, {"CRITICAL_IRQ", logCritIrq}}, 922 {0x2B, {"POST_ERROR", logPostErr}}, 923 {0x40, {"MACHINE_CHK_ERR", logMchChkErr}}, 924 {0x41, {"PCIE_ERR", logPcieErr}}, 925 {0x43, {"IIO_ERR", logIioErr}}, 926 {0X63, {"MEMORY_ECC_ERR", logDefault}}, 927 {0X87, {"MEMORY_ERR_LOG_DIS", logDefault}}, 928 {0X51, {"PROCHOT_EXT", logDefault}}, 929 {0X56, {"PWR_ERR", logPwrErr}}, 930 {0xE6, {"CATERR_A", logCatErr}}, 931 {0xEB, {"CATERR_B", 
logCatErr}}, 932 {0xB3, {"CPU_DIMM_HOT", logDimmHot}}, 933 {0x90, {"SOFTWARE_NMI", logSwNMI}}, 934 {0x1C, {"CPU0_THERM_STATUS", logCPUThermalSts}}, 935 {0x1D, {"CPU1_THERM_STATUS", logCPUThermalSts}}, 936 {0x16, {"ME_POWER_STATE", logMEPwrState}}, 937 {0x17, {"SPS_FW_HEALTH", logSPSFwHealth}}, 938 {0x18, {"NM_EXCEPTION_A", logNmExcA}}, 939 {0x08, {"PCH_THERM_THRESHOLD", logPCHThermal}}, 940 {0x19, {"NM_HEALTH", logNmHealth}}, 941 {0x1A, {"NM_CAPABILITIES", logNmCap}}, 942 {0x1B, {"NM_THRESHOLD", logNmThreshold}}, 943 {0x3B, {"PWR_THRESH_EVT", logPwrThreshold}}, 944 {0xE7, {"MSMI", logMSMI}}, 945 {0xC5, {"HPR_WARNING", logHprWarn}}}; 946 947 static void parseSelHelper(StdSELEntry* data, std::string& errStr) 948 { 949 /* Check if sensor type is OS_BOOT (0x1f) */ 950 if (data->sensorType == 0x1F) 951 { 952 /* OS_BOOT used by OS */ 953 switch (data->eventData1 & 0xF) 954 { 955 case 0x07: 956 errStr = "Base OS/Hypervisor Installation started"; 957 break; 958 case 0x08: 959 errStr = "Base OS/Hypervisor Installation completed"; 960 break; 961 case 0x09: 962 errStr = "Base OS/Hypervisor Installation aborted"; 963 break; 964 case 0x0A: 965 errStr = "Base OS/Hypervisor Installation failed"; 966 break; 967 default: 968 errStr = "Unknown"; 969 } 970 return; 971 } 972 973 auto findSensorName = sensorNameTable.find(data->sensorNum); 974 if (findSensorName == sensorNameTable.end()) 975 { 976 errStr = "Unknown"; 977 return; 978 } 979 else 980 { 981 switch (data->sensorNum) 982 { 983 /* logMemErr function needs data from sensor type */ 984 case memoryEccError: 985 case memoryErrLogDIS: 986 findSensorName->second.second(&(data->sensorType), errStr); 987 break; 988 /* Other sensor function needs only event data for parsing */ 989 default: 990 findSensorName->second.second(&(data->eventData1), errStr); 991 } 992 } 993 994 if (((data->eventData3 & 0x80) >> 7) == 0) 995 { 996 errStr += " Assertion"; 997 } 998 else 999 { 1000 errStr += " Deassertion"; 1001 } 1002 } 1003 1004 static void 
parseDimmPhyloc(StdSELEntry* data, std::string& errStr)
{
    /*
     * Append the physical DIMM location decoded from event data 2/3 to
     * errStr. Used when " All info available".
     */
    const uint8_t chNum = (data->eventData3 & 0x18) >> 3;
    const uint8_t dimmNum = data->eventData3 & 0x7;
    const uint8_t rankNum = data->eventData2 & 0x03;
    const uint8_t nodeNum = (data->eventData3 & 0xE0) >> 5;

    /* Only DIMM 0 of selected channels has a known silkscreen label */
    const char* location = "DIMM Unknown";
    if (dimmNum == 0)
    {
        switch (chNum)
        {
            case 3:
                location = "DIMM A0";
                break;
            case 2:
                location = "DIMM B0";
                break;
            /* NOTE(review): chNum is a 2-bit field (0-3), so channels 4
             * and 5 can never match; mask or mapping needs confirming. */
            case 4:
                location = "DIMM C0 ";
                break;
            case 5:
                location = "DIMM D0";
                break;
            default:
                break;
        }
    }

    errStr += " Node: " + std::to_string(nodeNum) + "," +
              " Card: " + std::to_string(chNum) + "," +
              " Module: " + std::to_string(dimmNum) + "," +
              " Rank Number: " + std::to_string(rankNum) + "," +
              " Location: " + location;
}

/*
 * Decode a standard SEL record into errStr. Memory ECC and memory
 * error-logging sensors are handled here; every other sensor is delegated to
 * parseSelHelper.
 */
static void parseStdSel(StdSELEntry* data, std::string& errStr)
{
    /* TODO: add pal_add_cri_sel */
    switch (data->sensorNum)
    {
        case memoryEccError:
            switch (data->eventData1 & 0x0F)
            {
                case 0x00:
                    errStr = "Correctable";
                    parseDimmPhyloc(data, errStr);
                    break;
                case 0x01:
                    errStr = "Uncorrectable";
                    parseDimmPhyloc(data, errStr);
                    break;
                case 0x02:
                    errStr = "Parity";
                    break;
                case 0x05:
                    errStr = "Correctable ECC error Logging Limit Reached";
                    break;
                default:
                    errStr = "Unknown";
            }
            break;
        case memoryErrLogDIS:
            if ((data->eventData1 & 0x0F) == 0)
            {
                errStr = "Correctable Memory Error Logging Disabled";
            }
            else
            {
                errStr = "Unknown";
            }
            break;
        default:
            parseSelHelper(data, errStr);
            return;
    }

    errStr += " (DIMM " + std::to_string(data->eventData3) + ")";
    errStr += " Logical Rank " + std::to_string(data->eventData2 & 0x03);

    /* Two-bit validity field: all four values are covered below */
    switch ((data->eventData2 & 0x0C) >> 2)
    {
        case 0x00:
            // Ignore when " All info available"
            break;
        case 0x01:
            errStr += " DIMM info not valid";
            break;
        case 0x02:
            errStr += " CHN info not valid";
            break;
        case 0x03:
            errStr += " CPU info not valid";
            break;
    }

    if (((data->eventType & 0x80) >> 7) == 0)
    {
        errStr += " Assertion";
    }
    else
    {
        errStr += " Deassertion";
    }
}

/*
 * Decode a timestamped OEM SEL record (record types 0xC0, 0xC2, 0xC3) into
 * errStr. Any other record type yields an empty string.
 */
static void parseOemSel(TsOemSELEntry* data, std::string& errStr)
{
    std::stringstream tmpStream;
    tmpStream << std::hex << std::uppercase << std::setfill('0');

    switch (data->recordType)
    {
        case 0xC0:
            tmpStream << "VID:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[1]) << std::setw(2)
                      << static_cast<int>(data->oemData[0]) << " DID:0x"
                      << std::setw(2) << static_cast<int>(data->oemData[3])
                      << std::setw(2) << static_cast<int>(data->oemData[2])
                      << " Slot:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[4]) << " Error ID:0x"
                      << std::setw(2) << static_cast<int>(data->oemData[5]);
            break;
        case 0xC2:
            tmpStream << "Extra info:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[1]) << " MSCOD:0x"
                      << std::setw(2) << static_cast<int>(data->oemData[3])
                      << std::setw(2) << static_cast<int>(data->oemData[2])
                      << " MCACOD:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[5]) << std::setw(2)
                      << static_cast<int>(data->oemData[4]);
            break;
        case 0xC3:
        {
            /* oemData[1] high nibble = bank; low nibble + oemData[2] form
             * the 12-bit column */
            int bank = (data->oemData[1] & 0xf0) >> 4;
            int col = ((data->oemData[1] & 0x0f) << 8) | data->oemData[2];

            tmpStream << "Fail Device:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[0]) << " Bank:0x"
                      << std::setw(2) << bank << " Column:0x" << std::setw(2)
                      << col << " Failed Row:0x" << std::setw(2)
                      << static_cast<int>(data->oemData[3]) << std::setw(2)
                      << static_cast<int>(data->oemData[4]) << std::setw(2)
                      << static_cast<int>(data->oemData[5]);
            break;
        }
    }

    errStr = tmpStream.str();
}

/*
 * Build the "DIMM Slot Location: ..." text. socket carries the sled in bits
 * 5:4 and the socket in bits 3:0. channel == slot == 0xFF means the location
 * is unknown.
 */
static std::string dimmLocationStr(uint8_t socket, uint8_t channel,
                                   uint8_t slot)
{
    uint8_t sled = (socket >> 4) & 0x3;

    socket &= 0xf;
    if (channel == 0xFF && slot == 0xFF)
    {
        return std::format(
            "DIMM Slot Location: Sled {:02}/Socket {:02}, Channel unknown"
            ", Slot unknown, DIMM unknown",
            sled, socket);
    }

    channel &= 0xf;
    slot &= 0xf;
    const char label[] = {'A', 'C', 'B', 'D'};
    uint8_t idx = socket * 2 + slot;
    return std::format("DIMM Slot Location: Sled {:02}/Socket {:02}"
                       ", Channel {:02}, Slot {:02} DIMM {}",
                       sled, socket, channel, slot,
                       (idx < sizeof(label))
                           ? label[idx] + std::to_string(channel)
                           : "NA");
}

/*
 * Decode a non-timestamped "unified" OEM SEL record into errStr. The low
 * nibble of oemData[0] selects the error class; unknown classes are dumped
 * as raw hex.
 */
static void parseOemUnifiedSel(NtsOemSELEntry* data, std::string& errStr)
{
    uint8_t* ptr = data->oemData;
    uint8_t eventType = ptr[5] & 0xf;
    int genInfo = ptr[0];
    int errType = genInfo & 0x0f;

    /* Lookup tables are immutable: build them once, not on every call.
     * The last entry of each table is the catch-all used when clamping. */
    static const std::vector<std::string> dimmErr = {
        "Memory training failure",
        "Memory correctable error",
        "Memory uncorrectable error",
        "Memory correctable error (Patrol scrub)",
        "Memory uncorrectable error (Patrol scrub)",
        "Memory Parity Error (PCC=0)",
        "Memory Parity Error (PCC=1)",
        "Memory PMIC Error",
        "CXL Memory training error",
        "Reserved"};
    static const std::vector<std::string> postEvent = {
        "System PXE boot fail",
        "CMOS/NVRAM configuration cleared",
        "TPM Self-Test Fail",
        "Boot Drive failure",
        "Data Drive failure",
        "Received invalid boot order request from BMC",
        "System HTTP boot fail",
        "BIOS fails to get the certificate from BMC",
        "Password cleared by jumper",
        "DXE FV check failure",
        "AMD ABL failure",
        "Reserved"};
    static const std::vector<std::string> certErr = {
        "No certificate at BMC", "IPMI transaction fail",
        "Certificate data corrupted", "Reserved"};
    static const std::vector<std::string> pcieEvent = {
        "PCIe DPC Event",
        "PCIe LER Event",
        "PCIe Link Retraining and Recovery",
        "PCIe Link CRC Error Check and Retry",
        "PCIe Corrupt Data Containment",
        "PCIe Express ECRC",
        "Reserved"};
    static const std::vector<std::string> memEvent = {
        "Memory PPR event",
        "Memory Correctable Error logging limit reached",
        "Memory disable/map-out for FRB",
        "Memory SDDC",
        "Memory Address range/Partial mirroring",
        "Memory ADDDC",
        "Memory SMBus hang recovery",
        "No DIMM in System",
        "Reserved"};
    static const std::vector<std::string> memPprTime = {
        "Boot time", "Autonomous", "Run time", "Reserved"};
    static const std::vector<std::string> memPpr = {
        "PPR success", "PPR fail", "PPR request", "Reserved"};
    static const std::vector<std::string> memAdddc = {
        "Bank VLS", "r-Bank VLS + re-buddy", "r-Bank VLS + Rank VLS",
        "r-Rank VLS + re-buddy", "Reserved"};
    static const std::vector<std::string> pprEvent = {
        "PPR disable", "Soft PPR", "Hard PPR", "Reserved"};

    std::stringstream tmpStream;

    switch (errType)
    {
        case unifiedPcieErr:
            tmpStream << std::format(
                "GeneralInfo: x86/PCIeErr(0x{:02X})"
                ", Bus {:02X}/Dev {:02X}/Fun {:02X}, TotalErrID1Cnt: 0x{:04X}"
                ", ErrID2: 0x{:02X}, ErrID1: 0x{:02X}",
                genInfo, ptr[8], ptr[7] >> 3, ptr[7] & 0x7,
                (ptr[10] << 8) | ptr[9], ptr[11], ptr[12]);
            break;
        case unifiedMemErr:
            eventType = ptr[9] & 0xf;
            tmpStream << std::format(
                "GeneralInfo: MemErr(0x{:02X}), {}, DIMM Failure Event: {}",
                genInfo, dimmLocationStr(ptr[5], ptr[6], ptr[7]),
                dimmErr[std::min(eventType,
                                 static_cast<uint8_t>(dimmErr.size() - 1))]);

            if (static_cast<MemErrType>(eventType) == MemErrType::memTrainErr ||
                static_cast<MemErrType>(eventType) == MemErrType::memPmicErr)
            {
                /* Bit 7 of ptr[9] selects a 16-bit minor code (printed with
                 * 4 digits) instead of an 8-bit one */
                bool amd = ptr[9] & 0x80;
                tmpStream << std::format(
                    ", Major Code: 0x{:02X}, Minor Code: 0x{:0{}X}", ptr[10],
                    amd ? (ptr[12] << 8 | ptr[11]) : ptr[11], amd ? 4 : 2);
            }
            break;
        case unifiedIioErr:
            tmpStream << std::format(
                "GeneralInfo: IIOErr(0x{:02X})"
                ", IIO Port Location: Sled {:02}/Socket {:02}, Stack 0x{:02X}"
                ", Error Type: 0x{:02X}, Error Severity: 0x{:02X}"
                ", Error ID: 0x{:02X}",
                genInfo, (ptr[5] >> 4) & 0x3, ptr[5] & 0xf, ptr[6], ptr[10],
                ptr[11] & 0xf, ptr[12]);
            break;
        case unifiedPostEvt:
            tmpStream << std::format(
                "GeneralInfo: POST(0x{:02X}), POST Failure Event: {}", genInfo,
                postEvent[std::min(
                    eventType, static_cast<uint8_t>(postEvent.size() - 1))]);

            switch (static_cast<PostEvtType>(eventType))
            {
                case PostEvtType::pxeBootFail:
                case PostEvtType::httpBootFail:
                {
                    uint8_t failType = ptr[10] & 0xf;
                    tmpStream
                        << std::format(", Fail Type: {}, Error Code: 0x{:02X}",
                                       (failType == 4 || failType == 6)
                                           ? std::format("IPv{} fail", failType)
                                           : std::format("0x{:02X}", ptr[10]),
                                       ptr[11]);
                    break;
                }
                case PostEvtType::getCertFail:
                    tmpStream << std::format(
                        ", Failure Detail: {}",
                        certErr[std::min(
                            ptr[9], static_cast<uint8_t>(certErr.size() - 1))]);
                    break;
                case PostEvtType::amdAblFail:
                    tmpStream << std::format(", ABL Error Code: 0x{:04X}",
                                             (ptr[12] << 8) | ptr[11]);
                    break;
            }
            break;
        case unifiedPcieEvt:
            tmpStream << std::format(
                "GeneralInfo: PCIeEvent(0x{:02X}), PCIe Failure Event: {}",
                genInfo,
                pcieEvent[std::min(
                    eventType, static_cast<uint8_t>(pcieEvent.size() - 1))]);

            if (static_cast<PcieEvtType>(eventType) == PcieEvtType::dpc)
            {
                tmpStream << std::format(
                    ", Status: 0x{:04X}, Source ID: 0x{:04X}",
                    (ptr[8] << 8) | ptr[7], (ptr[10] << 8) | ptr[9]);
            }
            break;
        case unifiedMemEvt:
            eventType = ptr[9] & 0xf;
            tmpStream
                << std::format("GeneralInfo: MemEvent(0x{:02X})", genInfo)
                << (static_cast<MemEvtType>(eventType) != MemEvtType::noDimm
                        ? std::format(", {}",
                                      dimmLocationStr(ptr[5], ptr[6], ptr[7]))
                        : "")
                << ", DIMM Failure Event: ";

            switch (static_cast<MemEvtType>(eventType))
            {
                case MemEvtType::ppr:
                    tmpStream << std::format("{} {}",
                                             memPprTime[(ptr[10] >> 2) & 0x3],
                                             memPpr[ptr[10] & 0x3]);
                    break;
                case MemEvtType::adddc:
                    tmpStream << std::format(
                        "{} {}",
                        memEvent[std::min(eventType,
                                          static_cast<uint8_t>(
                                              memEvent.size() - 1))],
                        memAdddc[std::min(
                            static_cast<uint8_t>(ptr[11] & 0xf),
                            static_cast<uint8_t>(memAdddc.size() - 1))]);
                    break;
                default:
                    tmpStream << std::format(
                        "{}", memEvent[std::min(
                                  eventType,
                                  static_cast<uint8_t>(memEvent.size() - 1))]);
                    break;
            }
            break;
        case unifiedBootGuard:
            tmpStream << std::format(
                "GeneralInfo: Boot Guard ACM Failure Events(0x{:02X})"
                ", Error Class: 0x{:02X}, Error Code: 0x{:02X}",
                genInfo, ptr[9], ptr[10]);
            break;
        case unifiedPprEvt:
            tmpStream << std::format(
                "GeneralInfo: PPREvent(0x{:02X}), {}"
                ", DIMM Info: {:02X}{:02X}{:02X}{:02X}{:02X}{:02X}{:02X}",
                genInfo,
                pprEvent[std::min(eventType,
                                  static_cast<uint8_t>(pprEvent.size() - 1))],
                ptr[6], ptr[7], ptr[8], ptr[9], ptr[10], ptr[11], ptr[12]);
            break;
        default:
        {
            /* Unknown class: dump the 13 OEM data bytes as hex */
            std::vector<uint8_t> oemData(ptr, ptr + 13);
            std::string oemDataStr;
            toHexStr(oemData, oemDataStr);
            tmpStream << std::format("Undefined Error Type(0x{:02X}), Raw: {}",
                                     errType, oemDataStr);
            break;
        }
    }

    errStr = tmpStream.str();
}

static void parseSelData(uint8_t fruId, std::vector<uint8_t>& reqData,
                         std::string& msgLog)
{
    /* Get record type */
    int recType = reqData[2];
    std::string errType, errLog;

    uint8_t* ptr = NULL;

    std::stringstream recTypeStream;
    recTypeStream << std::hex << std::uppercase <<
std::setfill('0') 1422 << std::setw(2) << recType; 1423 1424 msgLog = "SEL Entry: FRU: " + std::to_string(fruId) + ", Record: "; 1425 1426 if (recType == stdErrType) 1427 { 1428 StdSELEntry* data = reinterpret_cast<StdSELEntry*>(&reqData[0]); 1429 std::string sensorName; 1430 1431 errType = stdErr; 1432 if (data->sensorType == 0x1F) 1433 { 1434 sensorName = "OS"; 1435 } 1436 else 1437 { 1438 auto findSensorName = sensorNameTable.find(data->sensorNum); 1439 if (findSensorName == sensorNameTable.end()) 1440 { 1441 sensorName = "Unknown"; 1442 } 1443 else 1444 { 1445 sensorName = findSensorName->second.first; 1446 } 1447 } 1448 1449 parseStdSel(data, errLog); 1450 ptr = &(data->eventData1); 1451 std::vector<uint8_t> evtData(ptr, ptr + 3); 1452 std::string eventData; 1453 toHexStr(evtData, eventData); 1454 1455 std::stringstream senNumStream; 1456 senNumStream << std::hex << std::uppercase << std::setfill('0') 1457 << std::setw(2) << (int)(data->sensorNum); 1458 1459 msgLog += errType + " (0x" + recTypeStream.str() + 1460 "), Sensor: " + sensorName + " (0x" + senNumStream.str() + 1461 "), Event Data: (" + eventData + ") " + errLog; 1462 } 1463 else if ((recType >= oemTSErrTypeMin) && (recType <= oemTSErrTypeMax)) 1464 { 1465 /* timestamped OEM SEL records */ 1466 TsOemSELEntry* data = reinterpret_cast<TsOemSELEntry*>(&reqData[0]); 1467 ptr = data->mfrId; 1468 std::vector<uint8_t> mfrIdData(ptr, ptr + 3); 1469 std::string mfrIdStr; 1470 toHexStr(mfrIdData, mfrIdStr); 1471 1472 ptr = data->oemData; 1473 std::vector<uint8_t> oemData(ptr, ptr + 6); 1474 std::string oemDataStr; 1475 toHexStr(oemData, oemDataStr); 1476 1477 errType = oemTSErr; 1478 parseOemSel(data, errLog); 1479 1480 msgLog += errType + " (0x" + recTypeStream.str() + "), MFG ID: " + 1481 mfrIdStr + ", OEM Data: (" + oemDataStr + ") " + errLog; 1482 } 1483 else if (recType == fbUniErrType) 1484 { 1485 NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]); 1486 errType = fbUniSELErr; 1487 
parseOemUnifiedSel(data, errLog); 1488 msgLog += errType + " (0x" + recTypeStream.str() + "), " + errLog; 1489 } 1490 else if ((recType >= oemNTSErrTypeMin) && (recType <= oemNTSErrTypeMax)) 1491 { 1492 /* Non timestamped OEM SEL records */ 1493 NtsOemSELEntry* data = reinterpret_cast<NtsOemSELEntry*>(&reqData[0]); 1494 errType = oemNTSErr; 1495 1496 ptr = data->oemData; 1497 std::vector<uint8_t> oemData(ptr, ptr + 13); 1498 std::string oemDataStr; 1499 toHexStr(oemData, oemDataStr); 1500 1501 parseOemSel((TsOemSELEntry*)data, errLog); 1502 msgLog += errType + " (0x" + recTypeStream.str() + "), OEM Data: (" + 1503 oemDataStr + ") " + errLog; 1504 } 1505 else 1506 { 1507 errType = unknownErr; 1508 toHexStr(reqData, errLog); 1509 msgLog += errType + " (0x" + recTypeStream.str() + 1510 ") RawData: " + errLog; 1511 } 1512 } 1513 1514 } // namespace fb_oem::ipmi::sel 1515 1516 namespace ipmi 1517 { 1518 1519 namespace storage 1520 { 1521 1522 static void registerSELFunctions() __attribute__((constructor)); 1523 static fb_oem::ipmi::sel::SELData selObj __attribute__((init_priority(101))); 1524 1525 ipmi::RspType<uint8_t, // SEL version 1526 uint16_t, // SEL entry count 1527 uint16_t, // free space 1528 uint32_t, // last add timestamp 1529 uint32_t, // last erase timestamp 1530 uint8_t> // operation support 1531 ipmiStorageGetSELInfo() 1532 { 1533 fb_oem::ipmi::sel::GetSELInfoData info; 1534 1535 selObj.getInfo(info); 1536 return ipmi::responseSuccess(info.selVersion, info.entries, info.freeSpace, 1537 info.addTimeStamp, info.eraseTimeStamp, 1538 info.operationSupport); 1539 } 1540 1541 ipmi::RspType<uint16_t, std::vector<uint8_t>> 1542 ipmiStorageGetSELEntry(std::vector<uint8_t> data) 1543 { 1544 if (data.size() != sizeof(fb_oem::ipmi::sel::GetSELEntryRequest)) 1545 { 1546 return ipmi::responseReqDataLenInvalid(); 1547 } 1548 1549 fb_oem::ipmi::sel::GetSELEntryRequest* reqData = 1550 reinterpret_cast<fb_oem::ipmi::sel::GetSELEntryRequest*>(&data[0]); 1551 1552 if 
(reqData->reservID != 0) 1553 { 1554 if (!checkSELReservation(reqData->reservID)) 1555 { 1556 return ipmi::responseInvalidReservationId(); 1557 } 1558 } 1559 1560 uint16_t selCnt = selObj.getCount(); 1561 if (selCnt == 0) 1562 { 1563 return ipmi::responseSensorInvalid(); 1564 } 1565 1566 /* If it is asked for first entry */ 1567 if (reqData->recordID == fb_oem::ipmi::sel::firstEntry) 1568 { 1569 /* First Entry (0x0000) as per Spec */ 1570 reqData->recordID = 1; 1571 } 1572 else if (reqData->recordID == fb_oem::ipmi::sel::lastEntry) 1573 { 1574 /* Last entry (0xFFFF) as per Spec */ 1575 reqData->recordID = selCnt; 1576 } 1577 1578 std::string ipmiRaw; 1579 1580 if (selObj.getEntry(reqData->recordID, ipmiRaw) < 0) 1581 { 1582 return ipmi::responseSensorInvalid(); 1583 } 1584 1585 std::vector<uint8_t> recDataBytes; 1586 if (fromHexStr(ipmiRaw, recDataBytes) < 0) 1587 { 1588 return ipmi::responseUnspecifiedError(); 1589 } 1590 1591 /* Identify the next SEL record ID. If recordID is same as 1592 * total SeL count then next id should be last entry else 1593 * it should be incremented by 1 to current RecordID 1594 */ 1595 uint16_t nextRecord; 1596 if (reqData->recordID == selCnt) 1597 { 1598 nextRecord = fb_oem::ipmi::sel::lastEntry; 1599 } 1600 else 1601 { 1602 nextRecord = reqData->recordID + 1; 1603 } 1604 1605 if (reqData->readLen == fb_oem::ipmi::sel::entireRecord) 1606 { 1607 return ipmi::responseSuccess(nextRecord, recDataBytes); 1608 } 1609 else 1610 { 1611 if (reqData->offset >= fb_oem::ipmi::sel::selRecordSize || 1612 reqData->readLen > fb_oem::ipmi::sel::selRecordSize) 1613 { 1614 return ipmi::responseUnspecifiedError(); 1615 } 1616 std::vector<uint8_t> recPartData; 1617 1618 auto diff = fb_oem::ipmi::sel::selRecordSize - reqData->offset; 1619 auto readLength = std::min(diff, static_cast<int>(reqData->readLen)); 1620 1621 for (int i = 0; i < readLength; i++) 1622 { 1623 recPartData.push_back(recDataBytes[i + reqData->offset]); 1624 } 1625 return 
ipmi::responseSuccess(nextRecord, recPartData); 1626 } 1627 } 1628 1629 ipmi::RspType<uint16_t> 1630 ipmiStorageAddSELEntry(ipmi::Context::ptr ctx, std::vector<uint8_t> data) 1631 { 1632 /* Per the IPMI spec, need to cancel any reservation when a 1633 * SEL entry is added 1634 */ 1635 cancelSELReservation(); 1636 1637 if (data.size() != fb_oem::ipmi::sel::selRecordSize) 1638 { 1639 return ipmi::responseReqDataLenInvalid(); 1640 } 1641 1642 std::string ipmiRaw, logErr; 1643 toHexStr(data, ipmiRaw); 1644 1645 /* Parse sel data and get an error log to be filed */ 1646 fb_oem::ipmi::sel::parseSelData((ctx->hostIdx + 1), data, logErr); 1647 1648 static const std::string openBMCMessageRegistryVersion("0.1"); 1649 std::string messageID = 1650 "OpenBMC." + openBMCMessageRegistryVersion + ".SELEntryAdded"; 1651 1652 /* Log the Raw SEL message to the journal */ 1653 std::string journalMsg = "SEL Entry Added: " + ipmiRaw; 1654 1655 phosphor::logging::log<phosphor::logging::level::INFO>( 1656 journalMsg.c_str(), 1657 phosphor::logging::entry("IPMISEL_MESSAGE_ID=%s", messageID.c_str()), 1658 phosphor::logging::entry("IPMISEL_MESSAGE_ARGS=%s", logErr.c_str())); 1659 1660 std::map<std::string, std::string> ad; 1661 std::string severity = "xyz.openbmc_project.Logging.Entry.Level.Critical"; 1662 ad.emplace("IPMI_RAW", ipmiRaw); 1663 1664 auto bus = sdbusplus::bus::new_default(); 1665 auto reqMsg = bus.new_method_call( 1666 "xyz.openbmc_project.Logging", "/xyz/openbmc_project/logging", 1667 "xyz.openbmc_project.Logging.Create", "Create"); 1668 reqMsg.append(logErr, severity, ad); 1669 1670 try 1671 { 1672 bus.call(reqMsg); 1673 } 1674 catch (sdbusplus::exception_t& e) 1675 { 1676 phosphor::logging::log<phosphor::logging::level::ERR>(e.what()); 1677 } 1678 1679 int responseID = selObj.addEntry(ipmiRaw.c_str()); 1680 if (responseID < 0) 1681 { 1682 return ipmi::responseUnspecifiedError(); 1683 } 1684 return ipmi::responseSuccess((uint16_t)responseID); 1685 } 1686 1687 
ipmi::RspType<uint8_t> ipmiStorageClearSEL(uint16_t reservationID, 1688 const std::array<uint8_t, 3>& clr, 1689 uint8_t eraseOperation) 1690 { 1691 if (!checkSELReservation(reservationID)) 1692 { 1693 return ipmi::responseInvalidReservationId(); 1694 } 1695 1696 static constexpr std::array<uint8_t, 3> clrExpected = {'C', 'L', 'R'}; 1697 if (clr != clrExpected) 1698 { 1699 return ipmi::responseInvalidFieldRequest(); 1700 } 1701 1702 /* If there is no sel then return erase complete */ 1703 if (selObj.getCount() == 0) 1704 { 1705 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete); 1706 } 1707 1708 /* Erasure status cannot be fetched, so always return erasure 1709 * status as `erase completed`. 1710 */ 1711 if (eraseOperation == fb_oem::ipmi::sel::getEraseStatus) 1712 { 1713 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete); 1714 } 1715 1716 /* Check that initiate erase is correct */ 1717 if (eraseOperation != fb_oem::ipmi::sel::initiateErase) 1718 { 1719 return ipmi::responseInvalidFieldRequest(); 1720 } 1721 1722 /* Per the IPMI spec, need to cancel any reservation when the 1723 * SEL is cleared 1724 */ 1725 cancelSELReservation(); 1726 1727 /* Clear the complete Sel Json object */ 1728 if (selObj.clear() < 0) 1729 { 1730 return ipmi::responseUnspecifiedError(); 1731 } 1732 1733 return ipmi::responseSuccess(fb_oem::ipmi::sel::eraseComplete); 1734 } 1735 1736 ipmi::RspType<uint32_t> ipmiStorageGetSELTime() 1737 { 1738 struct timespec selTime = {}; 1739 1740 if (clock_gettime(CLOCK_REALTIME, &selTime) < 0) 1741 { 1742 return ipmi::responseUnspecifiedError(); 1743 } 1744 1745 return ipmi::responseSuccess(selTime.tv_sec); 1746 } 1747 1748 ipmi::RspType<> ipmiStorageSetSELTime(uint32_t) 1749 { 1750 // Set SEL Time is not supported 1751 return ipmi::responseInvalidCommand(); 1752 } 1753 1754 ipmi::RspType<uint16_t> ipmiStorageGetSELTimeUtcOffset() 1755 { 1756 /* TODO: For now, the SEL time stamp is based on UTC time, 1757 * so return 0x0000 as 
offset. Might need to change once 1758 * supporting zones in SEL time stamps 1759 */ 1760 1761 uint16_t utcOffset = 0x0000; 1762 return ipmi::responseSuccess(utcOffset); 1763 } 1764 1765 void registerSELFunctions() 1766 { 1767 // <Get SEL Info> 1768 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1769 ipmi::storage::cmdGetSelInfo, ipmi::Privilege::User, 1770 ipmiStorageGetSELInfo); 1771 1772 // <Get SEL Entry> 1773 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1774 ipmi::storage::cmdGetSelEntry, ipmi::Privilege::User, 1775 ipmiStorageGetSELEntry); 1776 1777 // <Add SEL Entry> 1778 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1779 ipmi::storage::cmdAddSelEntry, 1780 ipmi::Privilege::Operator, ipmiStorageAddSELEntry); 1781 1782 // <Clear SEL> 1783 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1784 ipmi::storage::cmdClearSel, ipmi::Privilege::Operator, 1785 ipmiStorageClearSEL); 1786 1787 // <Get SEL Time> 1788 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1789 ipmi::storage::cmdGetSelTime, ipmi::Privilege::User, 1790 ipmiStorageGetSELTime); 1791 1792 // <Set SEL Time> 1793 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1794 ipmi::storage::cmdSetSelTime, 1795 ipmi::Privilege::Operator, ipmiStorageSetSELTime); 1796 1797 // <Get SEL Time UTC Offset> 1798 ipmi::registerHandler(ipmi::prioOpenBmcBase, ipmi::netFnStorage, 1799 ipmi::storage::cmdGetSelTimeUtcOffset, 1800 ipmi::Privilege::User, 1801 ipmiStorageGetSELTimeUtcOffset); 1802 1803 return; 1804 } 1805 1806 } // namespace storage 1807 } // namespace ipmi 1808