1 #include "oem_event_manager.hpp" 2 3 #include "requester/handler.hpp" 4 #include "requester/request.hpp" 5 6 #include <config.h> 7 #include <libpldm/pldm.h> 8 #include <libpldm/utils.h> 9 #include <systemd/sd-journal.h> 10 11 #include <phosphor-logging/lg2.hpp> 12 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 13 14 #include <algorithm> 15 #include <map> 16 #include <sstream> 17 #include <string> 18 #include <unordered_map> 19 20 namespace pldm 21 { 22 namespace oem_ampere 23 { 24 namespace boot_stage = boot::stage; 25 namespace ddr_status = ddr::status; 26 namespace dimm_status = dimm::status; 27 namespace dimm_syndrome = dimm::training_failure::dimm_syndrome; 28 namespace phy_syndrome = dimm::training_failure::phy_syndrome; 29 namespace training_failure = dimm::training_failure; 30 31 constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK"; 32 constexpr const char* ampereWarningRegistry = 33 "OpenBMC.0.1.AmpereWarning.Warning"; 34 constexpr const char* ampereCriticalRegistry = 35 "OpenBMC.0.1.AmpereCritical.Critical"; 36 constexpr const char* BIOSFWPanicRegistry = 37 "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning"; 38 constexpr auto maxDIMMIdxBitNum = 24; 39 constexpr auto maxDIMMInstantNum = 24; 40 41 /* 42 An array of possible boot status of a boot stage. 43 The index maps with byte 0 of boot code. 44 */ 45 std::array<std::string, 3> bootStatMsg = {" booting", " completed", " failed"}; 46 47 /* 48 An array of possible boot status of DDR training stage. 49 The index maps with byte 0 of boot code. 50 */ 51 std::array<std::string, 3> ddrTrainingMsg = { 52 " progress started", " in-progress", " progress completed"}; 53 54 /* 55 A map between PMIC status and logging strings. 56 */ 57 std::array<std::string, 8> pmicTempAlertMsg = { 58 "Below 85°C", "85°C", "95°C", "105°C", 59 "115°C", "125°C", "135°C", "Equal or greater than 140°C"}; 60 61 /* 62 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC 63 EPs through SMBus and PCIe. When host boots up, SMBUS interface 64 comes up first. In this interface, BMC is bus owner. 65 66 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available). 67 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available). 68 */ 69 EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}}; 70 71 /* 72 A map between sensor IDs and their names in string. 73 Using pldm::oem::sensor_ids 74 */ 75 EventToMsgMap_t sensorIdToStrMap = { 76 {DDR_STATUS, "DDR_STATUS"}, {PCP_VR_STATE, "PCP_VR_STATE"}, 77 {SOC_VR_STATE, "SOC_VR_STATE"}, {DPHY_VR1_STATE, "DPHY_VR1_STATE"}, 78 {DPHY_VR2_STATE, "DPHY_VR2_STATE"}, {D2D_VR_STATE, "D2D_VR_STATE"}, 79 {IOC_VR1_STATE, "IOC_VR1_STATE"}, {IOC_VR2_STATE, "IOC_VR2_STATE"}, 80 {PCI_D_VR_STATE, "PCI_D_VR_STATE"}, {PCI_A_VR_STATE, "PCI_A_VR_STATE"}, 81 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"}, {BOOT_OVERALL, "BOOT_OVERALL"}}; 82 83 /* 84 A map between the boot stages and logging strings. 85 Using pldm::oem::boot::stage::boot_stage 86 */ 87 EventToMsgMap_t bootStageToMsgMap = { 88 {boot_stage::SECPRO, "SECpro"}, 89 {boot_stage::MPRO, "Mpro"}, 90 {boot_stage::ATF_BL1, "ATF BL1"}, 91 {boot_stage::ATF_BL2, "ATF BL2"}, 92 {boot_stage::DDR_INITIALIZATION, "DDR initialization"}, 93 {boot_stage::DDR_TRAINING, "DDR training"}, 94 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"}, 95 {boot_stage::ATF_BL31, "ATF BL31"}, 96 {boot_stage::ATF_BL32, "ATF BL32"}, 97 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"}, 98 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN, 99 "ATF BL33 (UEFI) booting status = "}}; 100 101 /* 102 A map between DDR status and logging strings. 103 Using pldm::oem::ddr::status::ddr_status 104 */ 105 EventToMsgMap_t ddrStatusToMsgMap = { 106 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"}, 107 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"}, 108 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"}, 109 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"}, 110 {ddr_status::OTHER_FAILURE, "has other failure"}, 111 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG, 112 "has boot failure due to no configuration"}, 113 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS, 114 "failsafe activated but boot success with the next valid configuration"}}; 115 116 /* 117 A map between DIMM status and logging strings. 118 Using pldm::oem::dimm::status::dimm_status 119 */ 120 EventToMsgMap_t dimmStatusToMsgMap = { 121 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"}, 122 {dimm_status::NOT_INSTALLED, "is not installed"}, 123 {dimm_status::OTHER_FAILURE, "has other failure"}, 124 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"}, 125 {dimm_status::TRAINING_FAILURE, "has training failure; "}, 126 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}}; 127 128 /* 129 A map between PHY training failure syndrome and logging strings. 130 Using 131 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome 132 */ 133 EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = { 134 {phy_syndrome::NA, "(N/A)"}, 135 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"}, 136 {phy_syndrome::CA_LEVELING, "(CA leveling)"}, 137 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE, 138 "(PHY write level failure - see syndrome 1)"}, 139 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE, 140 "(PHY read gate leveling failure)"}, 141 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"}, 142 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"}, 143 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}}; 144 145 /* 146 A map between DIMM training failure syndrome and logging strings. 147 Using 148 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome 149 */ 150 EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = { 151 {dimm_syndrome::NA, "(N/A)"}, 152 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE, 153 "(DRAM VREFDQ training failure)"}, 154 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"}, 155 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE, 156 "(LRDRIMM DB SW training failure)"}}; 157 158 /* 159 A map between DIMM training failure type and a pair of <logging strings - 160 syndrome map>. Using 161 pldm::oem::dimm::training_faillure::dimm_training_failure_type 162 */ 163 std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>> 164 dimmTrainingFailureTypeMap = { 165 {training_failure::PHY_TRAINING_FAILURE_TYPE, 166 std::make_pair("PHY training failure", 167 phyTrainingFailureSyndromeToMsgMap)}, 168 {training_failure::DIMM_TRAINING_FAILURE_TYPE, 169 std::make_pair("DIMM training failure", 170 dimmTrainingFailureSyndromeToMsgMap)}}; 171 172 /* 173 A map between log level and the registry used for Redfish SEL log 174 Using pldm::oem::log_level 175 */ 176 std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = { 177 {log_level::OK, ampereEventRegistry}, 178 {log_level::WARNING, ampereWarningRegistry}, 179 {log_level::CRITICAL, ampereCriticalRegistry}, 180 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}}; 181 182 std::string 183 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId) 184 { 185 std::string description; 186 if (!tidToSocketNameMap.contains(tid)) 187 { 188 description += "TID " + std::to_string(tid) + ": "; 189 } 190 else 191 { 192 description += tidToSocketNameMap[tid] + ": "; 193 } 194 195 if (!sensorIdToStrMap.contains(sensorId)) 196 { 197 description += "Sensor ID " + std::to_string(sensorId) + ": "; 198 } 199 else 200 { 201 description += sensorIdToStrMap[sensorId] + ": "; 202 } 203 204 return description; 205 } 206 207 void OemEventManager::sendJournalRedfish(const std::string& description, 208 log_level& logLevel) 209 { 210 if (description.empty()) 211 { 212 return; 213 } 214 215 if (!logLevelToRedfishMsgIdMap.contains(logLevel)) 216 { 217 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel, 218 "DES", description); 219 return; 220 } 221 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel]; 222 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID", 223 redfishMsgId, "REDFISH_MESSAGE_ARGS", description); 224 } 225 226 std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs) 227 { 228 std::string description; 229 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum)) 230 { 231 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx)) 232 { 233 description += " #" + std::to_string(bitIdx); 234 } 235 } 236 return description; 237 } 238 239 void OemEventManager::handleBootOverallEvent( 240 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading) 241 { 242 log_level logLevel{log_level::OK}; 243 std::string description; 244 std::stringstream strStream; 245 246 uint8_t byte0 = (presentReading & 0x000000ff); 247 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8; 248 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16; 249 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 250 /* 251 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31, 252 * ATF BL32 and DDR initialization 253 */ 254 if (bootStageToMsgMap.contains(byte3)) 255 { 256 // Boot stage adding 257 description += bootStageToMsgMap[byte3]; 258 259 switch (byte3) 260 { 261 case boot_stage::DDR_TRAINING: 262 if (byte0 >= ddrTrainingMsg.size()) 263 { 264 logLevel = log_level::BIOSFWPANIC; 265 description += " unknown status"; 266 } 267 else 268 { 269 description += ddrTrainingMsg[byte0]; 270 } 271 if (0x01 == byte0) 272 { 273 // Add complete percentage 274 description += " at " + std::to_string(byte1) + "%"; 275 } 276 break; 277 case boot_stage::S0_DDR_TRAINING_FAILURE: 278 case boot_stage::S1_DDR_TRAINING_FAILURE: 279 // ddr_training_status_msg() 280 logLevel = log_level::BIOSFWPANIC; 281 description += " at DIMMs:"; 282 // dimmIdxs = presentReading & 0x00ffffff; 283 description += dimmIdxsToString(presentReading & 0x00ffffff); 284 description += " of socket "; 285 description += 286 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1"; 287 break; 288 default: 289 if (byte0 >= bootStatMsg.size()) 290 { 291 logLevel = log_level::BIOSFWPANIC; 292 description += " unknown status"; 293 } 294 else 295 { 296 description += bootStatMsg[byte0]; 297 } 298 break; 299 } 300 301 // Sensor report action is fail 302 if (boot::status::BOOT_STATUS_FAILURE == byte2) 303 { 304 logLevel = log_level::BIOSFWPANIC; 305 } 306 } 307 else 308 { 309 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX) 310 { 311 description += 312 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN]; 313 314 strStream 315 << "Segment (0x" << std::setfill('0') << std::hex 316 << std::setw(8) << static_cast<uint32_t>(presentReading) 317 << "); Status Class (0x" << std::setw(2) 318 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x" 319 << std::setw(2) << static_cast<uint32_t>(byte2) 320 << "); Operation Code (0x" << std::setw(4) 321 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16) 322 << ")" << std::dec; 323 324 description += strStream.str(); 325 } 326 } 327 328 // Log to Redfish event 329 sendJournalRedfish(description, logLevel); 330 } 331 332 int OemEventManager::processNumericSensorEvent( 333 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 334 size_t sensorDataLength) 335 { 336 uint8_t eventState = 0; 337 uint8_t previousEventState = 0; 338 uint8_t sensorDataSize = 0; 339 uint32_t presentReading; 340 auto rc = decode_numeric_sensor_data( 341 sensorData, sensorDataLength, &eventState, &previousEventState, 342 &sensorDataSize, &presentReading); 343 if (rc) 344 { 345 lg2::error( 346 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ", 347 "TID", tid, "RC", rc); 348 return rc; 349 } 350 351 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) 352 if (auto dimmIdx = (sensorId - 4) / 2; 353 sensorId >= 4 && dimmIdx >= 0 && dimmIdx < maxDIMMInstantNum) 354 { 355 handleDIMMStatusEvent(tid, sensorId, presentReading); 356 return PLDM_SUCCESS; 357 } 358 359 switch (sensorId) 360 { 361 case BOOT_OVERALL: 362 handleBootOverallEvent(tid, sensorId, presentReading); 363 break; 364 case PCIE_HOT_PLUG: 365 handlePCIeHotPlugEvent(tid, sensorId, presentReading); 366 break; 367 case DDR_STATUS: 368 handleDDRStatusEvent(tid, sensorId, presentReading); 369 break; 370 case PCP_VR_STATE: 371 case SOC_VR_STATE: 372 case DPHY_VR1_STATE: 373 case DPHY_VR2_STATE: 374 case D2D_VR_STATE: 375 case IOC_VR1_STATE: 376 case IOC_VR2_STATE: 377 case PCI_D_VR_STATE: 378 case PCI_A_VR_STATE: 379 handleVRDStatusEvent(tid, sensorId, presentReading); 380 break; 381 default: 382 std::string description; 383 std::stringstream strStream; 384 log_level logLevel = log_level::OK; 385 386 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: "; 387 description += prefixMsgStrCreation(tid, sensorId); 388 strStream << std::setfill('0') << std::hex << "eventState 0x" 389 << std::setw(2) << static_cast<uint32_t>(eventState) 390 << " previousEventState 0x" << std::setw(2) 391 << static_cast<uint32_t>(previousEventState) 392 << " sensorDataSize 0x" << std::setw(2) 393 << static_cast<uint32_t>(sensorDataSize) 394 << " presentReading 0x" << std::setw(8) 395 << static_cast<uint32_t>(presentReading) << std::dec; 396 description += strStream.str(); 397 398 sendJournalRedfish(description, logLevel); 399 break; 400 } 401 return PLDM_SUCCESS; 402 } 403 404 int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId, 405 const uint8_t* sensorData, 406 size_t sensorDataLength) 407 { 408 uint8_t sensorOffset = 0; 409 uint8_t eventState = 0; 410 uint8_t previousEventState = 0; 411 412 auto rc = 413 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset, 414 &eventState, &previousEventState); 415 if (rc) 416 { 417 lg2::error( 418 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}", 419 "TID", tid, "RC", rc); 420 return rc; 421 } 422 423 std::string description; 424 std::stringstream strStream; 425 log_level logLevel = log_level::OK; 426 427 description += "SENSOR_EVENT : STATE_SENSOR_STATE: "; 428 description += prefixMsgStrCreation(tid, sensorId); 429 strStream << std::setfill('0') << std::hex << "sensorOffset 0x" 430 << std::setw(2) << static_cast<uint32_t>(sensorOffset) 431 << "eventState 0x" << std::setw(2) 432 << static_cast<uint32_t>(eventState) << " previousEventState 0x" 433 << std::setw(2) << static_cast<uint32_t>(previousEventState) 434 << std::dec; 435 description += strStream.str(); 436 437 sendJournalRedfish(description, logLevel); 438 439 return PLDM_SUCCESS; 440 } 441 442 int OemEventManager::processSensorOpStateEvent( 443 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 444 size_t sensorDataLength) 445 { 446 uint8_t present_op_state = 0; 447 uint8_t previous_op_state = 0; 448 449 auto rc = decode_sensor_op_data(sensorData, sensorDataLength, 450 &present_op_state, &previous_op_state); 451 if (rc) 452 { 453 lg2::error( 454 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}", 455 "TID", tid, "RC", rc); 456 return rc; 457 } 458 459 std::string description; 460 std::stringstream strStream; 461 log_level logLevel = log_level::OK; 462 463 description += "SENSOR_EVENT : SENSOR_OP_STATE: "; 464 description += prefixMsgStrCreation(tid, sensorId); 465 strStream << std::setfill('0') << std::hex << "present_op_state 0x" 466 << std::setw(2) << static_cast<uint32_t>(present_op_state) 467 << "previous_op_state 0x" << std::setw(2) 468 << static_cast<uint32_t>(previous_op_state) << std::dec; 469 description += strStream.str(); 470 471 sendJournalRedfish(description, logLevel); 472 473 return PLDM_SUCCESS; 474 } 475 476 int OemEventManager::handleSensorEvent( 477 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, 478 pldm_tid_t tid, size_t eventDataOffset) 479 { 480 /* This OEM event handler is only used for SoC terminus*/ 481 if (!tidToSocketNameMap.contains(tid)) 482 { 483 return PLDM_SUCCESS; 484 } 485 auto eventData = 486 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; 487 auto eventDataSize = payloadLength - eventDataOffset; 488 489 uint16_t sensorId = 0; 490 uint8_t sensorEventClassType = 0; 491 size_t eventClassDataOffset = 0; 492 auto rc = 493 decode_sensor_event_data(eventData, eventDataSize, &sensorId, 494 &sensorEventClassType, &eventClassDataOffset); 495 if (rc) 496 { 497 lg2::error("Failed to decode sensor event data return code {RC}.", "RC", 498 rc); 499 return rc; 500 } 501 const uint8_t* sensorData = eventData + eventClassDataOffset; 502 size_t sensorDataLength = eventDataSize - eventClassDataOffset; 503 504 switch (sensorEventClassType) 505 { 506 case PLDM_NUMERIC_SENSOR_STATE: 507 { 508 return processNumericSensorEvent(tid, sensorId, sensorData, 509 sensorDataLength); 510 } 511 case PLDM_STATE_SENSOR_STATE: 512 { 513 return processStateSensorEvent(tid, sensorId, sensorData, 514 sensorDataLength); 515 } 516 case PLDM_SENSOR_OP_STATE: 517 { 518 return processSensorOpStateEvent(tid, sensorId, sensorData, 519 sensorDataLength); 520 } 521 default: 522 std::string description; 523 std::stringstream strStream; 524 log_level logLevel = log_level::OK; 525 526 description += "SENSOR_EVENT : Unsupported Sensor Class " + 527 std::to_string(sensorEventClassType) + ": "; 528 description += prefixMsgStrCreation(tid, sensorId); 529 strStream << std::setfill('0') << std::hex 530 << std::setw(sizeof(sensorData) * 2) << "Sensor data: "; 531 532 auto dataPtr = sensorData; 533 for ([[maybe_unused]] const auto& i : 534 std::views::iota(0, (int)sensorDataLength)) 535 { 536 strStream << "0x" << static_cast<uint32_t>(*dataPtr); 537 dataPtr += sizeof(sensorData); 538 } 539 540 description += strStream.str(); 541 542 sendJournalRedfish(description, logLevel); 543 } 544 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE", 545 sensorEventClassType); 546 return PLDM_ERROR; 547 } 548 549 void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId, 550 uint32_t presentReading) 551 { 552 std::string description; 553 std::stringstream strStream; 554 PCIeHotPlugEventRecord_t record{presentReading}; 555 556 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal"; 557 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed"; 558 log_level logLevel = 559 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING; 560 561 description += prefixMsgStrCreation(tid, sensorId); 562 563 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2) 564 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x" 565 << std::setw(2) << static_cast<uint32_t>(record.bits.bus) 566 << "); Device (0x" << std::setw(2) 567 << static_cast<uint32_t>(record.bits.device) << "); Function (0x" 568 << std::setw(2) << static_cast<uint32_t>(record.bits.function) 569 << "); Action (" << sAction << "); Operation status (" 570 << sOpStatus << "); Media slot number (" << std::dec 571 << static_cast<uint32_t>(record.bits.mediaSlot) << ")"; 572 573 description += strStream.str(); 574 575 // Log to Redfish event 576 sendJournalRedfish(description, logLevel); 577 } 578 579 std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo) 580 { 581 std::string description; 582 DIMMTrainingFailure_t failure{failureInfo}; 583 584 if (dimmTrainingFailureTypeMap.contains(failure.bits.type)) 585 { 586 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type]; 587 588 description += std::get<0>(failureInfoMap); 589 590 description += "; MCU rank index " + 591 std::to_string(failure.bits.mcuRankIdx); 592 593 description += "; Slice number " + 594 std::to_string(failure.bits.sliceNum); 595 596 description += "; Upper nibble error status: "; 597 description += (!failure.bits.upperNibbStatErr) 598 ? "No error" 599 : "Found no rising edge"; 600 601 description += "; Lower nibble error status: "; 602 description += (!failure.bits.lowerNibbStatErr) 603 ? "No error" 604 : "Found no rising edge"; 605 606 description += "; Failure syndrome 0: "; 607 608 auto& syndromeMap = std::get<1>(failureInfoMap); 609 if (syndromeMap.contains(failure.bits.syndrome)) 610 { 611 description += syndromeMap[failure.bits.syndrome]; 612 } 613 else 614 { 615 description += "(Unknown syndrome)"; 616 } 617 } 618 else 619 { 620 description += "Unknown training failure type " + 621 std::to_string(failure.bits.type); 622 } 623 624 return description; 625 } 626 627 void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId, 628 uint32_t presentReading) 629 { 630 log_level logLevel{log_level::WARNING}; 631 std::string description; 632 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 633 uint32_t byte012 = presentReading & 0xffffff; 634 635 description += prefixMsgStrCreation(tid, sensorId); 636 637 uint8_t dimmIdx = (sensorId - 4) / 2; 638 639 description += "DIMM " + std::to_string(dimmIdx) + " "; 640 641 if (dimmStatusToMsgMap.contains(byte3)) 642 { 643 if (byte3 == dimm_status::INSTALLED_NO_ERROR || 644 byte3 == dimm_status::INSTALLED_BUT_DISABLED) 645 { 646 logLevel = log_level::OK; 647 } 648 649 description += dimmStatusToMsgMap[byte3]; 650 651 if (byte3 == dimm_status::TRAINING_FAILURE) 652 { 653 description += "; " + dimmTrainingFailureToMsg(byte012); 654 } 655 else if (byte3 == dimm_status::PMIC_TEMP_ALERT) 656 { 657 uint8_t byte0 = (byte012 & 0xff); 658 if (byte0 < pmicTempAlertMsg.size()) 659 { 660 description += ": " + pmicTempAlertMsg[byte0]; 661 } 662 } 663 } 664 else 665 { 666 switch (byte3) 667 { 668 case dimm_status::PMIC_HIGH_TEMP: 669 if (byte012 == 0x01) 670 { 671 description += "has PMIC high temp condition"; 672 } 673 break; 674 case dimm_status::TSx_HIGH_TEMP: 675 switch (byte012) 676 { 677 case 0x01: 678 description += "has TS0"; 679 break; 680 case 0x02: 681 description += "has TS1"; 682 break; 683 case 0x03: 684 description += "has TS0 and TS1"; 685 break; 686 } 687 description += " exceeding their high temperature threshold"; 688 break; 689 case dimm_status::SPD_HUB_HIGH_TEMP: 690 if (byte012 == 0x01) 691 { 692 description += "has SPD/HUB high temp condition"; 693 } 694 break; 695 default: 696 description += "has unsupported status " + 697 std::to_string(byte3); 698 break; 699 } 700 } 701 702 // Log to Redfish event 703 sendJournalRedfish(description, logLevel); 704 } 705 706 void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId, 707 uint32_t presentReading) 708 { 709 log_level logLevel{log_level::WARNING}; 710 std::string description; 711 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 712 uint32_t byte012 = presentReading & 0xffffff; 713 714 description += prefixMsgStrCreation(tid, sensorId); 715 716 description += "DDR "; 717 if (ddrStatusToMsgMap.contains(byte3)) 718 { 719 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR) 720 { 721 logLevel = log_level::OK; 722 } 723 724 description += ddrStatusToMsgMap[byte3]; 725 726 if (byte3 == ddr_status::CONFIGURATION_FAILURE || 727 byte3 == ddr_status::TRAINING_FAILURE) 728 { 729 // List out failed DIMMs 730 description += dimmIdxsToString(byte012); 731 } 732 } 733 else 734 { 735 description += "has unsupported status " + std::to_string(byte3); 736 } 737 738 // Log to Redfish event 739 sendJournalRedfish(description, logLevel); 740 } 741 742 void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId, 743 uint32_t presentReading) 744 { 745 log_level logLevel{log_level::WARNING}; 746 std::string description; 747 std::stringstream strStream; 748 749 description += prefixMsgStrCreation(tid, sensorId); 750 751 VRDStatus_t status{presentReading}; 752 753 if (status.bits.warning && status.bits.critical) 754 { 755 description += "A VR warning and a VR critical"; 756 logLevel = log_level::CRITICAL; 757 } 758 else 759 { 760 if (status.bits.warning) 761 { 762 description += "A VR warning"; 763 } 764 else if (status.bits.critical) 765 { 766 description += "A VR critical"; 767 logLevel = log_level::CRITICAL; 768 } 769 else 770 { 771 description += "No VR warning or critical"; 772 logLevel = log_level::OK; 773 } 774 } 775 description += " condition observed"; 776 777 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex 778 << std::setw(2) 779 << static_cast<uint32_t>(status.bits.vr_status_byte_high) 780 << "; VR status byte low is 0x" << std::setw(2) 781 << static_cast<uint32_t>(status.bits.vr_status_byte_low) 782 << "; Reading is 0x" << std::setw(2) 783 << static_cast<uint32_t>(presentReading) << ";"; 784 785 description += strStream.str(); 786 787 // Log to Redfish event 788 sendJournalRedfish(description, logLevel); 789 } 790 791 } // namespace oem_ampere 792 } // namespace pldm 793