1 #include "oem_event_manager.hpp" 2 3 #include "requester/handler.hpp" 4 #include "requester/request.hpp" 5 6 #include <config.h> 7 #include <libpldm/pldm.h> 8 #include <libpldm/utils.h> 9 #include <systemd/sd-journal.h> 10 11 #include <phosphor-logging/lg2.hpp> 12 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 13 14 #include <algorithm> 15 #include <map> 16 #include <sstream> 17 #include <string> 18 #include <unordered_map> 19 20 namespace pldm 21 { 22 namespace oem_ampere 23 { 24 namespace boot_stage = boot::stage; 25 namespace ddr_status = ddr::status; 26 namespace dimm_status = dimm::status; 27 namespace dimm_syndrome = dimm::training_failure::dimm_syndrome; 28 namespace phy_syndrome = dimm::training_failure::phy_syndrome; 29 namespace training_failure = dimm::training_failure; 30 31 constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK"; 32 constexpr const char* ampereWarningRegistry = 33 "OpenBMC.0.1.AmpereWarning.Warning"; 34 constexpr const char* ampereCriticalRegistry = 35 "OpenBMC.0.1.AmpereCritical.Critical"; 36 constexpr const char* BIOSFWPanicRegistry = 37 "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning"; 38 constexpr auto maxDIMMIdxBitNum = 24; 39 constexpr auto maxDIMMInstantNum = 24; 40 41 /* 42 An array of possible boot status of a boot stage. 43 The index maps with byte 0 of boot code. 44 */ 45 std::array<std::string, 3> bootStatMsg = {" booting", " completed", " failed"}; 46 47 /* 48 An array of possible boot status of DDR training stage. 49 The index maps with byte 0 of boot code. 50 */ 51 std::array<std::string, 3> ddrTrainingMsg = { 52 " progress started", " in-progress", " progress completed"}; 53 54 /* 55 A map between PMIC status and logging strings. 56 */ 57 std::array<std::string, 8> pmicTempAlertMsg = { 58 "Below 85°C", "85°C", "95°C", "105°C", 59 "115°C", "125°C", "135°C", "Equal or greater than 140°C"}; 60 61 /* 62 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC 63 EPs through SMBus and PCIe. When host boots up, SMBUS interface 64 comes up first. In this interface, BMC is bus owner. 65 66 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available). 67 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available). 68 */ 69 EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}}; 70 71 /* 72 A map between sensor IDs and their names in string. 73 Using pldm::oem::sensor_ids 74 */ 75 EventToMsgMap_t sensorIdToStrMap = { 76 {DDR_STATUS, "DDR_STATUS"}, 77 {PCP_VR_STATE, "PCP_VR_STATE"}, 78 {SOC_VR_STATE, "SOC_VR_STATE"}, 79 {DPHY_VR1_STATE, "DPHY_VR1_STATE"}, 80 {DPHY_VR2_STATE, "DPHY_VR2_STATE"}, 81 {D2D_VR_STATE, "D2D_VR_STATE"}, 82 {IOC_VR1_STATE, "IOC_VR1_STATE"}, 83 {IOC_VR2_STATE, "IOC_VR2_STATE"}, 84 {PCI_D_VR_STATE, "PCI_D_VR_STATE"}, 85 {PCI_A_VR_STATE, "PCI_A_VR_STATE"}, 86 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"}, 87 {BOOT_OVERALL, "BOOT_OVERALL"}, 88 {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"}}; 89 90 /* 91 A map between the boot stages and logging strings. 92 Using pldm::oem::boot::stage::boot_stage 93 */ 94 EventToMsgMap_t bootStageToMsgMap = { 95 {boot_stage::SECPRO, "SECpro"}, 96 {boot_stage::MPRO, "Mpro"}, 97 {boot_stage::ATF_BL1, "ATF BL1"}, 98 {boot_stage::ATF_BL2, "ATF BL2"}, 99 {boot_stage::DDR_INITIALIZATION, "DDR initialization"}, 100 {boot_stage::DDR_TRAINING, "DDR training"}, 101 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"}, 102 {boot_stage::ATF_BL31, "ATF BL31"}, 103 {boot_stage::ATF_BL32, "ATF BL32"}, 104 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"}, 105 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN, 106 "ATF BL33 (UEFI) booting status = "}}; 107 108 /* 109 A map between DDR status and logging strings. 110 Using pldm::oem::ddr::status::ddr_status 111 */ 112 EventToMsgMap_t ddrStatusToMsgMap = { 113 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"}, 114 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"}, 115 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"}, 116 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"}, 117 {ddr_status::OTHER_FAILURE, "has other failure"}, 118 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG, 119 "has boot failure due to no configuration"}, 120 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS, 121 "failsafe activated but boot success with the next valid configuration"}}; 122 123 /* 124 A map between DIMM status and logging strings. 125 Using pldm::oem::dimm::status::dimm_status 126 */ 127 EventToMsgMap_t dimmStatusToMsgMap = { 128 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"}, 129 {dimm_status::NOT_INSTALLED, "is not installed"}, 130 {dimm_status::OTHER_FAILURE, "has other failure"}, 131 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"}, 132 {dimm_status::TRAINING_FAILURE, "has training failure; "}, 133 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}}; 134 135 /* 136 A map between PHY training failure syndrome and logging strings. 137 Using 138 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome 139 */ 140 EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = { 141 {phy_syndrome::NA, "(N/A)"}, 142 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"}, 143 {phy_syndrome::CA_LEVELING, "(CA leveling)"}, 144 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE, 145 "(PHY write level failure - see syndrome 1)"}, 146 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE, 147 "(PHY read gate leveling failure)"}, 148 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"}, 149 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"}, 150 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}}; 151 152 /* 153 A map between DIMM training failure syndrome and logging strings. 154 Using 155 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome 156 */ 157 EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = { 158 {dimm_syndrome::NA, "(N/A)"}, 159 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE, 160 "(DRAM VREFDQ training failure)"}, 161 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"}, 162 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE, 163 "(LRDRIMM DB SW training failure)"}}; 164 165 /* 166 A map between DIMM training failure type and a pair of <logging strings - 167 syndrome map>. Using 168 pldm::oem::dimm::training_faillure::dimm_training_failure_type 169 */ 170 std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>> 171 dimmTrainingFailureTypeMap = { 172 {training_failure::PHY_TRAINING_FAILURE_TYPE, 173 std::make_pair("PHY training failure", 174 phyTrainingFailureSyndromeToMsgMap)}, 175 {training_failure::DIMM_TRAINING_FAILURE_TYPE, 176 std::make_pair("DIMM training failure", 177 dimmTrainingFailureSyndromeToMsgMap)}}; 178 179 /* 180 A map between log level and the registry used for Redfish SEL log 181 Using pldm::oem::log_level 182 */ 183 std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = { 184 {log_level::OK, ampereEventRegistry}, 185 {log_level::WARNING, ampereWarningRegistry}, 186 {log_level::CRITICAL, ampereCriticalRegistry}, 187 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}}; 188 189 std::unordered_map< 190 uint16_t, 191 std::vector<std::pair< 192 std::string, 193 std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>> 194 stateSensorToMsgMap = { 195 {SOC_HEALTH_AVAILABILITY, 196 {{"SoC Health", 197 {{1, {log_level::OK, "Normal"}}, 198 {2, {log_level::WARNING, "Non-Critical"}}, 199 {3, {log_level::CRITICAL, "Critical"}}, 200 {4, {log_level::CRITICAL, "Fatal"}}}}, 201 {"SoC Availability", 202 {{1, {log_level::OK, "Enabled"}}, 203 {2, {log_level::WARNING, "Disabled"}}, 204 {3, {log_level::CRITICAL, "Shutdown"}}}}}}}; 205 206 std::string 207 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId) 208 { 209 std::string description; 210 if (!tidToSocketNameMap.contains(tid)) 211 { 212 description += "TID " + std::to_string(tid) + ": "; 213 } 214 else 215 { 216 description += tidToSocketNameMap[tid] + ": "; 217 } 218 219 if (!sensorIdToStrMap.contains(sensorId)) 220 { 221 description += "Sensor ID " + std::to_string(sensorId) + ": "; 222 } 223 else 224 { 225 description += sensorIdToStrMap[sensorId] + ": "; 226 } 227 228 return description; 229 } 230 231 void OemEventManager::sendJournalRedfish(const std::string& description, 232 log_level& logLevel) 233 { 234 if (description.empty()) 235 { 236 return; 237 } 238 239 if (!logLevelToRedfishMsgIdMap.contains(logLevel)) 240 { 241 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel, 242 "DES", description); 243 return; 244 } 245 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel]; 246 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID", 247 redfishMsgId, "REDFISH_MESSAGE_ARGS", description); 248 } 249 250 std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs) 251 { 252 std::string description; 253 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum)) 254 { 255 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx)) 256 { 257 description += " #" + std::to_string(bitIdx); 258 } 259 } 260 return description; 261 } 262 263 void OemEventManager::handleBootOverallEvent( 264 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading) 265 { 266 log_level logLevel{log_level::OK}; 267 std::string description; 268 std::stringstream strStream; 269 270 uint8_t byte0 = (presentReading & 0x000000ff); 271 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8; 272 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16; 273 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 274 /* 275 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31, 276 * ATF BL32 and DDR initialization 277 */ 278 if (bootStageToMsgMap.contains(byte3)) 279 { 280 // Boot stage adding 281 description += bootStageToMsgMap[byte3]; 282 283 switch (byte3) 284 { 285 case boot_stage::DDR_TRAINING: 286 if (byte0 >= ddrTrainingMsg.size()) 287 { 288 logLevel = log_level::BIOSFWPANIC; 289 description += " unknown status"; 290 } 291 else 292 { 293 description += ddrTrainingMsg[byte0]; 294 } 295 if (0x01 == byte0) 296 { 297 // Add complete percentage 298 description += " at " + std::to_string(byte1) + "%"; 299 } 300 break; 301 case boot_stage::S0_DDR_TRAINING_FAILURE: 302 case boot_stage::S1_DDR_TRAINING_FAILURE: 303 // ddr_training_status_msg() 304 logLevel = log_level::BIOSFWPANIC; 305 description += " at DIMMs:"; 306 // dimmIdxs = presentReading & 0x00ffffff; 307 description += dimmIdxsToString(presentReading & 0x00ffffff); 308 description += " of socket "; 309 description += 310 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1"; 311 break; 312 default: 313 if (byte0 >= bootStatMsg.size()) 314 { 315 logLevel = log_level::BIOSFWPANIC; 316 description += " unknown status"; 317 } 318 else 319 { 320 description += bootStatMsg[byte0]; 321 } 322 break; 323 } 324 325 // Sensor report action is fail 326 if (boot::status::BOOT_STATUS_FAILURE == byte2) 327 { 328 logLevel = log_level::BIOSFWPANIC; 329 } 330 } 331 else 332 { 333 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX) 334 { 335 description += 336 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN]; 337 338 strStream 339 << "Segment (0x" << std::setfill('0') << std::hex 340 << std::setw(8) << static_cast<uint32_t>(presentReading) 341 << "); Status Class (0x" << std::setw(2) 342 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x" 343 << std::setw(2) << static_cast<uint32_t>(byte2) 344 << "); Operation Code (0x" << std::setw(4) 345 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16) 346 << ")" << std::dec; 347 348 description += strStream.str(); 349 } 350 } 351 352 // Log to Redfish event 353 sendJournalRedfish(description, logLevel); 354 } 355 356 int OemEventManager::processNumericSensorEvent( 357 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 358 size_t sensorDataLength) 359 { 360 uint8_t eventState = 0; 361 uint8_t previousEventState = 0; 362 uint8_t sensorDataSize = 0; 363 uint32_t presentReading; 364 auto rc = decode_numeric_sensor_data( 365 sensorData, sensorDataLength, &eventState, &previousEventState, 366 &sensorDataSize, &presentReading); 367 if (rc) 368 { 369 lg2::error( 370 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ", 371 "TID", tid, "RC", rc); 372 return rc; 373 } 374 375 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) 376 if (auto dimmIdx = (sensorId - 4) / 2; 377 sensorId >= 4 && dimmIdx >= 0 && dimmIdx < maxDIMMInstantNum) 378 { 379 handleDIMMStatusEvent(tid, sensorId, presentReading); 380 return PLDM_SUCCESS; 381 } 382 383 switch (sensorId) 384 { 385 case BOOT_OVERALL: 386 handleBootOverallEvent(tid, sensorId, presentReading); 387 break; 388 case PCIE_HOT_PLUG: 389 handlePCIeHotPlugEvent(tid, sensorId, presentReading); 390 break; 391 case DDR_STATUS: 392 handleDDRStatusEvent(tid, sensorId, presentReading); 393 break; 394 case PCP_VR_STATE: 395 case SOC_VR_STATE: 396 case DPHY_VR1_STATE: 397 case DPHY_VR2_STATE: 398 case D2D_VR_STATE: 399 case IOC_VR1_STATE: 400 case IOC_VR2_STATE: 401 case PCI_D_VR_STATE: 402 case PCI_A_VR_STATE: 403 handleVRDStatusEvent(tid, sensorId, presentReading); 404 break; 405 default: 406 std::string description; 407 std::stringstream strStream; 408 log_level logLevel = log_level::OK; 409 410 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: "; 411 description += prefixMsgStrCreation(tid, sensorId); 412 strStream << std::setfill('0') << std::hex << "eventState 0x" 413 << std::setw(2) << static_cast<uint32_t>(eventState) 414 << " previousEventState 0x" << std::setw(2) 415 << static_cast<uint32_t>(previousEventState) 416 << " sensorDataSize 0x" << std::setw(2) 417 << static_cast<uint32_t>(sensorDataSize) 418 << " presentReading 0x" << std::setw(8) 419 << static_cast<uint32_t>(presentReading) << std::dec; 420 description += strStream.str(); 421 422 sendJournalRedfish(description, logLevel); 423 break; 424 } 425 return PLDM_SUCCESS; 426 } 427 428 int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId, 429 const uint8_t* sensorData, 430 size_t sensorDataLength) 431 { 432 uint8_t sensorOffset = 0; 433 uint8_t eventState = 0; 434 uint8_t previousEventState = 0; 435 436 auto rc = 437 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset, 438 &eventState, &previousEventState); 439 if (rc) 440 { 441 lg2::error( 442 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}", 443 "TID", tid, "RC", rc); 444 return rc; 445 } 446 447 std::string description; 448 log_level logLevel = log_level::OK; 449 450 if (stateSensorToMsgMap.contains(sensorId)) 451 { 452 description += prefixMsgStrCreation(tid, sensorId); 453 auto componentMap = stateSensorToMsgMap[sensorId]; 454 if (sensorOffset < componentMap.size()) 455 { 456 description += std::get<0>(componentMap[sensorOffset]); 457 auto stateMap = std::get<1>(componentMap[sensorOffset]); 458 if (stateMap.contains(eventState)) 459 { 460 logLevel = std::get<0>(stateMap[eventState]); 461 description += " state : " + std::get<1>(stateMap[eventState]); 462 if (stateMap.contains(previousEventState)) 463 { 464 description += "; previous state: " + 465 std::get<1>(stateMap[previousEventState]); 466 } 467 } 468 else 469 { 470 description += " sends unsupported event state: " + 471 std::to_string(eventState); 472 if (stateMap.contains(previousEventState)) 473 { 474 description += "; previous state: " + 475 std::get<1>(stateMap[previousEventState]); 476 } 477 } 478 } 479 else 480 { 481 description += "sends unsupported component sensor offset " + 482 std::to_string(sensorOffset); 483 } 484 } 485 else 486 { 487 std::stringstream strStream; 488 description += "SENSOR_EVENT : STATE_SENSOR_STATE: "; 489 description += prefixMsgStrCreation(tid, sensorId); 490 strStream << std::setfill('0') << std::hex << "sensorOffset 0x" 491 << std::setw(2) << static_cast<uint32_t>(sensorOffset) 492 << "eventState 0x" << std::setw(2) 493 << static_cast<uint32_t>(eventState) 494 << " previousEventState 0x" << std::setw(2) 495 << static_cast<uint32_t>(previousEventState) << std::dec; 496 description += strStream.str(); 497 } 498 499 sendJournalRedfish(description, logLevel); 500 501 return PLDM_SUCCESS; 502 } 503 504 int OemEventManager::processSensorOpStateEvent( 505 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 506 size_t sensorDataLength) 507 { 508 uint8_t present_op_state = 0; 509 uint8_t previous_op_state = 0; 510 511 auto rc = decode_sensor_op_data(sensorData, sensorDataLength, 512 &present_op_state, &previous_op_state); 513 if (rc) 514 { 515 lg2::error( 516 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}", 517 "TID", tid, "RC", rc); 518 return rc; 519 } 520 521 std::string description; 522 std::stringstream strStream; 523 log_level logLevel = log_level::OK; 524 525 description += "SENSOR_EVENT : SENSOR_OP_STATE: "; 526 description += prefixMsgStrCreation(tid, sensorId); 527 strStream << std::setfill('0') << std::hex << "present_op_state 0x" 528 << std::setw(2) << static_cast<uint32_t>(present_op_state) 529 << "previous_op_state 0x" << std::setw(2) 530 << static_cast<uint32_t>(previous_op_state) << std::dec; 531 description += strStream.str(); 532 533 sendJournalRedfish(description, logLevel); 534 535 return PLDM_SUCCESS; 536 } 537 538 int OemEventManager::handleSensorEvent( 539 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, 540 pldm_tid_t tid, size_t eventDataOffset) 541 { 542 /* This OEM event handler is only used for SoC terminus*/ 543 if (!tidToSocketNameMap.contains(tid)) 544 { 545 return PLDM_SUCCESS; 546 } 547 auto eventData = 548 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; 549 auto eventDataSize = payloadLength - eventDataOffset; 550 551 uint16_t sensorId = 0; 552 uint8_t sensorEventClassType = 0; 553 size_t eventClassDataOffset = 0; 554 auto rc = 555 decode_sensor_event_data(eventData, eventDataSize, &sensorId, 556 &sensorEventClassType, &eventClassDataOffset); 557 if (rc) 558 { 559 lg2::error("Failed to decode sensor event data return code {RC}.", "RC", 560 rc); 561 return rc; 562 } 563 const uint8_t* sensorData = eventData + eventClassDataOffset; 564 size_t sensorDataLength = eventDataSize - eventClassDataOffset; 565 566 switch (sensorEventClassType) 567 { 568 case PLDM_NUMERIC_SENSOR_STATE: 569 { 570 return processNumericSensorEvent(tid, sensorId, sensorData, 571 sensorDataLength); 572 } 573 case PLDM_STATE_SENSOR_STATE: 574 { 575 return processStateSensorEvent(tid, sensorId, sensorData, 576 sensorDataLength); 577 } 578 case PLDM_SENSOR_OP_STATE: 579 { 580 return processSensorOpStateEvent(tid, sensorId, sensorData, 581 sensorDataLength); 582 } 583 default: 584 std::string description; 585 std::stringstream strStream; 586 log_level logLevel = log_level::OK; 587 588 description += "SENSOR_EVENT : Unsupported Sensor Class " + 589 std::to_string(sensorEventClassType) + ": "; 590 description += prefixMsgStrCreation(tid, sensorId); 591 strStream << std::setfill('0') << std::hex 592 << std::setw(sizeof(sensorData) * 2) << "Sensor data: "; 593 594 auto dataPtr = sensorData; 595 for ([[maybe_unused]] const auto& i : 596 std::views::iota(0, (int)sensorDataLength)) 597 { 598 strStream << "0x" << static_cast<uint32_t>(*dataPtr); 599 dataPtr += sizeof(sensorData); 600 } 601 602 description += strStream.str(); 603 604 sendJournalRedfish(description, logLevel); 605 } 606 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE", 607 sensorEventClassType); 608 return PLDM_ERROR; 609 } 610 611 void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId, 612 uint32_t presentReading) 613 { 614 std::string description; 615 std::stringstream strStream; 616 PCIeHotPlugEventRecord_t record{presentReading}; 617 618 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal"; 619 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed"; 620 log_level logLevel = 621 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING; 622 623 description += prefixMsgStrCreation(tid, sensorId); 624 625 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2) 626 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x" 627 << std::setw(2) << static_cast<uint32_t>(record.bits.bus) 628 << "); Device (0x" << std::setw(2) 629 << static_cast<uint32_t>(record.bits.device) << "); Function (0x" 630 << std::setw(2) << static_cast<uint32_t>(record.bits.function) 631 << "); Action (" << sAction << "); Operation status (" 632 << sOpStatus << "); Media slot number (" << std::dec 633 << static_cast<uint32_t>(record.bits.mediaSlot) << ")"; 634 635 description += strStream.str(); 636 637 // Log to Redfish event 638 sendJournalRedfish(description, logLevel); 639 } 640 641 std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo) 642 { 643 std::string description; 644 DIMMTrainingFailure_t failure{failureInfo}; 645 646 if (dimmTrainingFailureTypeMap.contains(failure.bits.type)) 647 { 648 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type]; 649 650 description += std::get<0>(failureInfoMap); 651 652 description += "; MCU rank index " + 653 std::to_string(failure.bits.mcuRankIdx); 654 655 description += "; Slice number " + 656 std::to_string(failure.bits.sliceNum); 657 658 description += "; Upper nibble error status: "; 659 description += (!failure.bits.upperNibbStatErr) 660 ? "No error" 661 : "Found no rising edge"; 662 663 description += "; Lower nibble error status: "; 664 description += (!failure.bits.lowerNibbStatErr) 665 ? "No error" 666 : "Found no rising edge"; 667 668 description += "; Failure syndrome 0: "; 669 670 auto& syndromeMap = std::get<1>(failureInfoMap); 671 if (syndromeMap.contains(failure.bits.syndrome)) 672 { 673 description += syndromeMap[failure.bits.syndrome]; 674 } 675 else 676 { 677 description += "(Unknown syndrome)"; 678 } 679 } 680 else 681 { 682 description += "Unknown training failure type " + 683 std::to_string(failure.bits.type); 684 } 685 686 return description; 687 } 688 689 void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId, 690 uint32_t presentReading) 691 { 692 log_level logLevel{log_level::WARNING}; 693 std::string description; 694 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 695 uint32_t byte012 = presentReading & 0xffffff; 696 697 description += prefixMsgStrCreation(tid, sensorId); 698 699 uint8_t dimmIdx = (sensorId - 4) / 2; 700 701 description += "DIMM " + std::to_string(dimmIdx) + " "; 702 703 if (dimmStatusToMsgMap.contains(byte3)) 704 { 705 if (byte3 == dimm_status::INSTALLED_NO_ERROR || 706 byte3 == dimm_status::INSTALLED_BUT_DISABLED) 707 { 708 logLevel = log_level::OK; 709 } 710 711 description += dimmStatusToMsgMap[byte3]; 712 713 if (byte3 == dimm_status::TRAINING_FAILURE) 714 { 715 description += "; " + dimmTrainingFailureToMsg(byte012); 716 } 717 else if (byte3 == dimm_status::PMIC_TEMP_ALERT) 718 { 719 uint8_t byte0 = (byte012 & 0xff); 720 if (byte0 < pmicTempAlertMsg.size()) 721 { 722 description += ": " + pmicTempAlertMsg[byte0]; 723 } 724 } 725 } 726 else 727 { 728 switch (byte3) 729 { 730 case dimm_status::PMIC_HIGH_TEMP: 731 if (byte012 == 0x01) 732 { 733 description += "has PMIC high temp condition"; 734 } 735 break; 736 case dimm_status::TSx_HIGH_TEMP: 737 switch (byte012) 738 { 739 case 0x01: 740 description += "has TS0"; 741 break; 742 case 0x02: 743 description += "has TS1"; 744 break; 745 case 0x03: 746 description += "has TS0 and TS1"; 747 break; 748 } 749 description += " exceeding their high temperature threshold"; 750 break; 751 case dimm_status::SPD_HUB_HIGH_TEMP: 752 if (byte012 == 0x01) 753 { 754 description += "has SPD/HUB high temp condition"; 755 } 756 break; 757 default: 758 description += "has unsupported status " + 759 std::to_string(byte3); 760 break; 761 } 762 } 763 764 // Log to Redfish event 765 sendJournalRedfish(description, logLevel); 766 } 767 768 void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId, 769 uint32_t presentReading) 770 { 771 log_level logLevel{log_level::WARNING}; 772 std::string description; 773 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 774 uint32_t byte012 = presentReading & 0xffffff; 775 776 description += prefixMsgStrCreation(tid, sensorId); 777 778 description += "DDR "; 779 if (ddrStatusToMsgMap.contains(byte3)) 780 { 781 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR) 782 { 783 logLevel = log_level::OK; 784 } 785 786 description += ddrStatusToMsgMap[byte3]; 787 788 if (byte3 == ddr_status::CONFIGURATION_FAILURE || 789 byte3 == ddr_status::TRAINING_FAILURE) 790 { 791 // List out failed DIMMs 792 description += dimmIdxsToString(byte012); 793 } 794 } 795 else 796 { 797 description += "has unsupported status " + std::to_string(byte3); 798 } 799 800 // Log to Redfish event 801 sendJournalRedfish(description, logLevel); 802 } 803 804 void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId, 805 uint32_t presentReading) 806 { 807 log_level logLevel{log_level::WARNING}; 808 std::string description; 809 std::stringstream strStream; 810 811 description += prefixMsgStrCreation(tid, sensorId); 812 813 VRDStatus_t status{presentReading}; 814 815 if (status.bits.warning && status.bits.critical) 816 { 817 description += "A VR warning and a VR critical"; 818 logLevel = log_level::CRITICAL; 819 } 820 else 821 { 822 if (status.bits.warning) 823 { 824 description += "A VR warning"; 825 } 826 else if (status.bits.critical) 827 { 828 description += "A VR critical"; 829 logLevel = log_level::CRITICAL; 830 } 831 else 832 { 833 description += "No VR warning or critical"; 834 logLevel = log_level::OK; 835 } 836 } 837 description += " condition observed"; 838 839 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex 840 << std::setw(2) 841 << static_cast<uint32_t>(status.bits.vr_status_byte_high) 842 << "; VR status byte low is 0x" << std::setw(2) 843 << static_cast<uint32_t>(status.bits.vr_status_byte_low) 844 << "; Reading is 0x" << std::setw(2) 845 << static_cast<uint32_t>(presentReading) << ";"; 846 847 description += strStream.str(); 848 849 // Log to Redfish event 850 sendJournalRedfish(description, logLevel); 851 } 852 853 } // namespace oem_ampere 854 } // namespace pldm 855