1 #include "oem_event_manager.hpp" 2 3 #include "requester/handler.hpp" 4 #include "requester/request.hpp" 5 6 #include <config.h> 7 #include <libpldm/pldm.h> 8 #include <libpldm/utils.h> 9 #include <systemd/sd-journal.h> 10 11 #include <phosphor-logging/lg2.hpp> 12 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 13 14 #include <algorithm> 15 #include <map> 16 #include <sstream> 17 #include <string> 18 #include <unordered_map> 19 20 namespace pldm 21 { 22 namespace oem_ampere 23 { 24 namespace boot_stage = boot::stage; 25 namespace ddr_status = ddr::status; 26 namespace dimm_status = dimm::status; 27 namespace dimm_syndrome = dimm::training_failure::dimm_syndrome; 28 namespace phy_syndrome = dimm::training_failure::phy_syndrome; 29 namespace training_failure = dimm::training_failure; 30 31 constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK"; 32 constexpr const char* ampereWarningRegistry = 33 "OpenBMC.0.1.AmpereWarning.Warning"; 34 constexpr const char* ampereCriticalRegistry = 35 "OpenBMC.0.1.AmpereCritical.Critical"; 36 constexpr const char* BIOSFWPanicRegistry = 37 "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning"; 38 constexpr auto maxDIMMIdxBitNum = 24; 39 constexpr auto maxDIMMInstantNum = 24; 40 41 /* 42 An array of possible boot status of a boot stage. 43 The index maps with byte 0 of boot code. 44 */ 45 std::array<std::string, 3> bootStatMsg = {" booting", " completed", " failed"}; 46 47 /* 48 An array of possible boot status of DDR training stage. 49 The index maps with byte 0 of boot code. 50 */ 51 std::array<std::string, 3> ddrTrainingMsg = { 52 " progress started", " in-progress", " progress completed"}; 53 54 /* 55 A map between PMIC status and logging strings. 56 */ 57 std::array<std::string, 8> pmicTempAlertMsg = { 58 "Below 85°C", "85°C", "95°C", "105°C", 59 "115°C", "125°C", "135°C", "Equal or greater than 140°C"}; 60 61 /* 62 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC 63 EPs through SMBus and PCIe. When host boots up, SMBUS interface 64 comes up first. In this interface, BMC is bus owner. 65 66 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available). 67 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available). 68 */ 69 EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}}; 70 71 /* 72 A map between sensor IDs and their names in string. 73 Using pldm::oem::sensor_ids 74 */ 75 EventToMsgMap_t sensorIdToStrMap = { 76 {DDR_STATUS, "DDR_STATUS"}, 77 {PCP_VR_STATE, "PCP_VR_STATE"}, 78 {SOC_VR_STATE, "SOC_VR_STATE"}, 79 {DPHY_VR1_STATE, "DPHY_VR1_STATE"}, 80 {DPHY_VR2_STATE, "DPHY_VR2_STATE"}, 81 {D2D_VR_STATE, "D2D_VR_STATE"}, 82 {IOC_VR1_STATE, "IOC_VR1_STATE"}, 83 {IOC_VR2_STATE, "IOC_VR2_STATE"}, 84 {PCI_D_VR_STATE, "PCI_D_VR_STATE"}, 85 {PCI_A_VR_STATE, "PCI_A_VR_STATE"}, 86 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"}, 87 {BOOT_OVERALL, "BOOT_OVERALL"}, 88 {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"}, 89 {WATCH_DOG, "WATCH_DOG"}}; 90 91 /* 92 A map between the boot stages and logging strings. 93 Using pldm::oem::boot::stage::boot_stage 94 */ 95 EventToMsgMap_t bootStageToMsgMap = { 96 {boot_stage::SECPRO, "SECpro"}, 97 {boot_stage::MPRO, "Mpro"}, 98 {boot_stage::ATF_BL1, "ATF BL1"}, 99 {boot_stage::ATF_BL2, "ATF BL2"}, 100 {boot_stage::DDR_INITIALIZATION, "DDR initialization"}, 101 {boot_stage::DDR_TRAINING, "DDR training"}, 102 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"}, 103 {boot_stage::ATF_BL31, "ATF BL31"}, 104 {boot_stage::ATF_BL32, "ATF BL32"}, 105 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"}, 106 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN, 107 "ATF BL33 (UEFI) booting status = "}}; 108 109 /* 110 A map between DDR status and logging strings. 111 Using pldm::oem::ddr::status::ddr_status 112 */ 113 EventToMsgMap_t ddrStatusToMsgMap = { 114 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"}, 115 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"}, 116 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"}, 117 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"}, 118 {ddr_status::OTHER_FAILURE, "has other failure"}, 119 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG, 120 "has boot failure due to no configuration"}, 121 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS, 122 "failsafe activated but boot success with the next valid configuration"}}; 123 124 /* 125 A map between DIMM status and logging strings. 126 Using pldm::oem::dimm::status::dimm_status 127 */ 128 EventToMsgMap_t dimmStatusToMsgMap = { 129 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"}, 130 {dimm_status::NOT_INSTALLED, "is not installed"}, 131 {dimm_status::OTHER_FAILURE, "has other failure"}, 132 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"}, 133 {dimm_status::TRAINING_FAILURE, "has training failure; "}, 134 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}}; 135 136 /* 137 A map between PHY training failure syndrome and logging strings. 138 Using 139 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome 140 */ 141 EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = { 142 {phy_syndrome::NA, "(N/A)"}, 143 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"}, 144 {phy_syndrome::CA_LEVELING, "(CA leveling)"}, 145 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE, 146 "(PHY write level failure - see syndrome 1)"}, 147 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE, 148 "(PHY read gate leveling failure)"}, 149 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"}, 150 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"}, 151 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}}; 152 153 /* 154 A map between DIMM training failure syndrome and logging strings. 155 Using 156 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome 157 */ 158 EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = { 159 {dimm_syndrome::NA, "(N/A)"}, 160 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE, 161 "(DRAM VREFDQ training failure)"}, 162 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"}, 163 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE, 164 "(LRDRIMM DB SW training failure)"}}; 165 166 /* 167 A map between DIMM training failure type and a pair of <logging strings - 168 syndrome map>. Using 169 pldm::oem::dimm::training_faillure::dimm_training_failure_type 170 */ 171 std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>> 172 dimmTrainingFailureTypeMap = { 173 {training_failure::PHY_TRAINING_FAILURE_TYPE, 174 std::make_pair("PHY training failure", 175 phyTrainingFailureSyndromeToMsgMap)}, 176 {training_failure::DIMM_TRAINING_FAILURE_TYPE, 177 std::make_pair("DIMM training failure", 178 dimmTrainingFailureSyndromeToMsgMap)}}; 179 180 /* 181 A map between log level and the registry used for Redfish SEL log 182 Using pldm::oem::log_level 183 */ 184 std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = { 185 {log_level::OK, ampereEventRegistry}, 186 {log_level::WARNING, ampereWarningRegistry}, 187 {log_level::CRITICAL, ampereCriticalRegistry}, 188 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}}; 189 190 std::unordered_map< 191 uint16_t, 192 std::vector<std::pair< 193 std::string, 194 std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>> 195 stateSensorToMsgMap = { 196 {SOC_HEALTH_AVAILABILITY, 197 {{"SoC Health", 198 {{1, {log_level::OK, "Normal"}}, 199 {2, {log_level::WARNING, "Non-Critical"}}, 200 {3, {log_level::CRITICAL, "Critical"}}, 201 {4, {log_level::CRITICAL, "Fatal"}}}}, 202 {"SoC Availability", 203 {{1, {log_level::OK, "Enabled"}}, 204 {2, {log_level::WARNING, "Disabled"}}, 205 {3, {log_level::CRITICAL, "Shutdown"}}}}}}, 206 {WATCH_DOG, 207 {{"Global Watch Dog", 208 {{1, {log_level::OK, "Normal"}}, 209 {2, {log_level::CRITICAL, "Timer Expired"}}}}, 210 {"Secure Watch Dog", 211 {{1, {log_level::OK, "Normal"}}, 212 {2, {log_level::CRITICAL, "Timer Expired"}}}}, 213 {"Non-secure Watch Dog", 214 {{1, {log_level::OK, "Normal"}}, 215 {2, {log_level::CRITICAL, "Timer Expired"}}}}}}}; 216 217 std::string 218 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId) 219 { 220 std::string description; 221 if (!tidToSocketNameMap.contains(tid)) 222 { 223 description += "TID " + std::to_string(tid) + ": "; 224 } 225 else 226 { 227 description += tidToSocketNameMap[tid] + ": "; 228 } 229 230 if (!sensorIdToStrMap.contains(sensorId)) 231 { 232 description += "Sensor ID " + std::to_string(sensorId) + ": "; 233 } 234 else 235 { 236 description += sensorIdToStrMap[sensorId] + ": "; 237 } 238 239 return description; 240 } 241 242 void OemEventManager::sendJournalRedfish(const std::string& description, 243 log_level& logLevel) 244 { 245 if (description.empty()) 246 { 247 return; 248 } 249 250 if (!logLevelToRedfishMsgIdMap.contains(logLevel)) 251 { 252 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel, 253 "DES", description); 254 return; 255 } 256 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel]; 257 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID", 258 redfishMsgId, "REDFISH_MESSAGE_ARGS", description); 259 } 260 261 std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs) 262 { 263 std::string description; 264 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum)) 265 { 266 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx)) 267 { 268 description += " #" + std::to_string(bitIdx); 269 } 270 } 271 return description; 272 } 273 274 void OemEventManager::handleBootOverallEvent( 275 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading) 276 { 277 log_level logLevel{log_level::OK}; 278 std::string description; 279 std::stringstream strStream; 280 281 uint8_t byte0 = (presentReading & 0x000000ff); 282 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8; 283 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16; 284 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 285 /* 286 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31, 287 * ATF BL32 and DDR initialization 288 */ 289 if (bootStageToMsgMap.contains(byte3)) 290 { 291 // Boot stage adding 292 description += bootStageToMsgMap[byte3]; 293 294 switch (byte3) 295 { 296 case boot_stage::DDR_TRAINING: 297 if (byte0 >= ddrTrainingMsg.size()) 298 { 299 logLevel = log_level::BIOSFWPANIC; 300 description += " unknown status"; 301 } 302 else 303 { 304 description += ddrTrainingMsg[byte0]; 305 } 306 if (0x01 == byte0) 307 { 308 // Add complete percentage 309 description += " at " + std::to_string(byte1) + "%"; 310 } 311 break; 312 case boot_stage::S0_DDR_TRAINING_FAILURE: 313 case boot_stage::S1_DDR_TRAINING_FAILURE: 314 // ddr_training_status_msg() 315 logLevel = log_level::BIOSFWPANIC; 316 description += " at DIMMs:"; 317 // dimmIdxs = presentReading & 0x00ffffff; 318 description += dimmIdxsToString(presentReading & 0x00ffffff); 319 description += " of socket "; 320 description += 321 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1"; 322 break; 323 default: 324 if (byte0 >= bootStatMsg.size()) 325 { 326 logLevel = log_level::BIOSFWPANIC; 327 description += " unknown status"; 328 } 329 else 330 { 331 description += bootStatMsg[byte0]; 332 } 333 break; 334 } 335 336 // Sensor report action is fail 337 if (boot::status::BOOT_STATUS_FAILURE == byte2) 338 { 339 logLevel = log_level::BIOSFWPANIC; 340 } 341 } 342 else 343 { 344 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX) 345 { 346 description += 347 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN]; 348 349 strStream 350 << "Segment (0x" << std::setfill('0') << std::hex 351 << std::setw(8) << static_cast<uint32_t>(presentReading) 352 << "); Status Class (0x" << std::setw(2) 353 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x" 354 << std::setw(2) << static_cast<uint32_t>(byte2) 355 << "); Operation Code (0x" << std::setw(4) 356 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16) 357 << ")" << std::dec; 358 359 description += strStream.str(); 360 } 361 } 362 363 // Log to Redfish event 364 sendJournalRedfish(description, logLevel); 365 } 366 367 int OemEventManager::processNumericSensorEvent( 368 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 369 size_t sensorDataLength) 370 { 371 uint8_t eventState = 0; 372 uint8_t previousEventState = 0; 373 uint8_t sensorDataSize = 0; 374 uint32_t presentReading; 375 auto rc = decode_numeric_sensor_data( 376 sensorData, sensorDataLength, &eventState, &previousEventState, 377 &sensorDataSize, &presentReading); 378 if (rc) 379 { 380 lg2::error( 381 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ", 382 "TID", tid, "RC", rc); 383 return rc; 384 } 385 386 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) 387 if (auto dimmIdx = (sensorId - 4) / 2; 388 sensorId >= 4 && dimmIdx >= 0 && dimmIdx < maxDIMMInstantNum) 389 { 390 handleDIMMStatusEvent(tid, sensorId, presentReading); 391 return PLDM_SUCCESS; 392 } 393 394 switch (sensorId) 395 { 396 case BOOT_OVERALL: 397 handleBootOverallEvent(tid, sensorId, presentReading); 398 break; 399 case PCIE_HOT_PLUG: 400 handlePCIeHotPlugEvent(tid, sensorId, presentReading); 401 break; 402 case DDR_STATUS: 403 handleDDRStatusEvent(tid, sensorId, presentReading); 404 break; 405 case PCP_VR_STATE: 406 case SOC_VR_STATE: 407 case DPHY_VR1_STATE: 408 case DPHY_VR2_STATE: 409 case D2D_VR_STATE: 410 case IOC_VR1_STATE: 411 case IOC_VR2_STATE: 412 case PCI_D_VR_STATE: 413 case PCI_A_VR_STATE: 414 handleVRDStatusEvent(tid, sensorId, presentReading); 415 break; 416 case WATCH_DOG: 417 handleNumericWatchdogEvent(tid, sensorId, presentReading); 418 break; 419 default: 420 std::string description; 421 std::stringstream strStream; 422 log_level logLevel = log_level::OK; 423 424 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: "; 425 description += prefixMsgStrCreation(tid, sensorId); 426 strStream << std::setfill('0') << std::hex << "eventState 0x" 427 << std::setw(2) << static_cast<uint32_t>(eventState) 428 << " previousEventState 0x" << std::setw(2) 429 << static_cast<uint32_t>(previousEventState) 430 << " sensorDataSize 0x" << std::setw(2) 431 << static_cast<uint32_t>(sensorDataSize) 432 << " presentReading 0x" << std::setw(8) 433 << static_cast<uint32_t>(presentReading) << std::dec; 434 description += strStream.str(); 435 436 sendJournalRedfish(description, logLevel); 437 break; 438 } 439 return PLDM_SUCCESS; 440 } 441 442 int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId, 443 const uint8_t* sensorData, 444 size_t sensorDataLength) 445 { 446 uint8_t sensorOffset = 0; 447 uint8_t eventState = 0; 448 uint8_t previousEventState = 0; 449 450 auto rc = 451 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset, 452 &eventState, &previousEventState); 453 if (rc) 454 { 455 lg2::error( 456 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}", 457 "TID", tid, "RC", rc); 458 return rc; 459 } 460 461 std::string description; 462 log_level logLevel = log_level::OK; 463 464 if (stateSensorToMsgMap.contains(sensorId)) 465 { 466 description += prefixMsgStrCreation(tid, sensorId); 467 auto componentMap = stateSensorToMsgMap[sensorId]; 468 if (sensorOffset < componentMap.size()) 469 { 470 description += std::get<0>(componentMap[sensorOffset]); 471 auto stateMap = std::get<1>(componentMap[sensorOffset]); 472 if (stateMap.contains(eventState)) 473 { 474 logLevel = std::get<0>(stateMap[eventState]); 475 description += " state : " + std::get<1>(stateMap[eventState]); 476 if (stateMap.contains(previousEventState)) 477 { 478 description += "; previous state: " + 479 std::get<1>(stateMap[previousEventState]); 480 } 481 } 482 else 483 { 484 description += " sends unsupported event state: " + 485 std::to_string(eventState); 486 if (stateMap.contains(previousEventState)) 487 { 488 description += "; previous state: " + 489 std::get<1>(stateMap[previousEventState]); 490 } 491 } 492 } 493 else 494 { 495 description += "sends unsupported component sensor offset " + 496 std::to_string(sensorOffset); 497 } 498 } 499 else 500 { 501 std::stringstream strStream; 502 description += "SENSOR_EVENT : STATE_SENSOR_STATE: "; 503 description += prefixMsgStrCreation(tid, sensorId); 504 strStream << std::setfill('0') << std::hex << "sensorOffset 0x" 505 << std::setw(2) << static_cast<uint32_t>(sensorOffset) 506 << "eventState 0x" << std::setw(2) 507 << static_cast<uint32_t>(eventState) 508 << " previousEventState 0x" << std::setw(2) 509 << static_cast<uint32_t>(previousEventState) << std::dec; 510 description += strStream.str(); 511 } 512 513 sendJournalRedfish(description, logLevel); 514 515 return PLDM_SUCCESS; 516 } 517 518 int OemEventManager::processSensorOpStateEvent( 519 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 520 size_t sensorDataLength) 521 { 522 uint8_t present_op_state = 0; 523 uint8_t previous_op_state = 0; 524 525 auto rc = decode_sensor_op_data(sensorData, sensorDataLength, 526 &present_op_state, &previous_op_state); 527 if (rc) 528 { 529 lg2::error( 530 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}", 531 "TID", tid, "RC", rc); 532 return rc; 533 } 534 535 std::string description; 536 std::stringstream strStream; 537 log_level logLevel = log_level::OK; 538 539 description += "SENSOR_EVENT : SENSOR_OP_STATE: "; 540 description += prefixMsgStrCreation(tid, sensorId); 541 strStream << std::setfill('0') << std::hex << "present_op_state 0x" 542 << std::setw(2) << static_cast<uint32_t>(present_op_state) 543 << "previous_op_state 0x" << std::setw(2) 544 << static_cast<uint32_t>(previous_op_state) << std::dec; 545 description += strStream.str(); 546 547 sendJournalRedfish(description, logLevel); 548 549 return PLDM_SUCCESS; 550 } 551 552 int OemEventManager::handleSensorEvent( 553 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, 554 pldm_tid_t tid, size_t eventDataOffset) 555 { 556 /* This OEM event handler is only used for SoC terminus*/ 557 if (!tidToSocketNameMap.contains(tid)) 558 { 559 return PLDM_SUCCESS; 560 } 561 auto eventData = 562 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; 563 auto eventDataSize = payloadLength - eventDataOffset; 564 565 uint16_t sensorId = 0; 566 uint8_t sensorEventClassType = 0; 567 size_t eventClassDataOffset = 0; 568 auto rc = 569 decode_sensor_event_data(eventData, eventDataSize, &sensorId, 570 &sensorEventClassType, &eventClassDataOffset); 571 if (rc) 572 { 573 lg2::error("Failed to decode sensor event data return code {RC}.", "RC", 574 rc); 575 return rc; 576 } 577 const uint8_t* sensorData = eventData + eventClassDataOffset; 578 size_t sensorDataLength = eventDataSize - eventClassDataOffset; 579 580 switch (sensorEventClassType) 581 { 582 case PLDM_NUMERIC_SENSOR_STATE: 583 { 584 return processNumericSensorEvent(tid, sensorId, sensorData, 585 sensorDataLength); 586 } 587 case PLDM_STATE_SENSOR_STATE: 588 { 589 return processStateSensorEvent(tid, sensorId, sensorData, 590 sensorDataLength); 591 } 592 case PLDM_SENSOR_OP_STATE: 593 { 594 return processSensorOpStateEvent(tid, sensorId, sensorData, 595 sensorDataLength); 596 } 597 default: 598 std::string description; 599 std::stringstream strStream; 600 log_level logLevel = log_level::OK; 601 602 description += "SENSOR_EVENT : Unsupported Sensor Class " + 603 std::to_string(sensorEventClassType) + ": "; 604 description += prefixMsgStrCreation(tid, sensorId); 605 strStream << std::setfill('0') << std::hex 606 << std::setw(sizeof(sensorData) * 2) << "Sensor data: "; 607 608 auto dataPtr = sensorData; 609 for ([[maybe_unused]] const auto& i : 610 std::views::iota(0, (int)sensorDataLength)) 611 { 612 strStream << "0x" << static_cast<uint32_t>(*dataPtr); 613 dataPtr += sizeof(sensorData); 614 } 615 616 description += strStream.str(); 617 618 sendJournalRedfish(description, logLevel); 619 } 620 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE", 621 sensorEventClassType); 622 return PLDM_ERROR; 623 } 624 625 void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId, 626 uint32_t presentReading) 627 { 628 std::string description; 629 std::stringstream strStream; 630 PCIeHotPlugEventRecord_t record{presentReading}; 631 632 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal"; 633 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed"; 634 log_level logLevel = 635 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING; 636 637 description += prefixMsgStrCreation(tid, sensorId); 638 639 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2) 640 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x" 641 << std::setw(2) << static_cast<uint32_t>(record.bits.bus) 642 << "); Device (0x" << std::setw(2) 643 << static_cast<uint32_t>(record.bits.device) << "); Function (0x" 644 << std::setw(2) << static_cast<uint32_t>(record.bits.function) 645 << "); Action (" << sAction << "); Operation status (" 646 << sOpStatus << "); Media slot number (" << std::dec 647 << static_cast<uint32_t>(record.bits.mediaSlot) << ")"; 648 649 description += strStream.str(); 650 651 // Log to Redfish event 652 sendJournalRedfish(description, logLevel); 653 } 654 655 std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo) 656 { 657 std::string description; 658 DIMMTrainingFailure_t failure{failureInfo}; 659 660 if (dimmTrainingFailureTypeMap.contains(failure.bits.type)) 661 { 662 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type]; 663 664 description += std::get<0>(failureInfoMap); 665 666 description += "; MCU rank index " + 667 std::to_string(failure.bits.mcuRankIdx); 668 669 description += "; Slice number " + 670 std::to_string(failure.bits.sliceNum); 671 672 description += "; Upper nibble error status: "; 673 description += (!failure.bits.upperNibbStatErr) 674 ? "No error" 675 : "Found no rising edge"; 676 677 description += "; Lower nibble error status: "; 678 description += (!failure.bits.lowerNibbStatErr) 679 ? "No error" 680 : "Found no rising edge"; 681 682 description += "; Failure syndrome 0: "; 683 684 auto& syndromeMap = std::get<1>(failureInfoMap); 685 if (syndromeMap.contains(failure.bits.syndrome)) 686 { 687 description += syndromeMap[failure.bits.syndrome]; 688 } 689 else 690 { 691 description += "(Unknown syndrome)"; 692 } 693 } 694 else 695 { 696 description += "Unknown training failure type " + 697 std::to_string(failure.bits.type); 698 } 699 700 return description; 701 } 702 703 void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId, 704 uint32_t presentReading) 705 { 706 log_level logLevel{log_level::WARNING}; 707 std::string description; 708 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 709 uint32_t byte012 = presentReading & 0xffffff; 710 711 description += prefixMsgStrCreation(tid, sensorId); 712 713 uint8_t dimmIdx = (sensorId - 4) / 2; 714 715 description += "DIMM " + std::to_string(dimmIdx) + " "; 716 717 if (dimmStatusToMsgMap.contains(byte3)) 718 { 719 if (byte3 == dimm_status::INSTALLED_NO_ERROR || 720 byte3 == dimm_status::INSTALLED_BUT_DISABLED) 721 { 722 logLevel = log_level::OK; 723 } 724 725 description += dimmStatusToMsgMap[byte3]; 726 727 if (byte3 == dimm_status::TRAINING_FAILURE) 728 { 729 description += "; " + dimmTrainingFailureToMsg(byte012); 730 } 731 else if (byte3 == dimm_status::PMIC_TEMP_ALERT) 732 { 733 uint8_t byte0 = (byte012 & 0xff); 734 if (byte0 < pmicTempAlertMsg.size()) 735 { 736 description += ": " + pmicTempAlertMsg[byte0]; 737 } 738 } 739 } 740 else 741 { 742 switch (byte3) 743 { 744 case dimm_status::PMIC_HIGH_TEMP: 745 if (byte012 == 0x01) 746 { 747 description += "has PMIC high temp condition"; 748 } 749 break; 750 case dimm_status::TSx_HIGH_TEMP: 751 switch (byte012) 752 { 753 case 0x01: 754 description += "has TS0"; 755 break; 756 case 0x02: 757 description += "has TS1"; 758 break; 759 case 0x03: 760 description += "has TS0 and TS1"; 761 break; 762 } 763 description += " exceeding their high temperature threshold"; 764 break; 765 case dimm_status::SPD_HUB_HIGH_TEMP: 766 if (byte012 == 0x01) 767 { 768 description += "has SPD/HUB high temp condition"; 769 } 770 break; 771 default: 772 description += "has unsupported status " + 773 std::to_string(byte3); 774 break; 775 } 776 } 777 778 // Log to Redfish event 779 sendJournalRedfish(description, logLevel); 780 } 781 782 void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId, 783 uint32_t presentReading) 784 { 785 log_level logLevel{log_level::WARNING}; 786 std::string description; 787 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 788 uint32_t byte012 = presentReading & 0xffffff; 789 790 description += prefixMsgStrCreation(tid, sensorId); 791 792 description += "DDR "; 793 if (ddrStatusToMsgMap.contains(byte3)) 794 { 795 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR) 796 { 797 logLevel = log_level::OK; 798 } 799 800 description += ddrStatusToMsgMap[byte3]; 801 802 if (byte3 == ddr_status::CONFIGURATION_FAILURE || 803 byte3 == ddr_status::TRAINING_FAILURE) 804 { 805 // List out failed DIMMs 806 description += dimmIdxsToString(byte012); 807 } 808 } 809 else 810 { 811 description += "has unsupported status " + std::to_string(byte3); 812 } 813 814 // Log to Redfish event 815 sendJournalRedfish(description, logLevel); 816 } 817 818 void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId, 819 uint32_t presentReading) 820 { 821 log_level logLevel{log_level::WARNING}; 822 std::string description; 823 std::stringstream strStream; 824 825 description += prefixMsgStrCreation(tid, sensorId); 826 827 VRDStatus_t status{presentReading}; 828 829 if (status.bits.warning && status.bits.critical) 830 { 831 description += "A VR warning and a VR critical"; 832 logLevel = log_level::CRITICAL; 833 } 834 else 835 { 836 if (status.bits.warning) 837 { 838 description += "A VR warning"; 839 } 840 else if (status.bits.critical) 841 { 842 description += "A VR critical"; 843 logLevel = log_level::CRITICAL; 844 } 845 else 846 { 847 description += "No VR warning or critical"; 848 logLevel = log_level::OK; 849 } 850 } 851 description += " condition observed"; 852 853 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex 854 << std::setw(2) 855 << static_cast<uint32_t>(status.bits.vr_status_byte_high) 856 << "; VR status byte low is 0x" << std::setw(2) 857 << static_cast<uint32_t>(status.bits.vr_status_byte_low) 858 << "; Reading is 0x" << std::setw(2) 859 << static_cast<uint32_t>(presentReading) << ";"; 860 861 description += strStream.str(); 862 863 // Log to Redfish event 864 sendJournalRedfish(description, logLevel); 865 } 866 867 void OemEventManager::handleNumericWatchdogEvent( 868 pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading) 869 { 870 std::string description; 871 log_level logLevel = log_level::CRITICAL; 872 873 description += prefixMsgStrCreation(tid, sensorId); 874 875 if (presentReading & 0x01) 876 { 877 description += "Global watchdog expired;"; 878 } 879 if (presentReading & 0x02) 880 { 881 description += "Secure watchdog expired;"; 882 } 883 if (presentReading & 0x04) 884 { 885 description += "Non-secure watchdog expired;"; 886 } 887 888 // Log to Redfish event 889 sendJournalRedfish(description, logLevel); 890 } 891 892 } // namespace oem_ampere 893 } // namespace pldm 894