1 #include "oem_event_manager.hpp" 2 3 #include "requester/handler.hpp" 4 #include "requester/request.hpp" 5 6 #include <config.h> 7 #include <libpldm/pldm.h> 8 #include <libpldm/utils.h> 9 #include <systemd/sd-journal.h> 10 11 #include <phosphor-logging/lg2.hpp> 12 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 13 14 #include <algorithm> 15 #include <map> 16 #include <sstream> 17 #include <string> 18 #include <unordered_map> 19 20 namespace pldm 21 { 22 namespace oem_ampere 23 { 24 namespace boot_stage = boot::stage; 25 namespace ddr_status = ddr::status; 26 namespace dimm_status = dimm::status; 27 namespace dimm_syndrome = dimm::training_failure::dimm_syndrome; 28 namespace phy_syndrome = dimm::training_failure::phy_syndrome; 29 namespace training_failure = dimm::training_failure; 30 31 constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK"; 32 constexpr const char* ampereWarningRegistry = 33 "OpenBMC.0.1.AmpereWarning.Warning"; 34 constexpr const char* ampereCriticalRegistry = 35 "OpenBMC.0.1.AmpereCritical.Critical"; 36 constexpr const char* BIOSFWPanicRegistry = 37 "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning"; 38 constexpr auto maxDIMMIdxBitNum = 24; 39 constexpr auto maxDIMMInstantNum = 24; 40 41 /* 42 An array of possible boot status of a boot stage. 43 The index maps with byte 0 of boot code. 44 */ 45 std::array<std::string, 3> bootStatMsg = {" booting", " completed", " failed"}; 46 47 /* 48 An array of possible boot status of DDR training stage. 49 The index maps with byte 0 of boot code. 50 */ 51 std::array<std::string, 3> ddrTrainingMsg = { 52 " progress started", " in-progress", " progress completed"}; 53 54 /* 55 A map between PMIC status and logging strings. 56 */ 57 std::array<std::string, 8> pmicTempAlertMsg = { 58 "Below 85°C", "85°C", "95°C", "105°C", 59 "115°C", "125°C", "135°C", "Equal or greater than 140°C"}; 60 61 /* 62 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC 63 EPs through SMBus and PCIe. When host boots up, SMBUS interface 64 comes up first. In this interface, BMC is bus owner. 65 66 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available). 67 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available). 68 */ 69 EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}}; 70 71 /* 72 A map between sensor IDs and their names in string. 73 Using pldm::oem::sensor_ids 74 */ 75 EventToMsgMap_t sensorIdToStrMap = { 76 {DDR_STATUS, "DDR_STATUS"}, 77 {PCP_VR_STATE, "PCP_VR_STATE"}, 78 {SOC_VR_STATE, "SOC_VR_STATE"}, 79 {DPHY_VR1_STATE, "DPHY_VR1_STATE"}, 80 {DPHY_VR2_STATE, "DPHY_VR2_STATE"}, 81 {D2D_VR_STATE, "D2D_VR_STATE"}, 82 {IOC_VR1_STATE, "IOC_VR1_STATE"}, 83 {IOC_VR2_STATE, "IOC_VR2_STATE"}, 84 {PCI_D_VR_STATE, "PCI_D_VR_STATE"}, 85 {PCI_A_VR_STATE, "PCI_A_VR_STATE"}, 86 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"}, 87 {BOOT_OVERALL, "BOOT_OVERALL"}, 88 {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"}, 89 {WATCH_DOG, "WATCH_DOG"}}; 90 91 /* 92 A map between the boot stages and logging strings. 93 Using pldm::oem::boot::stage::boot_stage 94 */ 95 EventToMsgMap_t bootStageToMsgMap = { 96 {boot_stage::SECPRO, "SECpro"}, 97 {boot_stage::MPRO, "Mpro"}, 98 {boot_stage::ATF_BL1, "ATF BL1"}, 99 {boot_stage::ATF_BL2, "ATF BL2"}, 100 {boot_stage::DDR_INITIALIZATION, "DDR initialization"}, 101 {boot_stage::DDR_TRAINING, "DDR training"}, 102 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"}, 103 {boot_stage::ATF_BL31, "ATF BL31"}, 104 {boot_stage::ATF_BL32, "ATF BL32"}, 105 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"}, 106 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN, 107 "ATF BL33 (UEFI) booting status = "}}; 108 109 /* 110 A map between DDR status and logging strings. 111 Using pldm::oem::ddr::status::ddr_status 112 */ 113 EventToMsgMap_t ddrStatusToMsgMap = { 114 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"}, 115 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"}, 116 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"}, 117 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"}, 118 {ddr_status::OTHER_FAILURE, "has other failure"}, 119 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG, 120 "has boot failure due to no configuration"}, 121 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS, 122 "failsafe activated but boot success with the next valid configuration"}}; 123 124 /* 125 A map between DIMM status and logging strings. 126 Using pldm::oem::dimm::status::dimm_status 127 */ 128 EventToMsgMap_t dimmStatusToMsgMap = { 129 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"}, 130 {dimm_status::NOT_INSTALLED, "is not installed"}, 131 {dimm_status::OTHER_FAILURE, "has other failure"}, 132 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"}, 133 {dimm_status::TRAINING_FAILURE, "has training failure; "}, 134 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}}; 135 136 /* 137 A map between PHY training failure syndrome and logging strings. 138 Using 139 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome 140 */ 141 EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = { 142 {phy_syndrome::NA, "(N/A)"}, 143 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"}, 144 {phy_syndrome::CA_LEVELING, "(CA leveling)"}, 145 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE, 146 "(PHY write level failure - see syndrome 1)"}, 147 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE, 148 "(PHY read gate leveling failure)"}, 149 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"}, 150 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"}, 151 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}}; 152 153 /* 154 A map between DIMM training failure syndrome and logging strings. 155 Using 156 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome 157 */ 158 EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = { 159 {dimm_syndrome::NA, "(N/A)"}, 160 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE, 161 "(DRAM VREFDQ training failure)"}, 162 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"}, 163 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE, 164 "(LRDRIMM DB SW training failure)"}}; 165 166 /* 167 A map between DIMM training failure type and a pair of <logging strings - 168 syndrome map>. Using 169 pldm::oem::dimm::training_faillure::dimm_training_failure_type 170 */ 171 std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>> 172 dimmTrainingFailureTypeMap = { 173 {training_failure::PHY_TRAINING_FAILURE_TYPE, 174 std::make_pair("PHY training failure", 175 phyTrainingFailureSyndromeToMsgMap)}, 176 {training_failure::DIMM_TRAINING_FAILURE_TYPE, 177 std::make_pair("DIMM training failure", 178 dimmTrainingFailureSyndromeToMsgMap)}}; 179 180 /* 181 A map between log level and the registry used for Redfish SEL log 182 Using pldm::oem::log_level 183 */ 184 std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = { 185 {log_level::OK, ampereEventRegistry}, 186 {log_level::WARNING, ampereWarningRegistry}, 187 {log_level::CRITICAL, ampereCriticalRegistry}, 188 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}}; 189 190 std::unordered_map< 191 uint16_t, 192 std::vector<std::pair< 193 std::string, 194 std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>> 195 stateSensorToMsgMap = { 196 {SOC_HEALTH_AVAILABILITY, 197 {{"SoC Health", 198 {{1, {log_level::OK, "Normal"}}, 199 {2, {log_level::WARNING, "Non-Critical"}}, 200 {3, {log_level::CRITICAL, "Critical"}}, 201 {4, {log_level::CRITICAL, "Fatal"}}}}, 202 {"SoC Availability", 203 {{1, {log_level::OK, "Enabled"}}, 204 {2, {log_level::WARNING, "Disabled"}}, 205 {3, {log_level::CRITICAL, "Shutdown"}}}}}}, 206 {WATCH_DOG, 207 {{"Global Watch Dog", 208 {{1, {log_level::OK, "Normal"}}, 209 {2, {log_level::CRITICAL, "Timer Expired"}}}}, 210 {"Secure Watch Dog", 211 {{1, {log_level::OK, "Normal"}}, 212 {2, {log_level::CRITICAL, "Timer Expired"}}}}, 213 {"Non-secure Watch Dog", 214 {{1, {log_level::OK, "Normal"}}, 215 {2, {log_level::CRITICAL, "Timer Expired"}}}}}}}; 216 217 std::string 218 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId) 219 { 220 std::string description; 221 if (!tidToSocketNameMap.contains(tid)) 222 { 223 description += "TID " + std::to_string(tid) + ": "; 224 } 225 else 226 { 227 description += tidToSocketNameMap[tid] + ": "; 228 } 229 230 if (!sensorIdToStrMap.contains(sensorId)) 231 { 232 description += "Sensor ID " + std::to_string(sensorId) + ": "; 233 } 234 else 235 { 236 description += sensorIdToStrMap[sensorId] + ": "; 237 } 238 239 return description; 240 } 241 242 void OemEventManager::sendJournalRedfish(const std::string& description, 243 log_level& logLevel) 244 { 245 if (description.empty()) 246 { 247 return; 248 } 249 250 if (!logLevelToRedfishMsgIdMap.contains(logLevel)) 251 { 252 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel, 253 "DES", description); 254 return; 255 } 256 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel]; 257 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID", 258 redfishMsgId, "REDFISH_MESSAGE_ARGS", description); 259 } 260 261 std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs) 262 { 263 std::string description; 264 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum)) 265 { 266 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx)) 267 { 268 description += " #" + std::to_string(bitIdx); 269 } 270 } 271 return description; 272 } 273 274 uint8_t OemEventManager::sensorIdToDIMMIdx(const uint16_t& sensorId) 275 { 276 uint8_t dimmIdx = maxDIMMInstantNum; 277 int sensorId_Off = sensorId - 4; 278 if ((sensorId_Off >= 0) && ((sensorId_Off % 2) == 0) && 279 ((sensorId_Off / 2) < maxDIMMInstantNum)) 280 { 281 dimmIdx = sensorId_Off / 2; 282 } 283 return dimmIdx; 284 } 285 286 void OemEventManager::handleBootOverallEvent( 287 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading) 288 { 289 log_level logLevel{log_level::OK}; 290 std::string description; 291 std::stringstream strStream; 292 293 uint8_t byte0 = (presentReading & 0x000000ff); 294 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8; 295 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16; 296 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 297 /* 298 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31, 299 * ATF BL32 and DDR initialization 300 */ 301 if (bootStageToMsgMap.contains(byte3)) 302 { 303 // Boot stage adding 304 description += bootStageToMsgMap[byte3]; 305 306 switch (byte3) 307 { 308 case boot_stage::DDR_TRAINING: 309 if (byte0 >= ddrTrainingMsg.size()) 310 { 311 logLevel = log_level::BIOSFWPANIC; 312 description += " unknown status"; 313 } 314 else 315 { 316 description += ddrTrainingMsg[byte0]; 317 } 318 if (0x01 == byte0) 319 { 320 // Add complete percentage 321 description += " at " + std::to_string(byte1) + "%"; 322 } 323 break; 324 case boot_stage::S0_DDR_TRAINING_FAILURE: 325 case boot_stage::S1_DDR_TRAINING_FAILURE: 326 // ddr_training_status_msg() 327 logLevel = log_level::BIOSFWPANIC; 328 description += " at DIMMs:"; 329 // dimmIdxs = presentReading & 0x00ffffff; 330 description += dimmIdxsToString(presentReading & 0x00ffffff); 331 description += " of socket "; 332 description += 333 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1"; 334 break; 335 default: 336 if (byte0 >= bootStatMsg.size()) 337 { 338 logLevel = log_level::BIOSFWPANIC; 339 description += " unknown status"; 340 } 341 else 342 { 343 description += bootStatMsg[byte0]; 344 } 345 break; 346 } 347 348 // Sensor report action is fail 349 if (boot::status::BOOT_STATUS_FAILURE == byte2) 350 { 351 logLevel = log_level::BIOSFWPANIC; 352 } 353 } 354 else 355 { 356 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX) 357 { 358 description += 359 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN]; 360 361 strStream 362 << "Segment (0x" << std::setfill('0') << std::hex 363 << std::setw(8) << static_cast<uint32_t>(presentReading) 364 << "); Status Class (0x" << std::setw(2) 365 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x" 366 << std::setw(2) << static_cast<uint32_t>(byte2) 367 << "); Operation Code (0x" << std::setw(4) 368 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16) 369 << ")" << std::dec; 370 371 description += strStream.str(); 372 } 373 } 374 375 // Log to Redfish event 376 sendJournalRedfish(description, logLevel); 377 } 378 379 int OemEventManager::processNumericSensorEvent( 380 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 381 size_t sensorDataLength) 382 { 383 uint8_t eventState = 0; 384 uint8_t previousEventState = 0; 385 uint8_t sensorDataSize = 0; 386 uint32_t presentReading; 387 auto rc = decode_numeric_sensor_data( 388 sensorData, sensorDataLength, &eventState, &previousEventState, 389 &sensorDataSize, &presentReading); 390 if (rc) 391 { 392 lg2::error( 393 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ", 394 "TID", tid, "RC", rc); 395 return rc; 396 } 397 398 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) 399 if (auto dimmIdx = sensorIdToDIMMIdx(sensorId); dimmIdx < maxDIMMInstantNum) 400 { 401 handleDIMMStatusEvent(tid, sensorId, presentReading); 402 return PLDM_SUCCESS; 403 } 404 405 switch (sensorId) 406 { 407 case BOOT_OVERALL: 408 handleBootOverallEvent(tid, sensorId, presentReading); 409 break; 410 case PCIE_HOT_PLUG: 411 handlePCIeHotPlugEvent(tid, sensorId, presentReading); 412 break; 413 case DDR_STATUS: 414 handleDDRStatusEvent(tid, sensorId, presentReading); 415 break; 416 case PCP_VR_STATE: 417 case SOC_VR_STATE: 418 case DPHY_VR1_STATE: 419 case DPHY_VR2_STATE: 420 case D2D_VR_STATE: 421 case IOC_VR1_STATE: 422 case IOC_VR2_STATE: 423 case PCI_D_VR_STATE: 424 case PCI_A_VR_STATE: 425 handleVRDStatusEvent(tid, sensorId, presentReading); 426 break; 427 case WATCH_DOG: 428 handleNumericWatchdogEvent(tid, sensorId, presentReading); 429 break; 430 default: 431 std::string description; 432 std::stringstream strStream; 433 log_level logLevel = log_level::OK; 434 435 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: "; 436 description += prefixMsgStrCreation(tid, sensorId); 437 strStream << std::setfill('0') << std::hex << "eventState 0x" 438 << std::setw(2) << static_cast<uint32_t>(eventState) 439 << " previousEventState 0x" << std::setw(2) 440 << static_cast<uint32_t>(previousEventState) 441 << " sensorDataSize 0x" << std::setw(2) 442 << static_cast<uint32_t>(sensorDataSize) 443 << " presentReading 0x" << std::setw(8) 444 << static_cast<uint32_t>(presentReading) << std::dec; 445 description += strStream.str(); 446 447 sendJournalRedfish(description, logLevel); 448 break; 449 } 450 return PLDM_SUCCESS; 451 } 452 453 int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId, 454 const uint8_t* sensorData, 455 size_t sensorDataLength) 456 { 457 uint8_t sensorOffset = 0; 458 uint8_t eventState = 0; 459 uint8_t previousEventState = 0; 460 461 auto rc = 462 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset, 463 &eventState, &previousEventState); 464 if (rc) 465 { 466 lg2::error( 467 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}", 468 "TID", tid, "RC", rc); 469 return rc; 470 } 471 472 std::string description; 473 log_level logLevel = log_level::OK; 474 475 if (stateSensorToMsgMap.contains(sensorId)) 476 { 477 description += prefixMsgStrCreation(tid, sensorId); 478 auto componentMap = stateSensorToMsgMap[sensorId]; 479 if (sensorOffset < componentMap.size()) 480 { 481 description += std::get<0>(componentMap[sensorOffset]); 482 auto stateMap = std::get<1>(componentMap[sensorOffset]); 483 if (stateMap.contains(eventState)) 484 { 485 logLevel = std::get<0>(stateMap[eventState]); 486 description += " state : " + std::get<1>(stateMap[eventState]); 487 if (stateMap.contains(previousEventState)) 488 { 489 description += "; previous state: " + 490 std::get<1>(stateMap[previousEventState]); 491 } 492 } 493 else 494 { 495 description += " sends unsupported event state: " + 496 std::to_string(eventState); 497 if (stateMap.contains(previousEventState)) 498 { 499 description += "; previous state: " + 500 std::get<1>(stateMap[previousEventState]); 501 } 502 } 503 } 504 else 505 { 506 description += "sends unsupported component sensor offset " + 507 std::to_string(sensorOffset); 508 } 509 } 510 else 511 { 512 std::stringstream strStream; 513 description += "SENSOR_EVENT : STATE_SENSOR_STATE: "; 514 description += prefixMsgStrCreation(tid, sensorId); 515 strStream << std::setfill('0') << std::hex << "sensorOffset 0x" 516 << std::setw(2) << static_cast<uint32_t>(sensorOffset) 517 << "eventState 0x" << std::setw(2) 518 << static_cast<uint32_t>(eventState) 519 << " previousEventState 0x" << std::setw(2) 520 << static_cast<uint32_t>(previousEventState) << std::dec; 521 description += strStream.str(); 522 } 523 524 sendJournalRedfish(description, logLevel); 525 526 return PLDM_SUCCESS; 527 } 528 529 int OemEventManager::processSensorOpStateEvent( 530 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 531 size_t sensorDataLength) 532 { 533 uint8_t present_op_state = 0; 534 uint8_t previous_op_state = 0; 535 536 auto rc = decode_sensor_op_data(sensorData, sensorDataLength, 537 &present_op_state, &previous_op_state); 538 if (rc) 539 { 540 lg2::error( 541 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}", 542 "TID", tid, "RC", rc); 543 return rc; 544 } 545 546 std::string description; 547 std::stringstream strStream; 548 log_level logLevel = log_level::OK; 549 550 description += "SENSOR_EVENT : SENSOR_OP_STATE: "; 551 description += prefixMsgStrCreation(tid, sensorId); 552 strStream << std::setfill('0') << std::hex << "present_op_state 0x" 553 << std::setw(2) << static_cast<uint32_t>(present_op_state) 554 << "previous_op_state 0x" << std::setw(2) 555 << static_cast<uint32_t>(previous_op_state) << std::dec; 556 description += strStream.str(); 557 558 sendJournalRedfish(description, logLevel); 559 560 return PLDM_SUCCESS; 561 } 562 563 int OemEventManager::handleSensorEvent( 564 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, 565 pldm_tid_t tid, size_t eventDataOffset) 566 { 567 /* This OEM event handler is only used for SoC terminus*/ 568 if (!tidToSocketNameMap.contains(tid)) 569 { 570 return PLDM_SUCCESS; 571 } 572 auto eventData = 573 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; 574 auto eventDataSize = payloadLength - eventDataOffset; 575 576 uint16_t sensorId = 0; 577 uint8_t sensorEventClassType = 0; 578 size_t eventClassDataOffset = 0; 579 auto rc = 580 decode_sensor_event_data(eventData, eventDataSize, &sensorId, 581 &sensorEventClassType, &eventClassDataOffset); 582 if (rc) 583 { 584 lg2::error("Failed to decode sensor event data return code {RC}.", "RC", 585 rc); 586 return rc; 587 } 588 const uint8_t* sensorData = eventData + eventClassDataOffset; 589 size_t sensorDataLength = eventDataSize - eventClassDataOffset; 590 591 switch (sensorEventClassType) 592 { 593 case PLDM_NUMERIC_SENSOR_STATE: 594 { 595 return processNumericSensorEvent(tid, sensorId, sensorData, 596 sensorDataLength); 597 } 598 case PLDM_STATE_SENSOR_STATE: 599 { 600 return processStateSensorEvent(tid, sensorId, sensorData, 601 sensorDataLength); 602 } 603 case PLDM_SENSOR_OP_STATE: 604 { 605 return processSensorOpStateEvent(tid, sensorId, sensorData, 606 sensorDataLength); 607 } 608 default: 609 std::string description; 610 std::stringstream strStream; 611 log_level logLevel = log_level::OK; 612 613 description += "SENSOR_EVENT : Unsupported Sensor Class " + 614 std::to_string(sensorEventClassType) + ": "; 615 description += prefixMsgStrCreation(tid, sensorId); 616 strStream << std::setfill('0') << std::hex 617 << std::setw(sizeof(sensorData) * 2) << "Sensor data: "; 618 619 auto dataPtr = sensorData; 620 for ([[maybe_unused]] const auto& i : 621 std::views::iota(0, (int)sensorDataLength)) 622 { 623 strStream << "0x" << static_cast<uint32_t>(*dataPtr); 624 dataPtr += sizeof(sensorData); 625 } 626 627 description += strStream.str(); 628 629 sendJournalRedfish(description, logLevel); 630 } 631 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE", 632 sensorEventClassType); 633 return PLDM_ERROR; 634 } 635 636 void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId, 637 uint32_t presentReading) 638 { 639 std::string description; 640 std::stringstream strStream; 641 PCIeHotPlugEventRecord_t record{presentReading}; 642 643 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal"; 644 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed"; 645 log_level logLevel = 646 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING; 647 648 description += prefixMsgStrCreation(tid, sensorId); 649 650 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2) 651 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x" 652 << std::setw(2) << static_cast<uint32_t>(record.bits.bus) 653 << "); Device (0x" << std::setw(2) 654 << static_cast<uint32_t>(record.bits.device) << "); Function (0x" 655 << std::setw(2) << static_cast<uint32_t>(record.bits.function) 656 << "); Action (" << sAction << "); Operation status (" 657 << sOpStatus << "); Media slot number (" << std::dec 658 << static_cast<uint32_t>(record.bits.mediaSlot) << ")"; 659 660 description += strStream.str(); 661 662 // Log to Redfish event 663 sendJournalRedfish(description, logLevel); 664 } 665 666 std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo) 667 { 668 std::string description; 669 DIMMTrainingFailure_t failure{failureInfo}; 670 671 if (dimmTrainingFailureTypeMap.contains(failure.bits.type)) 672 { 673 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type]; 674 675 description += std::get<0>(failureInfoMap); 676 677 description += "; MCU rank index " + 678 std::to_string(failure.bits.mcuRankIdx); 679 680 description += "; Slice number " + 681 std::to_string(failure.bits.sliceNum); 682 683 description += "; Upper nibble error status: "; 684 description += (!failure.bits.upperNibbStatErr) 685 ? "No error" 686 : "Found no rising edge"; 687 688 description += "; Lower nibble error status: "; 689 description += (!failure.bits.lowerNibbStatErr) 690 ? "No error" 691 : "Found no rising edge"; 692 693 description += "; Failure syndrome 0: "; 694 695 auto& syndromeMap = std::get<1>(failureInfoMap); 696 if (syndromeMap.contains(failure.bits.syndrome)) 697 { 698 description += syndromeMap[failure.bits.syndrome]; 699 } 700 else 701 { 702 description += "(Unknown syndrome)"; 703 } 704 } 705 else 706 { 707 description += "Unknown training failure type " + 708 std::to_string(failure.bits.type); 709 } 710 711 return description; 712 } 713 714 void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId, 715 uint32_t presentReading) 716 { 717 log_level logLevel{log_level::WARNING}; 718 std::string description; 719 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 720 uint32_t byte012 = presentReading & 0xffffff; 721 722 description += prefixMsgStrCreation(tid, sensorId); 723 724 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) 725 auto dimmIdx = sensorIdToDIMMIdx(sensorId); 726 if (dimmIdx >= maxDIMMIdxBitNum) 727 { 728 return; 729 } 730 731 description += "DIMM " + std::to_string(dimmIdx) + " "; 732 733 if (dimmStatusToMsgMap.contains(byte3)) 734 { 735 if (byte3 == dimm_status::INSTALLED_NO_ERROR || 736 byte3 == dimm_status::INSTALLED_BUT_DISABLED) 737 { 738 logLevel = log_level::OK; 739 } 740 741 description += dimmStatusToMsgMap[byte3]; 742 743 if (byte3 == dimm_status::TRAINING_FAILURE) 744 { 745 description += "; " + dimmTrainingFailureToMsg(byte012); 746 } 747 else if (byte3 == dimm_status::PMIC_TEMP_ALERT) 748 { 749 uint8_t byte0 = (byte012 & 0xff); 750 if (byte0 < pmicTempAlertMsg.size()) 751 { 752 description += ": " + pmicTempAlertMsg[byte0]; 753 } 754 } 755 } 756 else 757 { 758 switch (byte3) 759 { 760 case dimm_status::PMIC_HIGH_TEMP: 761 if (byte012 == 0x01) 762 { 763 description += "has PMIC high temp condition"; 764 } 765 break; 766 case dimm_status::TSx_HIGH_TEMP: 767 switch (byte012) 768 { 769 case 0x01: 770 description += "has TS0"; 771 break; 772 case 0x02: 773 description += "has TS1"; 774 break; 775 case 0x03: 776 description += "has TS0 and TS1"; 777 break; 778 } 779 description += " exceeding their high temperature threshold"; 780 break; 781 case dimm_status::SPD_HUB_HIGH_TEMP: 782 if (byte012 == 0x01) 783 { 784 description += "has SPD/HUB high temp condition"; 785 } 786 break; 787 default: 788 description += "has unsupported status " + 789 std::to_string(byte3); 790 break; 791 } 792 } 793 794 // Log to Redfish event 795 sendJournalRedfish(description, logLevel); 796 } 797 798 void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId, 799 uint32_t presentReading) 800 { 801 log_level logLevel{log_level::WARNING}; 802 std::string description; 803 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 804 uint32_t byte012 = presentReading & 0xffffff; 805 806 description += prefixMsgStrCreation(tid, sensorId); 807 808 description += "DDR "; 809 if (ddrStatusToMsgMap.contains(byte3)) 810 { 811 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR) 812 { 813 logLevel = log_level::OK; 814 } 815 816 description += ddrStatusToMsgMap[byte3]; 817 818 if (byte3 == ddr_status::CONFIGURATION_FAILURE || 819 byte3 == ddr_status::TRAINING_FAILURE) 820 { 821 // List out failed DIMMs 822 description += dimmIdxsToString(byte012); 823 } 824 } 825 else 826 { 827 description += "has unsupported status " + std::to_string(byte3); 828 } 829 830 // Log to Redfish event 831 sendJournalRedfish(description, logLevel); 832 } 833 834 void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId, 835 uint32_t presentReading) 836 { 837 log_level logLevel{log_level::WARNING}; 838 std::string description; 839 std::stringstream strStream; 840 841 description += prefixMsgStrCreation(tid, sensorId); 842 843 VRDStatus_t status{presentReading}; 844 845 if (status.bits.warning && status.bits.critical) 846 { 847 description += "A VR warning and a VR critical"; 848 logLevel = log_level::CRITICAL; 849 } 850 else 851 { 852 if (status.bits.warning) 853 { 854 description += "A VR warning"; 855 } 856 else if (status.bits.critical) 857 { 858 description += "A VR critical"; 859 logLevel = log_level::CRITICAL; 860 } 861 else 862 { 863 description += "No VR warning or critical"; 864 logLevel = log_level::OK; 865 } 866 } 867 description += " condition observed"; 868 869 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex 870 << std::setw(2) 871 << static_cast<uint32_t>(status.bits.vr_status_byte_high) 872 << "; VR status byte low is 0x" << std::setw(2) 873 << static_cast<uint32_t>(status.bits.vr_status_byte_low) 874 << "; Reading is 0x" << std::setw(2) 875 << static_cast<uint32_t>(presentReading) << ";"; 876 877 description += strStream.str(); 878 879 // Log to Redfish event 880 sendJournalRedfish(description, logLevel); 881 } 882 883 void OemEventManager::handleNumericWatchdogEvent( 884 pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading) 885 { 886 std::string description; 887 log_level logLevel = log_level::CRITICAL; 888 889 description += prefixMsgStrCreation(tid, sensorId); 890 891 if (presentReading & 0x01) 892 { 893 description += "Global watchdog expired;"; 894 } 895 if (presentReading & 0x02) 896 { 897 description += "Secure watchdog expired;"; 898 } 899 if (presentReading & 0x04) 900 { 901 description += "Non-secure watchdog expired;"; 902 } 903 904 // Log to Redfish event 905 sendJournalRedfish(description, logLevel); 906 } 907 908 } // namespace oem_ampere 909 } // namespace pldm 910