1 #include "oem_event_manager.hpp" 2 3 #include "libcper/Cper.h" 4 5 #include "cper.hpp" 6 #include "requester/handler.hpp" 7 #include "requester/request.hpp" 8 9 #include <config.h> 10 #include <libpldm/pldm.h> 11 #include <libpldm/utils.h> 12 #include <systemd/sd-journal.h> 13 14 #include <phosphor-logging/lg2.hpp> 15 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 16 17 #include <algorithm> 18 #include <map> 19 #include <set> 20 #include <sstream> 21 #include <string> 22 #include <unordered_map> 23 24 namespace pldm 25 { 26 namespace oem_ampere 27 { 28 namespace boot_stage = boot::stage; 29 namespace ddr_status = ddr::status; 30 namespace dimm_status = dimm::status; 31 namespace dimm_syndrome = dimm::training_failure::dimm_syndrome; 32 namespace phy_syndrome = dimm::training_failure::phy_syndrome; 33 namespace training_failure = dimm::training_failure; 34 35 constexpr const char* ampereEventRegistry = "OpenBMC.0.1.AmpereEvent"; 36 constexpr const char* ampereWarningRegistry = "OpenBMC.0.1.AmpereWarning"; 37 constexpr const char* ampereCriticalRegistry = "OpenBMC.0.1.AmpereCritical"; 38 constexpr const char* BIOSFWPanicRegistry = 39 "OpenBMC.0.1.BIOSFirmwarePanicReason"; 40 constexpr auto maxDIMMIdxBitNum = 24; 41 constexpr auto maxDIMMInstantNum = 24; 42 43 const std::set<uint16_t> rasUESensorIDs = {CORE_UE, MCU_UE, PCIE_UE, SOC_UE}; 44 45 /* 46 An array of possible boot status of a boot stage. 47 The index maps with byte 0 of boot code. 48 */ 49 std::array<std::string, 3> bootStatMsg = {" booting", " completed", " failed"}; 50 51 /* 52 An array of possible boot status of DDR training stage. 53 The index maps with byte 0 of boot code. 54 */ 55 std::array<std::string, 3> ddrTrainingMsg = { 56 " progress started", " in-progress", " progress completed"}; 57 58 /* 59 A map between PMIC status and logging strings. 60 */ 61 std::array<std::string, 8> pmicTempAlertMsg = { 62 "Below 85°C", "85°C", "95°C", "105°C", 63 "115°C", "125°C", "135°C", "Equal or greater than 140°C"}; 64 65 /* 66 In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC 67 EPs through SMBus and PCIe. When host boots up, SMBUS interface 68 comes up first. In this interface, BMC is bus owner. 69 70 mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available). 71 pldmd will always use TID 1 for S0 and TID 2 for S1 (if available). 72 */ 73 EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}}; 74 75 /* 76 A map between sensor IDs and their names in string. 77 Using pldm::oem::sensor_ids 78 */ 79 EventToMsgMap_t sensorIdToStrMap = { 80 {DDR_STATUS, "DDR_STATUS"}, 81 {PCP_VR_STATE, "PCP_VR_STATE"}, 82 {SOC_VR_STATE, "SOC_VR_STATE"}, 83 {DPHY_VR1_STATE, "DPHY_VR1_STATE"}, 84 {DPHY_VR2_STATE, "DPHY_VR2_STATE"}, 85 {D2D_VR_STATE, "D2D_VR_STATE"}, 86 {IOC_VR1_STATE, "IOC_VR1_STATE"}, 87 {IOC_VR2_STATE, "IOC_VR2_STATE"}, 88 {PCI_D_VR_STATE, "PCI_D_VR_STATE"}, 89 {PCI_A_VR_STATE, "PCI_A_VR_STATE"}, 90 {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"}, 91 {BOOT_OVERALL, "BOOT_OVERALL"}, 92 {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"}, 93 {WATCH_DOG, "WATCH_DOG"}}; 94 95 /* 96 A map between the boot stages and logging strings. 97 Using pldm::oem::boot::stage::boot_stage 98 */ 99 EventToMsgMap_t bootStageToMsgMap = { 100 {boot_stage::SECPRO, "SECpro"}, 101 {boot_stage::MPRO, "Mpro"}, 102 {boot_stage::ATF_BL1, "ATF BL1"}, 103 {boot_stage::ATF_BL2, "ATF BL2"}, 104 {boot_stage::DDR_INITIALIZATION, "DDR initialization"}, 105 {boot_stage::DDR_TRAINING, "DDR training"}, 106 {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"}, 107 {boot_stage::ATF_BL31, "ATF BL31"}, 108 {boot_stage::ATF_BL32, "ATF BL32"}, 109 {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"}, 110 {boot_stage::UEFI_STATUS_CLASS_CODE_MIN, 111 "ATF BL33 (UEFI) booting status = "}}; 112 113 /* 114 A map between DDR status and logging strings. 115 Using pldm::oem::ddr::status::ddr_status 116 */ 117 EventToMsgMap_t ddrStatusToMsgMap = { 118 {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"}, 119 {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"}, 120 {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"}, 121 {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"}, 122 {ddr_status::OTHER_FAILURE, "has other failure"}, 123 {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG, 124 "has boot failure due to no configuration"}, 125 {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS, 126 "failsafe activated but boot success with the next valid configuration"}}; 127 128 /* 129 A map between DIMM status and logging strings. 130 Using pldm::oem::dimm::status::dimm_status 131 */ 132 EventToMsgMap_t dimmStatusToMsgMap = { 133 {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"}, 134 {dimm_status::NOT_INSTALLED, "is not installed"}, 135 {dimm_status::OTHER_FAILURE, "has other failure"}, 136 {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"}, 137 {dimm_status::TRAINING_FAILURE, "has training failure; "}, 138 {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}}; 139 140 /* 141 A map between PHY training failure syndrome and logging strings. 142 Using 143 pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome 144 */ 145 EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = { 146 {phy_syndrome::NA, "(N/A)"}, 147 {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"}, 148 {phy_syndrome::CA_LEVELING, "(CA leveling)"}, 149 {phy_syndrome::PHY_WRITE_LEVEL_FAILURE, 150 "(PHY write level failure - see syndrome 1)"}, 151 {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE, 152 "(PHY read gate leveling failure)"}, 153 {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"}, 154 {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"}, 155 {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}}; 156 157 /* 158 A map between DIMM training failure syndrome and logging strings. 159 Using 160 pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome 161 */ 162 EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = { 163 {dimm_syndrome::NA, "(N/A)"}, 164 {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE, 165 "(DRAM VREFDQ training failure)"}, 166 {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"}, 167 {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE, 168 "(LRDRIMM DB SW training failure)"}}; 169 170 /* 171 A map between DIMM training failure type and a pair of <logging strings - 172 syndrome map>. Using 173 pldm::oem::dimm::training_faillure::dimm_training_failure_type 174 */ 175 std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>> 176 dimmTrainingFailureTypeMap = { 177 {training_failure::PHY_TRAINING_FAILURE_TYPE, 178 std::make_pair("PHY training failure", 179 phyTrainingFailureSyndromeToMsgMap)}, 180 {training_failure::DIMM_TRAINING_FAILURE_TYPE, 181 std::make_pair("DIMM training failure", 182 dimmTrainingFailureSyndromeToMsgMap)}}; 183 184 /* 185 A map between log level and the registry used for Redfish SEL log 186 Using pldm::oem::log_level 187 */ 188 std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = { 189 {log_level::OK, ampereEventRegistry}, 190 {log_level::WARNING, ampereWarningRegistry}, 191 {log_level::CRITICAL, ampereCriticalRegistry}, 192 {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}}; 193 194 std::unordered_map< 195 uint16_t, 196 std::vector<std::pair< 197 std::string, 198 std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>> 199 stateSensorToMsgMap = { 200 {SOC_HEALTH_AVAILABILITY, 201 {{"SoC Health", 202 {{1, {log_level::OK, "Normal"}}, 203 {2, {log_level::WARNING, "Non-Critical"}}, 204 {3, {log_level::CRITICAL, "Critical"}}, 205 {4, {log_level::CRITICAL, "Fatal"}}}}, 206 {"SoC Availability", 207 {{1, {log_level::OK, "Enabled"}}, 208 {2, {log_level::WARNING, "Disabled"}}, 209 {3, {log_level::CRITICAL, "Shutdown"}}}}}}, 210 {WATCH_DOG, 211 {{"Global Watch Dog", 212 {{1, {log_level::OK, "Normal"}}, 213 {2, {log_level::CRITICAL, "Timer Expired"}}}}, 214 {"Secure Watch Dog", 215 {{1, {log_level::OK, "Normal"}}, 216 {2, {log_level::CRITICAL, "Timer Expired"}}}}, 217 {"Non-secure Watch Dog", 218 {{1, {log_level::OK, "Normal"}}, 219 {2, {log_level::CRITICAL, "Timer Expired"}}}}}}}; 220 221 std::string 222 OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId) 223 { 224 std::string description; 225 if (!tidToSocketNameMap.contains(tid)) 226 { 227 description += "TID " + std::to_string(tid) + ": "; 228 } 229 else 230 { 231 description += tidToSocketNameMap[tid] + ": "; 232 } 233 234 if (!sensorIdToStrMap.contains(sensorId)) 235 { 236 description += "Sensor ID " + std::to_string(sensorId) + ": "; 237 } 238 else 239 { 240 description += sensorIdToStrMap[sensorId] + ": "; 241 } 242 243 return description; 244 } 245 246 void OemEventManager::sendJournalRedfish(const std::string& description, 247 log_level& logLevel) 248 { 249 if (description.empty()) 250 { 251 return; 252 } 253 254 if (!logLevelToRedfishMsgIdMap.contains(logLevel)) 255 { 256 lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel, 257 "DES", description); 258 return; 259 } 260 auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel]; 261 lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID", 262 redfishMsgId, "REDFISH_MESSAGE_ARGS", description); 263 } 264 265 std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs) 266 { 267 std::string description; 268 for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum)) 269 { 270 if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx)) 271 { 272 description += " #" + std::to_string(bitIdx); 273 } 274 } 275 return description; 276 } 277 278 uint8_t OemEventManager::sensorIdToDIMMIdx(const uint16_t& sensorId) 279 { 280 uint8_t dimmIdx = maxDIMMInstantNum; 281 int sensorId_Off = sensorId - 4; 282 if ((sensorId_Off >= 0) && ((sensorId_Off % 2) == 0) && 283 ((sensorId_Off / 2) < maxDIMMInstantNum)) 284 { 285 dimmIdx = sensorId_Off / 2; 286 } 287 return dimmIdx; 288 } 289 290 void OemEventManager::handleBootOverallEvent( 291 pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading) 292 { 293 log_level logLevel{log_level::OK}; 294 std::string description; 295 std::stringstream strStream; 296 297 uint8_t byte0 = (presentReading & 0x000000ff); 298 uint8_t byte1 = (presentReading & 0x0000ff00) >> 8; 299 uint8_t byte2 = (presentReading & 0x00ff0000) >> 16; 300 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 301 /* 302 * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31, 303 * ATF BL32 and DDR initialization 304 */ 305 if (bootStageToMsgMap.contains(byte3)) 306 { 307 // Boot stage adding 308 description += bootStageToMsgMap[byte3]; 309 310 switch (byte3) 311 { 312 case boot_stage::DDR_TRAINING: 313 if (byte0 >= ddrTrainingMsg.size()) 314 { 315 logLevel = log_level::BIOSFWPANIC; 316 description += " unknown status"; 317 } 318 else 319 { 320 description += ddrTrainingMsg[byte0]; 321 } 322 if (0x01 == byte0) 323 { 324 // Add complete percentage 325 description += " at " + std::to_string(byte1) + "%"; 326 } 327 break; 328 case boot_stage::S0_DDR_TRAINING_FAILURE: 329 case boot_stage::S1_DDR_TRAINING_FAILURE: 330 // ddr_training_status_msg() 331 logLevel = log_level::BIOSFWPANIC; 332 description += " at DIMMs:"; 333 // dimmIdxs = presentReading & 0x00ffffff; 334 description += dimmIdxsToString(presentReading & 0x00ffffff); 335 description += " of socket "; 336 description += 337 (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1"; 338 break; 339 default: 340 if (byte0 >= bootStatMsg.size()) 341 { 342 logLevel = log_level::BIOSFWPANIC; 343 description += " unknown status"; 344 } 345 else 346 { 347 description += bootStatMsg[byte0]; 348 } 349 break; 350 } 351 352 // Sensor report action is fail 353 if (boot::status::BOOT_STATUS_FAILURE == byte2) 354 { 355 logLevel = log_level::BIOSFWPANIC; 356 } 357 } 358 else 359 { 360 if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX) 361 { 362 description += 363 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN]; 364 365 strStream 366 << "Segment (0x" << std::setfill('0') << std::hex 367 << std::setw(8) << static_cast<uint32_t>(presentReading) 368 << "); Status Class (0x" << std::setw(2) 369 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x" 370 << std::setw(2) << static_cast<uint32_t>(byte2) 371 << "); Operation Code (0x" << std::setw(4) 372 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16) 373 << ")" << std::dec; 374 375 description += strStream.str(); 376 } 377 } 378 379 // Log to Redfish event 380 sendJournalRedfish(description, logLevel); 381 } 382 383 int OemEventManager::processNumericSensorEvent( 384 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 385 size_t sensorDataLength) 386 { 387 uint8_t eventState = 0; 388 uint8_t previousEventState = 0; 389 uint8_t sensorDataSize = 0; 390 uint32_t presentReading; 391 auto rc = decode_numeric_sensor_data( 392 sensorData, sensorDataLength, &eventState, &previousEventState, 393 &sensorDataSize, &presentReading); 394 if (rc) 395 { 396 lg2::error( 397 "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ", 398 "TID", tid, "RC", rc); 399 return rc; 400 } 401 402 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) 403 if (auto dimmIdx = sensorIdToDIMMIdx(sensorId); dimmIdx < maxDIMMInstantNum) 404 { 405 handleDIMMStatusEvent(tid, sensorId, presentReading); 406 return PLDM_SUCCESS; 407 } 408 409 switch (sensorId) 410 { 411 case BOOT_OVERALL: 412 handleBootOverallEvent(tid, sensorId, presentReading); 413 break; 414 case PCIE_HOT_PLUG: 415 handlePCIeHotPlugEvent(tid, sensorId, presentReading); 416 break; 417 case DDR_STATUS: 418 handleDDRStatusEvent(tid, sensorId, presentReading); 419 break; 420 case PCP_VR_STATE: 421 case SOC_VR_STATE: 422 case DPHY_VR1_STATE: 423 case DPHY_VR2_STATE: 424 case D2D_VR_STATE: 425 case IOC_VR1_STATE: 426 case IOC_VR2_STATE: 427 case PCI_D_VR_STATE: 428 case PCI_A_VR_STATE: 429 handleVRDStatusEvent(tid, sensorId, presentReading); 430 break; 431 case WATCH_DOG: 432 handleNumericWatchdogEvent(tid, sensorId, presentReading); 433 break; 434 default: 435 std::string description; 436 std::stringstream strStream; 437 log_level logLevel = log_level::OK; 438 439 description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: "; 440 description += prefixMsgStrCreation(tid, sensorId); 441 strStream << std::setfill('0') << std::hex << "eventState 0x" 442 << std::setw(2) << static_cast<uint32_t>(eventState) 443 << " previousEventState 0x" << std::setw(2) 444 << static_cast<uint32_t>(previousEventState) 445 << " sensorDataSize 0x" << std::setw(2) 446 << static_cast<uint32_t>(sensorDataSize) 447 << " presentReading 0x" << std::setw(8) 448 << static_cast<uint32_t>(presentReading) << std::dec; 449 description += strStream.str(); 450 451 sendJournalRedfish(description, logLevel); 452 break; 453 } 454 return PLDM_SUCCESS; 455 } 456 457 int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId, 458 const uint8_t* sensorData, 459 size_t sensorDataLength) 460 { 461 uint8_t sensorOffset = 0; 462 uint8_t eventState = 0; 463 uint8_t previousEventState = 0; 464 465 auto rc = 466 decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset, 467 &eventState, &previousEventState); 468 if (rc) 469 { 470 lg2::error( 471 "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}", 472 "TID", tid, "RC", rc); 473 return rc; 474 } 475 476 std::string description; 477 log_level logLevel = log_level::OK; 478 479 if (stateSensorToMsgMap.contains(sensorId)) 480 { 481 description += prefixMsgStrCreation(tid, sensorId); 482 auto componentMap = stateSensorToMsgMap[sensorId]; 483 if (sensorOffset < componentMap.size()) 484 { 485 description += std::get<0>(componentMap[sensorOffset]); 486 auto stateMap = std::get<1>(componentMap[sensorOffset]); 487 if (stateMap.contains(eventState)) 488 { 489 logLevel = std::get<0>(stateMap[eventState]); 490 description += " state : " + std::get<1>(stateMap[eventState]); 491 if (stateMap.contains(previousEventState)) 492 { 493 description += "; previous state: " + 494 std::get<1>(stateMap[previousEventState]); 495 } 496 } 497 else 498 { 499 description += " sends unsupported event state: " + 500 std::to_string(eventState); 501 if (stateMap.contains(previousEventState)) 502 { 503 description += "; previous state: " + 504 std::get<1>(stateMap[previousEventState]); 505 } 506 } 507 } 508 else 509 { 510 description += "sends unsupported component sensor offset " + 511 std::to_string(sensorOffset); 512 } 513 } 514 else 515 { 516 std::stringstream strStream; 517 description += "SENSOR_EVENT : STATE_SENSOR_STATE: "; 518 description += prefixMsgStrCreation(tid, sensorId); 519 strStream << std::setfill('0') << std::hex << "sensorOffset 0x" 520 << std::setw(2) << static_cast<uint32_t>(sensorOffset) 521 << "eventState 0x" << std::setw(2) 522 << static_cast<uint32_t>(eventState) 523 << " previousEventState 0x" << std::setw(2) 524 << static_cast<uint32_t>(previousEventState) << std::dec; 525 description += strStream.str(); 526 } 527 528 sendJournalRedfish(description, logLevel); 529 530 return PLDM_SUCCESS; 531 } 532 533 int OemEventManager::processSensorOpStateEvent( 534 pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData, 535 size_t sensorDataLength) 536 { 537 uint8_t present_op_state = 0; 538 uint8_t previous_op_state = 0; 539 540 auto rc = decode_sensor_op_data(sensorData, sensorDataLength, 541 &present_op_state, &previous_op_state); 542 if (rc) 543 { 544 lg2::error( 545 "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}", 546 "TID", tid, "RC", rc); 547 return rc; 548 } 549 550 std::string description; 551 std::stringstream strStream; 552 log_level logLevel = log_level::OK; 553 554 description += "SENSOR_EVENT : SENSOR_OP_STATE: "; 555 description += prefixMsgStrCreation(tid, sensorId); 556 strStream << std::setfill('0') << std::hex << "present_op_state 0x" 557 << std::setw(2) << static_cast<uint32_t>(present_op_state) 558 << "previous_op_state 0x" << std::setw(2) 559 << static_cast<uint32_t>(previous_op_state) << std::dec; 560 description += strStream.str(); 561 562 sendJournalRedfish(description, logLevel); 563 564 return PLDM_SUCCESS; 565 } 566 567 int OemEventManager::handleSensorEvent( 568 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, 569 pldm_tid_t tid, size_t eventDataOffset) 570 { 571 /* This OEM event handler is only used for SoC terminus*/ 572 if (!tidToSocketNameMap.contains(tid)) 573 { 574 return PLDM_SUCCESS; 575 } 576 auto eventData = 577 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; 578 auto eventDataSize = payloadLength - eventDataOffset; 579 580 uint16_t sensorId = 0; 581 uint8_t sensorEventClassType = 0; 582 size_t eventClassDataOffset = 0; 583 auto rc = 584 decode_sensor_event_data(eventData, eventDataSize, &sensorId, 585 &sensorEventClassType, &eventClassDataOffset); 586 if (rc) 587 { 588 lg2::error("Failed to decode sensor event data return code {RC}.", "RC", 589 rc); 590 return rc; 591 } 592 const uint8_t* sensorData = eventData + eventClassDataOffset; 593 size_t sensorDataLength = eventDataSize - eventClassDataOffset; 594 595 switch (sensorEventClassType) 596 { 597 case PLDM_NUMERIC_SENSOR_STATE: 598 { 599 return processNumericSensorEvent(tid, sensorId, sensorData, 600 sensorDataLength); 601 } 602 case PLDM_STATE_SENSOR_STATE: 603 { 604 return processStateSensorEvent(tid, sensorId, sensorData, 605 sensorDataLength); 606 } 607 case PLDM_SENSOR_OP_STATE: 608 { 609 return processSensorOpStateEvent(tid, sensorId, sensorData, 610 sensorDataLength); 611 } 612 default: 613 std::string description; 614 std::stringstream strStream; 615 log_level logLevel = log_level::OK; 616 617 description += "SENSOR_EVENT : Unsupported Sensor Class " + 618 std::to_string(sensorEventClassType) + ": "; 619 description += prefixMsgStrCreation(tid, sensorId); 620 strStream << std::setfill('0') << std::hex 621 << std::setw(sizeof(sensorData) * 2) << "Sensor data: "; 622 623 auto dataPtr = sensorData; 624 for ([[maybe_unused]] const auto& i : 625 std::views::iota(0, (int)sensorDataLength)) 626 { 627 strStream << "0x" << static_cast<uint32_t>(*dataPtr); 628 dataPtr += sizeof(sensorData); 629 } 630 631 description += strStream.str(); 632 633 sendJournalRedfish(description, logLevel); 634 } 635 lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE", 636 sensorEventClassType); 637 return PLDM_ERROR; 638 } 639 640 void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId, 641 uint32_t presentReading) 642 { 643 std::string description; 644 std::stringstream strStream; 645 PCIeHotPlugEventRecord_t record{presentReading}; 646 647 std::string sAction = (!record.bits.action) ? "Insertion" : "Removal"; 648 std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed"; 649 log_level logLevel = 650 (!record.bits.opStatus) ? log_level::OK : log_level::WARNING; 651 652 description += prefixMsgStrCreation(tid, sensorId); 653 654 strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2) 655 << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x" 656 << std::setw(2) << static_cast<uint32_t>(record.bits.bus) 657 << "); Device (0x" << std::setw(2) 658 << static_cast<uint32_t>(record.bits.device) << "); Function (0x" 659 << std::setw(2) << static_cast<uint32_t>(record.bits.function) 660 << "); Action (" << sAction << "); Operation status (" 661 << sOpStatus << "); Media slot number (" << std::dec 662 << static_cast<uint32_t>(record.bits.mediaSlot) << ")"; 663 664 description += strStream.str(); 665 666 // Log to Redfish event 667 sendJournalRedfish(description, logLevel); 668 } 669 670 std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo) 671 { 672 std::string description; 673 DIMMTrainingFailure_t failure{failureInfo}; 674 675 if (dimmTrainingFailureTypeMap.contains(failure.bits.type)) 676 { 677 auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type]; 678 679 description += std::get<0>(failureInfoMap); 680 681 description += "; MCU rank index " + 682 std::to_string(failure.bits.mcuRankIdx); 683 684 description += "; Slice number " + 685 std::to_string(failure.bits.sliceNum); 686 687 description += "; Upper nibble error status: "; 688 description += (!failure.bits.upperNibbStatErr) 689 ? "No error" 690 : "Found no rising edge"; 691 692 description += "; Lower nibble error status: "; 693 description += (!failure.bits.lowerNibbStatErr) 694 ? "No error" 695 : "Found no rising edge"; 696 697 description += "; Failure syndrome 0: "; 698 699 auto& syndromeMap = std::get<1>(failureInfoMap); 700 if (syndromeMap.contains(failure.bits.syndrome)) 701 { 702 description += syndromeMap[failure.bits.syndrome]; 703 } 704 else 705 { 706 description += "(Unknown syndrome)"; 707 } 708 } 709 else 710 { 711 description += "Unknown training failure type " + 712 std::to_string(failure.bits.type); 713 } 714 715 return description; 716 } 717 718 void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId, 719 uint32_t presentReading) 720 { 721 log_level logLevel{log_level::WARNING}; 722 std::string description; 723 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 724 uint32_t byte012 = presentReading & 0xffffff; 725 726 description += prefixMsgStrCreation(tid, sensorId); 727 728 // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1) 729 auto dimmIdx = sensorIdToDIMMIdx(sensorId); 730 if (dimmIdx >= maxDIMMIdxBitNum) 731 { 732 return; 733 } 734 735 description += "DIMM " + std::to_string(dimmIdx) + " "; 736 737 if (dimmStatusToMsgMap.contains(byte3)) 738 { 739 if (byte3 == dimm_status::INSTALLED_NO_ERROR || 740 byte3 == dimm_status::INSTALLED_BUT_DISABLED) 741 { 742 logLevel = log_level::OK; 743 } 744 745 description += dimmStatusToMsgMap[byte3]; 746 747 if (byte3 == dimm_status::TRAINING_FAILURE) 748 { 749 description += "; " + dimmTrainingFailureToMsg(byte012); 750 } 751 else if (byte3 == dimm_status::PMIC_TEMP_ALERT) 752 { 753 uint8_t byte0 = (byte012 & 0xff); 754 if (byte0 < pmicTempAlertMsg.size()) 755 { 756 description += ": " + pmicTempAlertMsg[byte0]; 757 } 758 } 759 } 760 else 761 { 762 switch (byte3) 763 { 764 case dimm_status::PMIC_HIGH_TEMP: 765 if (byte012 == 0x01) 766 { 767 description += "has PMIC high temp condition"; 768 } 769 break; 770 case dimm_status::TSx_HIGH_TEMP: 771 switch (byte012) 772 { 773 case 0x01: 774 description += "has TS0"; 775 break; 776 case 0x02: 777 description += "has TS1"; 778 break; 779 case 0x03: 780 description += "has TS0 and TS1"; 781 break; 782 } 783 description += " exceeding their high temperature threshold"; 784 break; 785 case dimm_status::SPD_HUB_HIGH_TEMP: 786 if (byte012 == 0x01) 787 { 788 description += "has SPD/HUB high temp condition"; 789 } 790 break; 791 default: 792 description += "has unsupported status " + 793 std::to_string(byte3); 794 break; 795 } 796 } 797 798 // Log to Redfish event 799 sendJournalRedfish(description, logLevel); 800 } 801 802 void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId, 803 uint32_t presentReading) 804 { 805 log_level logLevel{log_level::WARNING}; 806 std::string description; 807 uint8_t byte3 = (presentReading & 0xff000000) >> 24; 808 uint32_t byte012 = presentReading & 0xffffff; 809 810 description += prefixMsgStrCreation(tid, sensorId); 811 812 description += "DDR "; 813 if (ddrStatusToMsgMap.contains(byte3)) 814 { 815 if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR) 816 { 817 logLevel = log_level::OK; 818 } 819 820 description += ddrStatusToMsgMap[byte3]; 821 822 if (byte3 == ddr_status::CONFIGURATION_FAILURE || 823 byte3 == ddr_status::TRAINING_FAILURE) 824 { 825 // List out failed DIMMs 826 description += dimmIdxsToString(byte012); 827 } 828 } 829 else 830 { 831 description += "has unsupported status " + std::to_string(byte3); 832 } 833 834 // Log to Redfish event 835 sendJournalRedfish(description, logLevel); 836 } 837 838 void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId, 839 uint32_t presentReading) 840 { 841 log_level logLevel{log_level::WARNING}; 842 std::string description; 843 std::stringstream strStream; 844 845 description += prefixMsgStrCreation(tid, sensorId); 846 847 VRDStatus_t status{presentReading}; 848 849 if (status.bits.warning && status.bits.critical) 850 { 851 description += "A VR warning and a VR critical"; 852 logLevel = log_level::CRITICAL; 853 } 854 else 855 { 856 if (status.bits.warning) 857 { 858 description += "A VR warning"; 859 } 860 else if (status.bits.critical) 861 { 862 description += "A VR critical"; 863 logLevel = log_level::CRITICAL; 864 } 865 else 866 { 867 description += "No VR warning or critical"; 868 logLevel = log_level::OK; 869 } 870 } 871 description += " condition observed"; 872 873 strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex 874 << std::setw(2) 875 << static_cast<uint32_t>(status.bits.vr_status_byte_high) 876 << "; VR status byte low is 0x" << std::setw(2) 877 << static_cast<uint32_t>(status.bits.vr_status_byte_low) 878 << "; Reading is 0x" << std::setw(2) 879 << static_cast<uint32_t>(presentReading) << ";"; 880 881 description += strStream.str(); 882 883 // Log to Redfish event 884 sendJournalRedfish(description, logLevel); 885 } 886 887 void OemEventManager::handleNumericWatchdogEvent( 888 pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading) 889 { 890 std::string description; 891 log_level logLevel = log_level::CRITICAL; 892 893 description += prefixMsgStrCreation(tid, sensorId); 894 895 if (presentReading & 0x01) 896 { 897 description += "Global watchdog expired;"; 898 } 899 if (presentReading & 0x02) 900 { 901 description += "Secure watchdog expired;"; 902 } 903 if (presentReading & 0x04) 904 { 905 description += "Non-secure watchdog expired;"; 906 } 907 908 // Log to Redfish event 909 sendJournalRedfish(description, logLevel); 910 } 911 912 int OemEventManager::processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId, 913 const uint8_t* eventData, 914 size_t eventDataSize) 915 { 916 EFI_AMPERE_ERROR_DATA ampHdr; 917 918 decodeCperRecord(eventData, eventDataSize, &Hdr); 919 920 addCperSELLog(tid, eventId, &Hdr); 921 922 /* isBert at bit 12 of TypeId */ 923 if (ampHdr.TypeId & 0x0800) 924 { 925 lg2::info("Ampere SoC BERT is triggered."); 926 std::variant<std::string> value( 927 "com.ampere.CrashCapture.Trigger.TriggerAction.Bert"); 928 try 929 { 930 auto& bus = pldm::utils::DBusHandler::getBus(); 931 auto method = 932 bus.new_method_call("com.ampere.CrashCapture.Trigger", 933 "/com/ampere/crashcapture/trigger", 934 pldm::utils::dbusProperties, "Set"); 935 method.append("com.ampere.CrashCapture.Trigger", "TriggerActions", 936 value); 937 bus.call_noreply(method); 938 } 939 catch (const std::exception& e) 940 { 941 lg2::error("call BERT trigger error - {ERROR}", "ERROR", e); 942 } 943 } 944 945 return PLDM_SUCCESS; 946 } 947 948 int OemEventManager::handlepldmMessagePollEvent( 949 const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */, 950 pldm_tid_t tid, size_t eventDataOffset) 951 { 952 /* This OEM event handler is only used for SoC terminus*/ 953 if (!tidToSocketNameMap.contains(tid)) 954 { 955 return PLDM_SUCCESS; 956 } 957 958 auto eventData = 959 reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset; 960 auto eventDataSize = payloadLength - eventDataOffset; 961 962 pldm_message_poll_event poll_event{}; 963 auto rc = decode_pldm_message_poll_event_data(eventData, eventDataSize, 964 &poll_event); 965 if (rc) 966 { 967 lg2::error("Failed to decode PldmMessagePollEvent event, error {RC} ", 968 "RC", rc); 969 return rc; 970 } 971 972 auto sensorID = poll_event.event_id; 973 /* The UE errors */ 974 if (rasUESensorIDs.contains(sensorID)) 975 { 976 pldm::utils::DBusMapping dbusMapping{ 977 "/xyz/openbmc_project/led/groups/ras_ue_fault", 978 "xyz.openbmc_project.Led.Group", "Asserted", "bool"}; 979 try 980 { 981 pldm::utils::DBusHandler().setDbusProperty( 982 dbusMapping, pldm::utils::PropertyValue{bool(true)}); 983 } 984 catch (const std::exception& e) 985 { 986 lg2::error( 987 "Failed to set the RAS UE LED terminus ID {TID} sensor ID {SENSORID} - errors {ERROR}", 988 "TID", tid, "SENSORID", sensorID, "ERROR", e); 989 } 990 } 991 992 return PLDM_SUCCESS; 993 } 994 995 } // namespace oem_ampere 996 } // namespace pldm 997