1 #include "oem_event_manager.hpp"
2 
3 #include "libcper/Cper.h"
4 
5 #include "cper.hpp"
6 #include "requester/handler.hpp"
7 #include "requester/request.hpp"
8 
9 #include <config.h>
10 #include <libpldm/pldm.h>
11 #include <libpldm/utils.h>
12 #include <systemd/sd-journal.h>
13 
14 #include <phosphor-logging/lg2.hpp>
15 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
16 
17 #include <algorithm>
18 #include <map>
19 #include <set>
20 #include <sstream>
21 #include <string>
22 #include <unordered_map>
23 
24 namespace pldm
25 {
26 namespace oem_ampere
27 {
28 namespace boot_stage = boot::stage;
29 namespace ddr_status = ddr::status;
30 namespace dimm_status = dimm::status;
31 namespace dimm_syndrome = dimm::training_failure::dimm_syndrome;
32 namespace phy_syndrome = dimm::training_failure::phy_syndrome;
33 namespace training_failure = dimm::training_failure;
34 
/*
    Redfish message registry IDs attached to the journal entries written by
    this handler.
*/
constexpr auto ampereEventRegistry = "OpenBMC.0.1.AmpereEvent.OK";
constexpr auto ampereWarningRegistry = "OpenBMC.0.1.AmpereWarning.Warning";
constexpr auto ampereCriticalRegistry =
    "OpenBMC.0.1.AmpereCritical.Critical";
constexpr auto BIOSFWPanicRegistry =
    "OpenBMC.0.1.BIOSFirmwarePanicReason.Warning";

/* Number of bit positions scanned when decoding a DIMM index bitmask. */
constexpr int maxDIMMIdxBitNum{24};

/*
    Number of DIMM instances. sensorIdToDIMMIdx() also returns this value
    when a sensor ID does not belong to a DIMM status sensor.
*/
constexpr int maxDIMMInstantNum{24};
44 
45 const std::set<uint16_t> rasUESensorIDs = {CORE_UE, MCU_UE, PCIE_UE, SOC_UE};
46 
/*
    Status suffix appended to a boot stage name.
    Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> bootStatMsg{
    " booting",
    " completed",
    " failed",
};
52 
/*
    Status suffix appended to the DDR training stage name.
    Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> ddrTrainingMsg{
    " progress started",
    " in-progress",
    " progress completed",
};
59 
/*
    Logging strings for the PMIC temperature alert levels.
    Indexed by byte 0 of a DIMM status reading that reports a PMIC
    temperature alert.
*/
std::array<std::string, 8> pmicTempAlertMsg{
    "Below 85°C",
    "85°C",
    "95°C",
    "105°C",
    "115°C",
    "125°C",
    "135°C",
    "Equal or greater than 140°C",
};
66 
67 /*
68     In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC
69     EPs through SMBus and PCIe. When host boots up, SMBUS interface
70     comes up first. In this interface, BMC is bus owner.
71 
72     mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available).
73     pldmd will always use TID 1 for S0 and TID 2 for S1 (if available).
74 */
75 EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}};
76 
77 /*
78     A map between sensor IDs and their names in string.
79     Using pldm::oem::sensor_ids
80 */
81 EventToMsgMap_t sensorIdToStrMap = {
82     {DDR_STATUS, "DDR_STATUS"},
83     {PCP_VR_STATE, "PCP_VR_STATE"},
84     {SOC_VR_STATE, "SOC_VR_STATE"},
85     {DPHY_VR1_STATE, "DPHY_VR1_STATE"},
86     {DPHY_VR2_STATE, "DPHY_VR2_STATE"},
87     {D2D_VR_STATE, "D2D_VR_STATE"},
88     {IOC_VR1_STATE, "IOC_VR1_STATE"},
89     {IOC_VR2_STATE, "IOC_VR2_STATE"},
90     {PCI_D_VR_STATE, "PCI_D_VR_STATE"},
91     {PCI_A_VR_STATE, "PCI_A_VR_STATE"},
92     {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"},
93     {BOOT_OVERALL, "BOOT_OVERALL"},
94     {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"},
95     {WATCH_DOG, "WATCH_DOG"}};
96 
97 /*
98     A map between the boot stages and logging strings.
99     Using pldm::oem::boot::stage::boot_stage
100 */
101 EventToMsgMap_t bootStageToMsgMap = {
102     {boot_stage::SECPRO, "SECpro"},
103     {boot_stage::MPRO, "Mpro"},
104     {boot_stage::ATF_BL1, "ATF BL1"},
105     {boot_stage::ATF_BL2, "ATF BL2"},
106     {boot_stage::DDR_INITIALIZATION, "DDR initialization"},
107     {boot_stage::DDR_TRAINING, "DDR training"},
108     {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"},
109     {boot_stage::ATF_BL31, "ATF BL31"},
110     {boot_stage::ATF_BL32, "ATF BL32"},
111     {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"},
112     {boot_stage::UEFI_STATUS_CLASS_CODE_MIN,
113      "ATF BL33 (UEFI) booting status = "}};
114 
115 /*
116     A map between DDR status and logging strings.
117     Using pldm::oem::ddr::status::ddr_status
118 */
119 EventToMsgMap_t ddrStatusToMsgMap = {
120     {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"},
121     {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"},
122     {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"},
123     {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"},
124     {ddr_status::OTHER_FAILURE, "has other failure"},
125     {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG,
126      "has boot failure due to no configuration"},
127     {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS,
128      "failsafe activated but boot success with the next valid configuration"}};
129 
130 /*
131     A map between DIMM status and logging strings.
132     Using pldm::oem::dimm::status::dimm_status
133 */
134 EventToMsgMap_t dimmStatusToMsgMap = {
135     {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"},
136     {dimm_status::NOT_INSTALLED, "is not installed"},
137     {dimm_status::OTHER_FAILURE, "has other failure"},
138     {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"},
139     {dimm_status::TRAINING_FAILURE, "has training failure; "},
140     {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}};
141 
142 /*
143     A map between PHY training failure syndrome and logging strings.
144     Using
145    pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome
146 */
147 EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = {
148     {phy_syndrome::NA, "(N/A)"},
149     {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"},
150     {phy_syndrome::CA_LEVELING, "(CA leveling)"},
151     {phy_syndrome::PHY_WRITE_LEVEL_FAILURE,
152      "(PHY write level failure - see syndrome 1)"},
153     {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE,
154      "(PHY read gate leveling failure)"},
155     {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"},
156     {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"},
157     {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}};
158 
159 /*
160     A map between DIMM training failure syndrome and logging strings.
161     Using
162    pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome
163 */
164 EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = {
165     {dimm_syndrome::NA, "(N/A)"},
166     {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE,
167      "(DRAM VREFDQ training failure)"},
168     {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"},
169     {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE,
170      "(LRDRIMM DB SW training failure)"}};
171 
172 /*
173     A map between DIMM training failure type and a pair of <logging strings -
174    syndrome map>. Using
175    pldm::oem::dimm::training_faillure::dimm_training_failure_type
176 */
177 std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>>
178     dimmTrainingFailureTypeMap = {
179         {training_failure::PHY_TRAINING_FAILURE_TYPE,
180          std::make_pair("PHY training failure",
181                         phyTrainingFailureSyndromeToMsgMap)},
182         {training_failure::DIMM_TRAINING_FAILURE_TYPE,
183          std::make_pair("DIMM training failure",
184                         dimmTrainingFailureSyndromeToMsgMap)}};
185 
186 /*
187     A map between log level and the registry used for Redfish SEL log
188     Using pldm::oem::log_level
189 */
190 std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = {
191     {log_level::OK, ampereEventRegistry},
192     {log_level::WARNING, ampereWarningRegistry},
193     {log_level::CRITICAL, ampereCriticalRegistry},
194     {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}};
195 
196 std::unordered_map<
197     uint16_t,
198     std::vector<std::pair<
199         std::string,
200         std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>>
201     stateSensorToMsgMap = {
202         {SOC_HEALTH_AVAILABILITY,
203          {{"SoC Health",
204            {{1, {log_level::OK, "Normal"}},
205             {2, {log_level::WARNING, "Non-Critical"}},
206             {3, {log_level::CRITICAL, "Critical"}},
207             {4, {log_level::CRITICAL, "Fatal"}}}},
208           {"SoC Availability",
209            {{1, {log_level::OK, "Enabled"}},
210             {2, {log_level::WARNING, "Disabled"}},
211             {3, {log_level::CRITICAL, "Shutdown"}}}}}},
212         {WATCH_DOG,
213          {{"Global Watch Dog",
214            {{1, {log_level::OK, "Normal"}},
215             {2, {log_level::CRITICAL, "Timer Expired"}}}},
216           {"Secure Watch Dog",
217            {{1, {log_level::OK, "Normal"}},
218             {2, {log_level::CRITICAL, "Timer Expired"}}}},
219           {"Non-secure Watch Dog",
220            {{1, {log_level::OK, "Normal"}},
221             {2, {log_level::CRITICAL, "Timer Expired"}}}}}}};
222 
223 std::string
prefixMsgStrCreation(pldm_tid_t tid,uint16_t sensorId)224     OemEventManager::prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId)
225 {
226     std::string description;
227     if (!tidToSocketNameMap.contains(tid))
228     {
229         description += "TID " + std::to_string(tid) + ": ";
230     }
231     else
232     {
233         description += tidToSocketNameMap[tid] + ": ";
234     }
235 
236     if (!sensorIdToStrMap.contains(sensorId))
237     {
238         description += "Sensor ID " + std::to_string(sensorId) + ": ";
239     }
240     else
241     {
242         description += sensorIdToStrMap[sensorId] + ": ";
243     }
244 
245     return description;
246 }
247 
sendJournalRedfish(const std::string & description,log_level & logLevel)248 void OemEventManager::sendJournalRedfish(const std::string& description,
249                                          log_level& logLevel)
250 {
251     if (description.empty())
252     {
253         return;
254     }
255 
256     if (!logLevelToRedfishMsgIdMap.contains(logLevel))
257     {
258         lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel,
259                    "DES", description);
260         return;
261     }
262     auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel];
263     lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID",
264               redfishMsgId, "REDFISH_MESSAGE_ARGS", description);
265 }
266 
dimmIdxsToString(uint32_t dimmIdxs)267 std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs)
268 {
269     std::string description;
270     for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum))
271     {
272         if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx))
273         {
274             description += " #" + std::to_string(bitIdx);
275         }
276     }
277     return description;
278 }
279 
sensorIdToDIMMIdx(const uint16_t & sensorId)280 uint8_t OemEventManager::sensorIdToDIMMIdx(const uint16_t& sensorId)
281 {
282     uint8_t dimmIdx = maxDIMMInstantNum;
283     int sensorId_Off = sensorId - 4;
284     if ((sensorId_Off >= 0) && ((sensorId_Off % 2) == 0) &&
285         ((sensorId_Off / 2) < maxDIMMInstantNum))
286     {
287         dimmIdx = sensorId_Off / 2;
288     }
289     return dimmIdx;
290 }
291 
handleBootOverallEvent(pldm_tid_t,uint16_t,uint32_t presentReading)292 void OemEventManager::handleBootOverallEvent(
293     pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading)
294 {
295     log_level logLevel{log_level::OK};
296     std::string description;
297     std::stringstream strStream;
298 
299     uint8_t byte0 = (presentReading & 0x000000ff);
300     uint8_t byte1 = (presentReading & 0x0000ff00) >> 8;
301     uint8_t byte2 = (presentReading & 0x00ff0000) >> 16;
302     uint8_t byte3 = (presentReading & 0xff000000) >> 24;
303     /*
304      * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31,
305      * ATF BL32 and DDR initialization
306      */
307     if (bootStageToMsgMap.contains(byte3))
308     {
309         // Boot stage adding
310         description += bootStageToMsgMap[byte3];
311 
312         switch (byte3)
313         {
314             case boot_stage::DDR_TRAINING:
315                 if (byte0 >= ddrTrainingMsg.size())
316                 {
317                     logLevel = log_level::BIOSFWPANIC;
318                     description += " unknown status";
319                 }
320                 else
321                 {
322                     description += ddrTrainingMsg[byte0];
323                 }
324                 if (0x01 == byte0)
325                 {
326                     // Add complete percentage
327                     description += " at " + std::to_string(byte1) + "%";
328                 }
329                 break;
330             case boot_stage::S0_DDR_TRAINING_FAILURE:
331             case boot_stage::S1_DDR_TRAINING_FAILURE:
332                 // ddr_training_status_msg()
333                 logLevel = log_level::BIOSFWPANIC;
334                 description += " at DIMMs:";
335                 // dimmIdxs = presentReading & 0x00ffffff;
336                 description += dimmIdxsToString(presentReading & 0x00ffffff);
337                 description += " of socket ";
338                 description +=
339                     (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1";
340                 break;
341             default:
342                 if (byte0 >= bootStatMsg.size())
343                 {
344                     logLevel = log_level::BIOSFWPANIC;
345                     description += " unknown status";
346                 }
347                 else
348                 {
349                     description += bootStatMsg[byte0];
350                 }
351                 break;
352         }
353 
354         // Sensor report action is fail
355         if (boot::status::BOOT_STATUS_FAILURE == byte2)
356         {
357             logLevel = log_level::BIOSFWPANIC;
358         }
359     }
360     else
361     {
362         if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX)
363         {
364             description +=
365                 bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN];
366 
367             strStream
368                 << "Segment (0x" << std::setfill('0') << std::hex
369                 << std::setw(8) << static_cast<uint32_t>(presentReading)
370                 << "); Status Class (0x" << std::setw(2)
371                 << static_cast<uint32_t>(byte3) << "); Status SubClass (0x"
372                 << std::setw(2) << static_cast<uint32_t>(byte2)
373                 << "); Operation Code (0x" << std::setw(4)
374                 << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16)
375                 << ")" << std::dec;
376 
377             description += strStream.str();
378         }
379     }
380 
381     // Log to Redfish event
382     sendJournalRedfish(description, logLevel);
383 }
384 
processNumericSensorEvent(pldm_tid_t tid,uint16_t sensorId,const uint8_t * sensorData,size_t sensorDataLength)385 int OemEventManager::processNumericSensorEvent(
386     pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
387     size_t sensorDataLength)
388 {
389     uint8_t eventState = 0;
390     uint8_t previousEventState = 0;
391     uint8_t sensorDataSize = 0;
392     uint32_t presentReading;
393     auto rc = decode_numeric_sensor_data(
394         sensorData, sensorDataLength, &eventState, &previousEventState,
395         &sensorDataSize, &presentReading);
396     if (rc)
397     {
398         lg2::error(
399             "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ",
400             "TID", tid, "RC", rc);
401         return rc;
402     }
403 
404     // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
405     if (auto dimmIdx = sensorIdToDIMMIdx(sensorId); dimmIdx < maxDIMMInstantNum)
406     {
407         handleDIMMStatusEvent(tid, sensorId, presentReading);
408         return PLDM_SUCCESS;
409     }
410 
411     switch (sensorId)
412     {
413         case BOOT_OVERALL:
414             handleBootOverallEvent(tid, sensorId, presentReading);
415             break;
416         case PCIE_HOT_PLUG:
417             handlePCIeHotPlugEvent(tid, sensorId, presentReading);
418             break;
419         case DDR_STATUS:
420             handleDDRStatusEvent(tid, sensorId, presentReading);
421             break;
422         case PCP_VR_STATE:
423         case SOC_VR_STATE:
424         case DPHY_VR1_STATE:
425         case DPHY_VR2_STATE:
426         case D2D_VR_STATE:
427         case IOC_VR1_STATE:
428         case IOC_VR2_STATE:
429         case PCI_D_VR_STATE:
430         case PCI_A_VR_STATE:
431             handleVRDStatusEvent(tid, sensorId, presentReading);
432             break;
433         case WATCH_DOG:
434             handleNumericWatchdogEvent(tid, sensorId, presentReading);
435             break;
436         default:
437             std::string description;
438             std::stringstream strStream;
439             log_level logLevel = log_level::OK;
440 
441             description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: ";
442             description += prefixMsgStrCreation(tid, sensorId);
443             strStream << std::setfill('0') << std::hex << "eventState 0x"
444                       << std::setw(2) << static_cast<uint32_t>(eventState)
445                       << " previousEventState 0x" << std::setw(2)
446                       << static_cast<uint32_t>(previousEventState)
447                       << " sensorDataSize 0x" << std::setw(2)
448                       << static_cast<uint32_t>(sensorDataSize)
449                       << " presentReading 0x" << std::setw(8)
450                       << static_cast<uint32_t>(presentReading) << std::dec;
451             description += strStream.str();
452 
453             sendJournalRedfish(description, logLevel);
454             break;
455     }
456     return PLDM_SUCCESS;
457 }
458 
processStateSensorEvent(pldm_tid_t tid,uint16_t sensorId,const uint8_t * sensorData,size_t sensorDataLength)459 int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
460                                              const uint8_t* sensorData,
461                                              size_t sensorDataLength)
462 {
463     uint8_t sensorOffset = 0;
464     uint8_t eventState = 0;
465     uint8_t previousEventState = 0;
466 
467     auto rc =
468         decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset,
469                                  &eventState, &previousEventState);
470     if (rc)
471     {
472         lg2::error(
473             "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}",
474             "TID", tid, "RC", rc);
475         return rc;
476     }
477 
478     std::string description;
479     log_level logLevel = log_level::OK;
480 
481     if (stateSensorToMsgMap.contains(sensorId))
482     {
483         description += prefixMsgStrCreation(tid, sensorId);
484         auto componentMap = stateSensorToMsgMap[sensorId];
485         if (sensorOffset < componentMap.size())
486         {
487             description += std::get<0>(componentMap[sensorOffset]);
488             auto stateMap = std::get<1>(componentMap[sensorOffset]);
489             if (stateMap.contains(eventState))
490             {
491                 logLevel = std::get<0>(stateMap[eventState]);
492                 description += " state : " + std::get<1>(stateMap[eventState]);
493                 if (stateMap.contains(previousEventState))
494                 {
495                     description += "; previous state: " +
496                                    std::get<1>(stateMap[previousEventState]);
497                 }
498             }
499             else
500             {
501                 description += " sends unsupported event state: " +
502                                std::to_string(eventState);
503                 if (stateMap.contains(previousEventState))
504                 {
505                     description += "; previous state: " +
506                                    std::get<1>(stateMap[previousEventState]);
507                 }
508             }
509         }
510         else
511         {
512             description += "sends unsupported component sensor offset " +
513                            std::to_string(sensorOffset);
514         }
515     }
516     else
517     {
518         std::stringstream strStream;
519         description += "SENSOR_EVENT : STATE_SENSOR_STATE: ";
520         description += prefixMsgStrCreation(tid, sensorId);
521         strStream << std::setfill('0') << std::hex << "sensorOffset 0x"
522                   << std::setw(2) << static_cast<uint32_t>(sensorOffset)
523                   << "eventState 0x" << std::setw(2)
524                   << static_cast<uint32_t>(eventState)
525                   << " previousEventState 0x" << std::setw(2)
526                   << static_cast<uint32_t>(previousEventState) << std::dec;
527         description += strStream.str();
528     }
529 
530     sendJournalRedfish(description, logLevel);
531 
532     return PLDM_SUCCESS;
533 }
534 
processSensorOpStateEvent(pldm_tid_t tid,uint16_t sensorId,const uint8_t * sensorData,size_t sensorDataLength)535 int OemEventManager::processSensorOpStateEvent(
536     pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
537     size_t sensorDataLength)
538 {
539     uint8_t present_op_state = 0;
540     uint8_t previous_op_state = 0;
541 
542     auto rc = decode_sensor_op_data(sensorData, sensorDataLength,
543                                     &present_op_state, &previous_op_state);
544     if (rc)
545     {
546         lg2::error(
547             "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}",
548             "TID", tid, "RC", rc);
549         return rc;
550     }
551 
552     std::string description;
553     std::stringstream strStream;
554     log_level logLevel = log_level::OK;
555 
556     description += "SENSOR_EVENT : SENSOR_OP_STATE: ";
557     description += prefixMsgStrCreation(tid, sensorId);
558     strStream << std::setfill('0') << std::hex << "present_op_state 0x"
559               << std::setw(2) << static_cast<uint32_t>(present_op_state)
560               << "previous_op_state 0x" << std::setw(2)
561               << static_cast<uint32_t>(previous_op_state) << std::dec;
562     description += strStream.str();
563 
564     sendJournalRedfish(description, logLevel);
565 
566     return PLDM_SUCCESS;
567 }
568 
handleSensorEvent(const pldm_msg * request,size_t payloadLength,uint8_t,pldm_tid_t tid,size_t eventDataOffset)569 int OemEventManager::handleSensorEvent(
570     const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
571     pldm_tid_t tid, size_t eventDataOffset)
572 {
573     /* This OEM event handler is only used for SoC terminus*/
574     if (!tidToSocketNameMap.contains(tid))
575     {
576         return PLDM_SUCCESS;
577     }
578     auto eventData =
579         reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
580     auto eventDataSize = payloadLength - eventDataOffset;
581 
582     uint16_t sensorId = 0;
583     uint8_t sensorEventClassType = 0;
584     size_t eventClassDataOffset = 0;
585     auto rc =
586         decode_sensor_event_data(eventData, eventDataSize, &sensorId,
587                                  &sensorEventClassType, &eventClassDataOffset);
588     if (rc)
589     {
590         lg2::error("Failed to decode sensor event data return code {RC}.", "RC",
591                    rc);
592         return rc;
593     }
594     const uint8_t* sensorData = eventData + eventClassDataOffset;
595     size_t sensorDataLength = eventDataSize - eventClassDataOffset;
596 
597     switch (sensorEventClassType)
598     {
599         case PLDM_NUMERIC_SENSOR_STATE:
600         {
601             return processNumericSensorEvent(tid, sensorId, sensorData,
602                                              sensorDataLength);
603         }
604         case PLDM_STATE_SENSOR_STATE:
605         {
606             return processStateSensorEvent(tid, sensorId, sensorData,
607                                            sensorDataLength);
608         }
609         case PLDM_SENSOR_OP_STATE:
610         {
611             return processSensorOpStateEvent(tid, sensorId, sensorData,
612                                              sensorDataLength);
613         }
614         default:
615             std::string description;
616             std::stringstream strStream;
617             log_level logLevel = log_level::OK;
618 
619             description += "SENSOR_EVENT : Unsupported Sensor Class " +
620                            std::to_string(sensorEventClassType) + ": ";
621             description += prefixMsgStrCreation(tid, sensorId);
622             strStream << std::setfill('0') << std::hex
623                       << std::setw(sizeof(sensorData) * 2) << "Sensor data: ";
624 
625             auto dataPtr = sensorData;
626             for ([[maybe_unused]] const auto& i :
627                  std::views::iota(0, (int)sensorDataLength))
628             {
629                 strStream << "0x" << static_cast<uint32_t>(*dataPtr);
630                 dataPtr += sizeof(sensorData);
631             }
632 
633             description += strStream.str();
634 
635             sendJournalRedfish(description, logLevel);
636     }
637     lg2::info("Unsupported class type {CLASSTYPE}", "CLASSTYPE",
638               sensorEventClassType);
639     return PLDM_ERROR;
640 }
641 
handlePCIeHotPlugEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)642 void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
643                                              uint32_t presentReading)
644 {
645     std::string description;
646     std::stringstream strStream;
647     PCIeHotPlugEventRecord_t record{presentReading};
648 
649     std::string sAction = (!record.bits.action) ? "Insertion" : "Removal";
650     std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed";
651     log_level logLevel =
652         (!record.bits.opStatus) ? log_level::OK : log_level::WARNING;
653 
654     description += prefixMsgStrCreation(tid, sensorId);
655 
656     strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2)
657               << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x"
658               << std::setw(2) << static_cast<uint32_t>(record.bits.bus)
659               << "); Device (0x" << std::setw(2)
660               << static_cast<uint32_t>(record.bits.device) << "); Function (0x"
661               << std::setw(2) << static_cast<uint32_t>(record.bits.function)
662               << "); Action (" << sAction << "); Operation status ("
663               << sOpStatus << "); Media slot number (" << std::dec
664               << static_cast<uint32_t>(record.bits.mediaSlot) << ")";
665 
666     description += strStream.str();
667 
668     // Log to Redfish event
669     sendJournalRedfish(description, logLevel);
670 }
671 
dimmTrainingFailureToMsg(uint32_t failureInfo)672 std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo)
673 {
674     std::string description;
675     DIMMTrainingFailure_t failure{failureInfo};
676 
677     if (dimmTrainingFailureTypeMap.contains(failure.bits.type))
678     {
679         auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type];
680 
681         description += std::get<0>(failureInfoMap);
682 
683         description += "; MCU rank index " +
684                        std::to_string(failure.bits.mcuRankIdx);
685 
686         description += "; Slice number " +
687                        std::to_string(failure.bits.sliceNum);
688 
689         description += "; Upper nibble error status: ";
690         description += (!failure.bits.upperNibbStatErr)
691                            ? "No error"
692                            : "Found no rising edge";
693 
694         description += "; Lower nibble error status: ";
695         description += (!failure.bits.lowerNibbStatErr)
696                            ? "No error"
697                            : "Found no rising edge";
698 
699         description += "; Failure syndrome 0: ";
700 
701         auto& syndromeMap = std::get<1>(failureInfoMap);
702         if (syndromeMap.contains(failure.bits.syndrome))
703         {
704             description += syndromeMap[failure.bits.syndrome];
705         }
706         else
707         {
708             description += "(Unknown syndrome)";
709         }
710     }
711     else
712     {
713         description += "Unknown training failure type " +
714                        std::to_string(failure.bits.type);
715     }
716 
717     return description;
718 }
719 
handleDIMMStatusEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)720 void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
721                                             uint32_t presentReading)
722 {
723     log_level logLevel{log_level::WARNING};
724     std::string description;
725     uint8_t byte3 = (presentReading & 0xff000000) >> 24;
726     uint32_t byte012 = presentReading & 0xffffff;
727 
728     description += prefixMsgStrCreation(tid, sensorId);
729 
730     // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
731     auto dimmIdx = sensorIdToDIMMIdx(sensorId);
732     if (dimmIdx >= maxDIMMIdxBitNum)
733     {
734         return;
735     }
736 
737     description += "DIMM " + std::to_string(dimmIdx) + " ";
738 
739     if (dimmStatusToMsgMap.contains(byte3))
740     {
741         if (byte3 == dimm_status::INSTALLED_NO_ERROR ||
742             byte3 == dimm_status::INSTALLED_BUT_DISABLED)
743         {
744             logLevel = log_level::OK;
745         }
746 
747         description += dimmStatusToMsgMap[byte3];
748 
749         if (byte3 == dimm_status::TRAINING_FAILURE)
750         {
751             description += "; " + dimmTrainingFailureToMsg(byte012);
752         }
753         else if (byte3 == dimm_status::PMIC_TEMP_ALERT)
754         {
755             uint8_t byte0 = (byte012 & 0xff);
756             if (byte0 < pmicTempAlertMsg.size())
757             {
758                 description += ": " + pmicTempAlertMsg[byte0];
759             }
760         }
761     }
762     else
763     {
764         switch (byte3)
765         {
766             case dimm_status::PMIC_HIGH_TEMP:
767                 if (byte012 == 0x01)
768                 {
769                     description += "has PMIC high temp condition";
770                 }
771                 break;
772             case dimm_status::TSx_HIGH_TEMP:
773                 switch (byte012)
774                 {
775                     case 0x01:
776                         description += "has TS0";
777                         break;
778                     case 0x02:
779                         description += "has TS1";
780                         break;
781                     case 0x03:
782                         description += "has TS0 and TS1";
783                         break;
784                 }
785                 description += " exceeding their high temperature threshold";
786                 break;
787             case dimm_status::SPD_HUB_HIGH_TEMP:
788                 if (byte012 == 0x01)
789                 {
790                     description += "has SPD/HUB high temp condition";
791                 }
792                 break;
793             default:
794                 description += "has unsupported status " +
795                                std::to_string(byte3);
796                 break;
797         }
798     }
799 
800     // Log to Redfish event
801     sendJournalRedfish(description, logLevel);
802 }
803 
handleDDRStatusEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)804 void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
805                                            uint32_t presentReading)
806 {
807     log_level logLevel{log_level::WARNING};
808     std::string description;
809     uint8_t byte3 = (presentReading & 0xff000000) >> 24;
810     uint32_t byte012 = presentReading & 0xffffff;
811 
812     description += prefixMsgStrCreation(tid, sensorId);
813 
814     description += "DDR ";
815     if (ddrStatusToMsgMap.contains(byte3))
816     {
817         if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR)
818         {
819             logLevel = log_level::OK;
820         }
821 
822         description += ddrStatusToMsgMap[byte3];
823 
824         if (byte3 == ddr_status::CONFIGURATION_FAILURE ||
825             byte3 == ddr_status::TRAINING_FAILURE)
826         {
827             // List out failed DIMMs
828             description += dimmIdxsToString(byte012);
829         }
830     }
831     else
832     {
833         description += "has unsupported status " + std::to_string(byte3);
834     }
835 
836     // Log to Redfish event
837     sendJournalRedfish(description, logLevel);
838 }
839 
handleVRDStatusEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)840 void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
841                                            uint32_t presentReading)
842 {
843     log_level logLevel{log_level::WARNING};
844     std::string description;
845     std::stringstream strStream;
846 
847     description += prefixMsgStrCreation(tid, sensorId);
848 
849     VRDStatus_t status{presentReading};
850 
851     if (status.bits.warning && status.bits.critical)
852     {
853         description += "A VR warning and a VR critical";
854         logLevel = log_level::CRITICAL;
855     }
856     else
857     {
858         if (status.bits.warning)
859         {
860             description += "A VR warning";
861         }
862         else if (status.bits.critical)
863         {
864             description += "A VR critical";
865             logLevel = log_level::CRITICAL;
866         }
867         else
868         {
869             description += "No VR warning or critical";
870             logLevel = log_level::OK;
871         }
872     }
873     description += " condition observed";
874 
875     strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex
876               << std::setw(2)
877               << static_cast<uint32_t>(status.bits.vr_status_byte_high)
878               << "; VR status byte low is 0x" << std::setw(2)
879               << static_cast<uint32_t>(status.bits.vr_status_byte_low)
880               << "; Reading is 0x" << std::setw(2)
881               << static_cast<uint32_t>(presentReading) << ";";
882 
883     description += strStream.str();
884 
885     // Log to Redfish event
886     sendJournalRedfish(description, logLevel);
887 }
888 
handleNumericWatchdogEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)889 void OemEventManager::handleNumericWatchdogEvent(
890     pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading)
891 {
892     std::string description;
893     log_level logLevel = log_level::CRITICAL;
894 
895     description += prefixMsgStrCreation(tid, sensorId);
896 
897     if (presentReading & 0x01)
898     {
899         description += "Global watchdog expired;";
900     }
901     if (presentReading & 0x02)
902     {
903         description += "Secure watchdog expired;";
904     }
905     if (presentReading & 0x04)
906     {
907         description += "Non-secure watchdog expired;";
908     }
909 
910     // Log to Redfish event
911     sendJournalRedfish(description, logLevel);
912 }
913 
processOemMsgPollEvent(pldm_tid_t tid,uint16_t eventId,const uint8_t * eventData,size_t eventDataSize)914 int OemEventManager::processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId,
915                                             const uint8_t* eventData,
916                                             size_t eventDataSize)
917 {
918     EFI_AMPERE_ERROR_DATA ampHdr;
919 
920     decodeCperRecord(eventData, eventDataSize, &ampHdr);
921 
922     addCperSELLog(tid, eventId, &ampHdr);
923 
924     /* isBert at bit 12 of TypeId */
925     if (ampHdr.TypeId & 0x0800)
926     {
927         lg2::info("Ampere SoC BERT is triggered.");
928         std::variant<std::string> value(
929             "com.ampere.CrashCapture.Trigger.TriggerAction.Bert");
930         try
931         {
932             auto& bus = pldm::utils::DBusHandler::getBus();
933             auto method =
934                 bus.new_method_call("com.ampere.CrashCapture.Trigger",
935                                     "/com/ampere/crashcapture/trigger",
936                                     pldm::utils::dbusProperties, "Set");
937             method.append("com.ampere.CrashCapture.Trigger", "TriggerActions",
938                           value);
939             bus.call_noreply(method);
940         }
941         catch (const std::exception& e)
942         {
943             lg2::error("call BERT trigger error - {ERROR}", "ERROR", e);
944         }
945     }
946 
947     return PLDM_SUCCESS;
948 }
949 
handlepldmMessagePollEvent(const pldm_msg * request,size_t payloadLength,uint8_t,pldm_tid_t tid,size_t eventDataOffset)950 int OemEventManager::handlepldmMessagePollEvent(
951     const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
952     pldm_tid_t tid, size_t eventDataOffset)
953 {
954     /* This OEM event handler is only used for SoC terminus*/
955     if (!tidToSocketNameMap.contains(tid))
956     {
957         return PLDM_SUCCESS;
958     }
959 
960     auto eventData =
961         reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
962     auto eventDataSize = payloadLength - eventDataOffset;
963 
964     pldm_message_poll_event poll_event{};
965     auto rc = decode_pldm_message_poll_event_data(eventData, eventDataSize,
966                                                   &poll_event);
967     if (rc)
968     {
969         lg2::error("Failed to decode PldmMessagePollEvent event, error {RC} ",
970                    "RC", rc);
971         return rc;
972     }
973 
974     auto sensorID = poll_event.event_id;
975     /* The UE errors */
976     if (rasUESensorIDs.contains(sensorID))
977     {
978         pldm::utils::DBusMapping dbusMapping{
979             "/xyz/openbmc_project/led/groups/ras_ue_fault",
980             "xyz.openbmc_project.Led.Group", "Asserted", "bool"};
981         try
982         {
983             pldm::utils::DBusHandler().setDbusProperty(
984                 dbusMapping, pldm::utils::PropertyValue{bool(true)});
985         }
986         catch (const std::exception& e)
987         {
988             lg2::error(
989                 "Failed to set the RAS UE LED terminus ID {TID} sensor ID {SENSORID} - errors {ERROR}",
990                 "TID", tid, "SENSORID", sensorID, "ERROR", e);
991         }
992     }
993 
994     return PLDM_SUCCESS;
995 }
996 
997 } // namespace oem_ampere
998 } // namespace pldm
999