xref: /openbmc/pldm/oem/ampere/event/oem_event_manager.cpp (revision 218f49920093e9a60a60ac9b2d84396844fdb9d3)
1  #include "oem_event_manager.hpp"
2  
3  #include "libcper/Cper.h"
4  
5  #include "cper.hpp"
6  #include "requester/handler.hpp"
7  #include "requester/request.hpp"
8  
9  #include <config.h>
10  #include <libpldm/pldm.h>
11  #include <libpldm/utils.h>
12  #include <systemd/sd-journal.h>
13  
14  #include <phosphor-logging/lg2.hpp>
15  #include <xyz/openbmc_project/Logging/Entry/server.hpp>
16  
17  #include <algorithm>
18  #include <map>
19  #include <set>
20  #include <sstream>
21  #include <string>
22  #include <unordered_map>
23  
24  namespace pldm
25  {
26  namespace oem_ampere
27  {
28  namespace fs = std::filesystem;
29  using namespace std::chrono;
30  
31  namespace boot_stage = boot::stage;
32  namespace ddr_status = ddr::status;
33  namespace dimm_status = dimm::status;
34  namespace dimm_syndrome = dimm::training_failure::dimm_syndrome;
35  namespace phy_syndrome = dimm::training_failure::phy_syndrome;
36  namespace training_failure = dimm::training_failure;
37  
// Redfish message registry IDs attached to journal entries for OEM events.
constexpr auto ampereEventRegistry = "OpenBMC.0.1.AmpereEvent";
constexpr auto ampereWarningRegistry = "OpenBMC.0.1.AmpereWarning";
constexpr auto ampereCriticalRegistry = "OpenBMC.0.1.AmpereCritical";
constexpr auto BIOSFWPanicRegistry = "OpenBMC.0.1.BIOSFirmwarePanicReason";

// Number of bits scanned when a reading is decoded as a DIMM bitmap.
constexpr int maxDIMMIdxBitNum = 24;
// Number of DIMM instances reachable through the DIMMx_Status sensor IDs.
constexpr int maxDIMMInstantNum = 24;
45  
46  const std::set<uint16_t> rasUESensorIDs = {CORE_UE, MCU_UE, PCIE_UE, SOC_UE};
47  
/*
    Status suffixes appended to a generic boot stage name.
    Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> bootStatMsg{
    " booting",
    " completed",
    " failed",
};

/*
    Status suffixes appended to the DDR training stage name.
    Indexed by byte 0 of the boot code.
*/
std::array<std::string, 3> ddrTrainingMsg{
    " progress started",
    " in-progress",
    " progress completed",
};

/*
    PMIC temperature alert descriptions.
    Indexed by byte 0 of a DIMM status reading that carries a PMIC
    temperature alert.
*/
std::array<std::string, 8> pmicTempAlertMsg{
    "Below 85°C",
    "85°C",
    "95°C",
    "105°C",
    "115°C",
    "125°C",
    "135°C",
    "Equal or greater than 140°C",
};
67  
68  /*
69      In Ampere systems, BMC only directly communicates with MCTP/PLDM SoC
70      EPs through SMBus and PCIe. When host boots up, SMBUS interface
71      comes up first. In this interface, BMC is bus owner.
72  
73      mctpd will set the EID 0x14 for S0 and 0x16 for S1 (if available).
74      pldmd will always use TID 1 for S0 and TID 2 for S1 (if available).
75  */
76  EventToMsgMap_t tidToSocketNameMap = {{1, "SOCKET 0"}, {2, "SOCKET 1"}};
77  
78  /*
79      A map between sensor IDs and their names in string.
80      Using pldm::oem::sensor_ids
81  */
82  EventToMsgMap_t sensorIdToStrMap = {
83      {DDR_STATUS, "DDR_STATUS"},
84      {PCP_VR_STATE, "PCP_VR_STATE"},
85      {SOC_VR_STATE, "SOC_VR_STATE"},
86      {DPHY_VR1_STATE, "DPHY_VR1_STATE"},
87      {DPHY_VR2_STATE, "DPHY_VR2_STATE"},
88      {D2D_VR_STATE, "D2D_VR_STATE"},
89      {IOC_VR1_STATE, "IOC_VR1_STATE"},
90      {IOC_VR2_STATE, "IOC_VR2_STATE"},
91      {PCI_D_VR_STATE, "PCI_D_VR_STATE"},
92      {PCI_A_VR_STATE, "PCI_A_VR_STATE"},
93      {PCIE_HOT_PLUG, "PCIE_HOT_PLUG"},
94      {BOOT_OVERALL, "BOOT_OVERALL"},
95      {SOC_HEALTH_AVAILABILITY, "SOC_HEALTH_AVAILABILITY"},
96      {WATCH_DOG, "WATCH_DOG"}};
97  
98  /*
99      A map between the boot stages and logging strings.
100      Using pldm::oem::boot::stage::boot_stage
101  */
102  EventToMsgMap_t bootStageToMsgMap = {
103      {boot_stage::SECPRO, "SECpro"},
104      {boot_stage::MPRO, "Mpro"},
105      {boot_stage::ATF_BL1, "ATF BL1"},
106      {boot_stage::ATF_BL2, "ATF BL2"},
107      {boot_stage::DDR_INITIALIZATION, "DDR initialization"},
108      {boot_stage::DDR_TRAINING, "DDR training"},
109      {boot_stage::S0_DDR_TRAINING_FAILURE, "DDR training failure"},
110      {boot_stage::ATF_BL31, "ATF BL31"},
111      {boot_stage::ATF_BL32, "ATF BL32"},
112      {boot_stage::S1_DDR_TRAINING_FAILURE, "DDR training failure"},
113      {boot_stage::UEFI_STATUS_CLASS_CODE_MIN,
114       "ATF BL33 (UEFI) booting status = "}};
115  
116  /*
117      A map between DDR status and logging strings.
118      Using pldm::oem::ddr::status::ddr_status
119  */
120  EventToMsgMap_t ddrStatusToMsgMap = {
121      {ddr_status::NO_SYSTEM_LEVEL_ERROR, "has no system level error"},
122      {ddr_status::ECC_INITIALIZATION_FAILURE, "has ECC initialization failure"},
123      {ddr_status::CONFIGURATION_FAILURE, "has configuration failure at DIMMs:"},
124      {ddr_status::TRAINING_FAILURE, "has training failure at DIMMs:"},
125      {ddr_status::OTHER_FAILURE, "has other failure"},
126      {ddr_status::BOOT_FAILURE_NO_VALID_CONFIG,
127       "has boot failure due to no configuration"},
128      {ddr_status::FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS,
129       "failsafe activated but boot success with the next valid configuration"}};
130  
131  /*
132      A map between DIMM status and logging strings.
133      Using pldm::oem::dimm::status::dimm_status
134  */
135  EventToMsgMap_t dimmStatusToMsgMap = {
136      {dimm_status::INSTALLED_NO_ERROR, "is installed and no error"},
137      {dimm_status::NOT_INSTALLED, "is not installed"},
138      {dimm_status::OTHER_FAILURE, "has other failure"},
139      {dimm_status::INSTALLED_BUT_DISABLED, "is installed but disabled"},
140      {dimm_status::TRAINING_FAILURE, "has training failure; "},
141      {dimm_status::PMIC_TEMP_ALERT, "has PMIC temperature alert"}};
142  
143  /*
144      A map between PHY training failure syndrome and logging strings.
145      Using
146     pldm::oem::dimm::training_faillure::phy_syndrome::phy_training_failure_syndrome
147  */
148  EventToMsgMap_t phyTrainingFailureSyndromeToMsgMap = {
149      {phy_syndrome::NA, "(N/A)"},
150      {phy_syndrome::PHY_TRAINING_SETUP_FAILURE, "(PHY training setup failure)"},
151      {phy_syndrome::CA_LEVELING, "(CA leveling)"},
152      {phy_syndrome::PHY_WRITE_LEVEL_FAILURE,
153       "(PHY write level failure - see syndrome 1)"},
154      {phy_syndrome::PHY_READ_GATE_LEVELING_FAILURE,
155       "(PHY read gate leveling failure)"},
156      {phy_syndrome::PHY_READ_LEVEL_FAILURE, "(PHY read level failure)"},
157      {phy_syndrome::WRITE_DQ_LEVELING, "(Write DQ leveling)"},
158      {phy_syndrome::PHY_SW_TRAINING_FAILURE, "(PHY SW training failure)"}};
159  
160  /*
161      A map between DIMM training failure syndrome and logging strings.
162      Using
163     pldm::oem::dimm::training_faillure::dimm_syndrome::dimm_training_failure_syndrome
164  */
165  EventToMsgMap_t dimmTrainingFailureSyndromeToMsgMap = {
166      {dimm_syndrome::NA, "(N/A)"},
167      {dimm_syndrome::DRAM_VREFDQ_TRAINING_FAILURE,
168       "(DRAM VREFDQ training failure)"},
169      {dimm_syndrome::LRDIMM_DB_TRAINING_FAILURE, "(LRDIMM DB training failure)"},
170      {dimm_syndrome::LRDRIMM_DB_SW_TRAINING_FAILURE,
171       "(LRDRIMM DB SW training failure)"}};
172  
173  /*
174      A map between DIMM training failure type and a pair of <logging strings -
175     syndrome map>. Using
176     pldm::oem::dimm::training_faillure::dimm_training_failure_type
177  */
178  std::unordered_map<uint8_t, std::pair<std::string, EventToMsgMap_t>>
179      dimmTrainingFailureTypeMap = {
180          {training_failure::PHY_TRAINING_FAILURE_TYPE,
181           std::make_pair("PHY training failure",
182                          phyTrainingFailureSyndromeToMsgMap)},
183          {training_failure::DIMM_TRAINING_FAILURE_TYPE,
184           std::make_pair("DIMM training failure",
185                          dimmTrainingFailureSyndromeToMsgMap)}};
186  
187  /*
188      A map between log level and the registry used for Redfish SEL log
189      Using pldm::oem::log_level
190  */
191  std::unordered_map<log_level, std::string> logLevelToRedfishMsgIdMap = {
192      {log_level::OK, ampereEventRegistry},
193      {log_level::WARNING, ampereWarningRegistry},
194      {log_level::CRITICAL, ampereCriticalRegistry},
195      {log_level::BIOSFWPANIC, BIOSFWPanicRegistry}};
196  
197  std::unordered_map<
198      uint16_t,
199      std::vector<std::pair<
200          std::string,
201          std::unordered_map<uint8_t, std::pair<log_level, std::string>>>>>
202      stateSensorToMsgMap = {
203          {SOC_HEALTH_AVAILABILITY,
204           {{"SoC Health",
205             {{1, {log_level::OK, "Normal"}},
206              {2, {log_level::WARNING, "Non-Critical"}},
207              {3, {log_level::CRITICAL, "Critical"}},
208              {4, {log_level::CRITICAL, "Fatal"}}}},
209            {"SoC Availability",
210             {{1, {log_level::OK, "Enabled"}},
211              {2, {log_level::WARNING, "Disabled"}},
212              {3, {log_level::CRITICAL, "Shutdown"}}}}}},
213          {WATCH_DOG,
214           {{"Global Watch Dog",
215             {{1, {log_level::OK, "Normal"}},
216              {2, {log_level::CRITICAL, "Timer Expired"}}}},
217            {"Secure Watch Dog",
218             {{1, {log_level::OK, "Normal"}},
219              {2, {log_level::CRITICAL, "Timer Expired"}}}},
220            {"Non-secure Watch Dog",
221             {{1, {log_level::OK, "Normal"}},
222              {2, {log_level::CRITICAL, "Timer Expired"}}}}}}};
223  
prefixMsgStrCreation(pldm_tid_t tid,uint16_t sensorId)224  std::string OemEventManager::prefixMsgStrCreation(pldm_tid_t tid,
225                                                    uint16_t sensorId)
226  {
227      std::string description;
228      if (!tidToSocketNameMap.contains(tid))
229      {
230          description += "TID " + std::to_string(tid) + ": ";
231      }
232      else
233      {
234          description += tidToSocketNameMap[tid] + ": ";
235      }
236  
237      if (!sensorIdToStrMap.contains(sensorId))
238      {
239          description += "Sensor ID " + std::to_string(sensorId) + ": ";
240      }
241      else
242      {
243          description += sensorIdToStrMap[sensorId] + ": ";
244      }
245  
246      return description;
247  }
248  
sendJournalRedfish(const std::string & description,log_level & logLevel)249  void OemEventManager::sendJournalRedfish(const std::string& description,
250                                           log_level& logLevel)
251  {
252      if (description.empty())
253      {
254          return;
255      }
256  
257      if (!logLevelToRedfishMsgIdMap.contains(logLevel))
258      {
259          lg2::error("Invalid {LEVEL} Description {DES}", "LEVEL", logLevel,
260                     "DES", description);
261          return;
262      }
263      auto redfishMsgId = logLevelToRedfishMsgIdMap[logLevel];
264      lg2::info("MESSAGE={DES}", "DES", description, "REDFISH_MESSAGE_ID",
265                redfishMsgId, "REDFISH_MESSAGE_ARGS", description);
266  }
267  
dimmIdxsToString(uint32_t dimmIdxs)268  std::string OemEventManager::dimmIdxsToString(uint32_t dimmIdxs)
269  {
270      std::string description;
271      for (const auto bitIdx : std::views::iota(0, maxDIMMIdxBitNum))
272      {
273          if (dimmIdxs & (static_cast<uint32_t>(1) << bitIdx))
274          {
275              description += " #" + std::to_string(bitIdx);
276          }
277      }
278      return description;
279  }
280  
sensorIdToDIMMIdx(const uint16_t & sensorId)281  uint8_t OemEventManager::sensorIdToDIMMIdx(const uint16_t& sensorId)
282  {
283      uint8_t dimmIdx = maxDIMMInstantNum;
284      int sensorId_Off = sensorId - 4;
285      if ((sensorId_Off >= 0) && ((sensorId_Off % 2) == 0) &&
286          ((sensorId_Off / 2) < maxDIMMInstantNum))
287      {
288          dimmIdx = sensorId_Off / 2;
289      }
290      return dimmIdx;
291  }
292  
handleBootOverallEvent(pldm_tid_t,uint16_t,uint32_t presentReading)293  void OemEventManager::handleBootOverallEvent(
294      pldm_tid_t /*tid*/, uint16_t /*sensorId*/, uint32_t presentReading)
295  {
296      log_level logLevel{log_level::OK};
297      std::string description;
298      std::stringstream strStream;
299  
300      uint8_t byte0 = (presentReading & 0x000000ff);
301      uint8_t byte1 = (presentReading & 0x0000ff00) >> 8;
302      uint8_t byte2 = (presentReading & 0x00ff0000) >> 16;
303      uint8_t byte3 = (presentReading & 0xff000000) >> 24;
304      /*
305       * Handle SECpro, Mpro, ATF BL1, ATF BL2, ATF BL31,
306       * ATF BL32 and DDR initialization
307       */
308      if (bootStageToMsgMap.contains(byte3))
309      {
310          // Boot stage adding
311          description += bootStageToMsgMap[byte3];
312  
313          switch (byte3)
314          {
315              case boot_stage::DDR_TRAINING:
316                  if (byte0 >= ddrTrainingMsg.size())
317                  {
318                      logLevel = log_level::BIOSFWPANIC;
319                      description += " unknown status";
320                  }
321                  else
322                  {
323                      description += ddrTrainingMsg[byte0];
324                  }
325                  if (0x01 == byte0)
326                  {
327                      // Add complete percentage
328                      description += " at " + std::to_string(byte1) + "%";
329                  }
330                  break;
331              case boot_stage::S0_DDR_TRAINING_FAILURE:
332              case boot_stage::S1_DDR_TRAINING_FAILURE:
333                  // ddr_training_status_msg()
334                  logLevel = log_level::BIOSFWPANIC;
335                  description += " at DIMMs:";
336                  // dimmIdxs = presentReading & 0x00ffffff;
337                  description += dimmIdxsToString(presentReading & 0x00ffffff);
338                  description += " of socket ";
339                  description +=
340                      (boot_stage::S0_DDR_TRAINING_FAILURE == byte3) ? "0" : "1";
341                  break;
342              default:
343                  if (byte0 >= bootStatMsg.size())
344                  {
345                      logLevel = log_level::BIOSFWPANIC;
346                      description += " unknown status";
347                  }
348                  else
349                  {
350                      description += bootStatMsg[byte0];
351                  }
352                  break;
353          }
354  
355          // Sensor report action is fail
356          if (boot::status::BOOT_STATUS_FAILURE == byte2)
357          {
358              logLevel = log_level::BIOSFWPANIC;
359          }
360      }
361      else
362      {
363          if (byte3 <= boot_stage::UEFI_STATUS_CLASS_CODE_MAX)
364          {
365              description +=
366                  bootStageToMsgMap[boot_stage::UEFI_STATUS_CLASS_CODE_MIN];
367  
368              strStream
369                  << "Segment (0x" << std::setfill('0') << std::hex
370                  << std::setw(8) << static_cast<uint32_t>(presentReading)
371                  << "); Status Class (0x" << std::setw(2)
372                  << static_cast<uint32_t>(byte3) << "); Status SubClass (0x"
373                  << std::setw(2) << static_cast<uint32_t>(byte2)
374                  << "); Operation Code (0x" << std::setw(4)
375                  << static_cast<uint32_t>((presentReading & 0xffff0000) >> 16)
376                  << ")" << std::dec;
377  
378              description += strStream.str();
379          }
380      }
381  
382      // Log to Redfish event
383      sendJournalRedfish(description, logLevel);
384  }
385  
processNumericSensorEvent(pldm_tid_t tid,uint16_t sensorId,const uint8_t * sensorData,size_t sensorDataLength)386  int OemEventManager::processNumericSensorEvent(
387      pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
388      size_t sensorDataLength)
389  {
390      uint8_t eventState = 0;
391      uint8_t previousEventState = 0;
392      uint8_t sensorDataSize = 0;
393      uint32_t presentReading;
394      auto rc = decode_numeric_sensor_data(
395          sensorData, sensorDataLength, &eventState, &previousEventState,
396          &sensorDataSize, &presentReading);
397      if (rc)
398      {
399          lg2::error(
400              "Failed to decode numericSensorState event for terminus ID {TID}, error {RC} ",
401              "TID", tid, "RC", rc);
402          return rc;
403      }
404  
405      // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
406      if (auto dimmIdx = sensorIdToDIMMIdx(sensorId); dimmIdx < maxDIMMInstantNum)
407      {
408          handleDIMMStatusEvent(tid, sensorId, presentReading);
409          return PLDM_SUCCESS;
410      }
411  
412      switch (sensorId)
413      {
414          case BOOT_OVERALL:
415              handleBootOverallEvent(tid, sensorId, presentReading);
416              break;
417          case PCIE_HOT_PLUG:
418              handlePCIeHotPlugEvent(tid, sensorId, presentReading);
419              break;
420          case DDR_STATUS:
421              handleDDRStatusEvent(tid, sensorId, presentReading);
422              break;
423          case PCP_VR_STATE:
424          case SOC_VR_STATE:
425          case DPHY_VR1_STATE:
426          case DPHY_VR2_STATE:
427          case D2D_VR_STATE:
428          case IOC_VR1_STATE:
429          case IOC_VR2_STATE:
430          case PCI_D_VR_STATE:
431          case PCI_A_VR_STATE:
432              handleVRDStatusEvent(tid, sensorId, presentReading);
433              break;
434          case WATCH_DOG:
435              handleNumericWatchdogEvent(tid, sensorId, presentReading);
436              break;
437          default:
438              std::string description;
439              std::stringstream strStream;
440  
441              description += "SENSOR_EVENT : NUMERIC_SENSOR_STATE: ";
442              description += prefixMsgStrCreation(tid, sensorId);
443              strStream << std::setfill('0') << std::hex << "eventState 0x"
444                        << std::setw(2) << static_cast<uint32_t>(eventState)
445                        << " previousEventState 0x" << std::setw(2)
446                        << static_cast<uint32_t>(previousEventState)
447                        << " sensorDataSize 0x" << std::setw(2)
448                        << static_cast<uint32_t>(sensorDataSize)
449                        << " presentReading 0x" << std::setw(8)
450                        << static_cast<uint32_t>(presentReading) << std::dec;
451              description += strStream.str();
452              std::cout << description << "\n";
453      }
454      return PLDM_SUCCESS;
455  }
456  
processStateSensorEvent(pldm_tid_t tid,uint16_t sensorId,const uint8_t * sensorData,size_t sensorDataLength)457  int OemEventManager::processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
458                                               const uint8_t* sensorData,
459                                               size_t sensorDataLength)
460  {
461      uint8_t sensorOffset = 0;
462      uint8_t eventState = 0;
463      uint8_t previousEventState = 0;
464  
465      auto rc =
466          decode_state_sensor_data(sensorData, sensorDataLength, &sensorOffset,
467                                   &eventState, &previousEventState);
468      if (rc)
469      {
470          lg2::error(
471              "Failed to decode stateSensorState event for terminus ID {TID}, error {RC}",
472              "TID", tid, "RC", rc);
473          return rc;
474      }
475  
476      std::string description;
477  
478      if (stateSensorToMsgMap.contains(sensorId))
479      {
480          log_level logLevel = log_level::OK;
481  
482          description += prefixMsgStrCreation(tid, sensorId);
483          auto componentMap = stateSensorToMsgMap[sensorId];
484          if (sensorOffset < componentMap.size())
485          {
486              description += std::get<0>(componentMap[sensorOffset]);
487              auto stateMap = std::get<1>(componentMap[sensorOffset]);
488              if (stateMap.contains(eventState))
489              {
490                  logLevel = std::get<0>(stateMap[eventState]);
491                  description += " state : " + std::get<1>(stateMap[eventState]);
492                  if (stateMap.contains(previousEventState))
493                  {
494                      description += "; previous state: " +
495                                     std::get<1>(stateMap[previousEventState]);
496                  }
497              }
498              else
499              {
500                  description += " sends unsupported event state: " +
501                                 std::to_string(eventState);
502                  if (stateMap.contains(previousEventState))
503                  {
504                      description += "; previous state: " +
505                                     std::get<1>(stateMap[previousEventState]);
506                  }
507              }
508          }
509          else
510          {
511              description += "sends unsupported component sensor offset " +
512                             std::to_string(sensorOffset);
513          }
514  
515          sendJournalRedfish(description, logLevel);
516      }
517      else
518      {
519          std::stringstream strStream;
520          description += "SENSOR_EVENT : STATE_SENSOR_STATE: ";
521          description += prefixMsgStrCreation(tid, sensorId);
522          strStream << std::setfill('0') << std::hex << "sensorOffset 0x"
523                    << std::setw(2) << static_cast<uint32_t>(sensorOffset)
524                    << "eventState 0x" << std::setw(2)
525                    << static_cast<uint32_t>(eventState)
526                    << " previousEventState 0x" << std::setw(2)
527                    << static_cast<uint32_t>(previousEventState) << std::dec;
528          description += strStream.str();
529          std::cout << description << "\n";
530      }
531  
532      return PLDM_SUCCESS;
533  }
534  
processSensorOpStateEvent(pldm_tid_t tid,uint16_t sensorId,const uint8_t * sensorData,size_t sensorDataLength)535  int OemEventManager::processSensorOpStateEvent(
536      pldm_tid_t tid, uint16_t sensorId, const uint8_t* sensorData,
537      size_t sensorDataLength)
538  {
539      uint8_t present_op_state = 0;
540      uint8_t previous_op_state = 0;
541  
542      auto rc = decode_sensor_op_data(sensorData, sensorDataLength,
543                                      &present_op_state, &previous_op_state);
544      if (rc)
545      {
546          lg2::error(
547              "Failed to decode sensorOpState event for terminus ID {TID}, error {RC}",
548              "TID", tid, "RC", rc);
549          return rc;
550      }
551  
552      std::string description;
553      std::stringstream strStream;
554  
555      description += "SENSOR_EVENT : SENSOR_OP_STATE: ";
556      description += prefixMsgStrCreation(tid, sensorId);
557      strStream << std::setfill('0') << std::hex << "present_op_state 0x"
558                << std::setw(2) << static_cast<uint32_t>(present_op_state)
559                << "previous_op_state 0x" << std::setw(2)
560                << static_cast<uint32_t>(previous_op_state) << std::dec;
561      description += strStream.str();
562      std::cout << description << "\n";
563  
564      return PLDM_SUCCESS;
565  }
566  
handleSensorEvent(const pldm_msg * request,size_t payloadLength,uint8_t,pldm_tid_t tid,size_t eventDataOffset)567  int OemEventManager::handleSensorEvent(
568      const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
569      pldm_tid_t tid, size_t eventDataOffset)
570  {
571      /* This OEM event handler is only used for SoC terminus*/
572      if (!tidToSocketNameMap.contains(tid))
573      {
574          return PLDM_SUCCESS;
575      }
576      auto eventData =
577          reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
578      auto eventDataSize = payloadLength - eventDataOffset;
579  
580      uint16_t sensorId = 0;
581      uint8_t sensorEventClassType = 0;
582      size_t eventClassDataOffset = 0;
583      auto rc =
584          decode_sensor_event_data(eventData, eventDataSize, &sensorId,
585                                   &sensorEventClassType, &eventClassDataOffset);
586      if (rc)
587      {
588          lg2::error("Failed to decode sensor event data return code {RC}.", "RC",
589                     rc);
590          return rc;
591      }
592      const uint8_t* sensorData = eventData + eventClassDataOffset;
593      size_t sensorDataLength = eventDataSize - eventClassDataOffset;
594  
595      switch (sensorEventClassType)
596      {
597          case PLDM_NUMERIC_SENSOR_STATE:
598          {
599              return processNumericSensorEvent(tid, sensorId, sensorData,
600                                               sensorDataLength);
601          }
602          case PLDM_STATE_SENSOR_STATE:
603          {
604              return processStateSensorEvent(tid, sensorId, sensorData,
605                                             sensorDataLength);
606          }
607          case PLDM_SENSOR_OP_STATE:
608          {
609              return processSensorOpStateEvent(tid, sensorId, sensorData,
610                                               sensorDataLength);
611          }
612          default:
613              std::string description;
614              std::stringstream strStream;
615  
616              description += "SENSOR_EVENT : Unsupported Sensor Class " +
617                             std::to_string(sensorEventClassType) + ": ";
618              description += prefixMsgStrCreation(tid, sensorId);
619              strStream << std::setfill('0') << std::hex
620                        << std::setw(sizeof(sensorData) * 2) << "Sensor data: ";
621  
622              auto dataPtr = sensorData;
623              for ([[maybe_unused]] const auto& i :
624                   std::views::iota(0, (int)sensorDataLength))
625              {
626                  strStream << "0x" << static_cast<uint32_t>(*dataPtr);
627                  dataPtr += sizeof(sensorData);
628              }
629  
630              description += strStream.str();
631              std::cout << description << "\n";
632      }
633  
634      return PLDM_ERROR;
635  }
636  
handlePCIeHotPlugEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)637  void OemEventManager::handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
638                                               uint32_t presentReading)
639  {
640      std::string description;
641      std::stringstream strStream;
642      PCIeHotPlugEventRecord_t record{presentReading};
643  
644      std::string sAction = (!record.bits.action) ? "Insertion" : "Removal";
645      std::string sOpStatus = (!record.bits.opStatus) ? "Successful" : "Failed";
646      log_level logLevel =
647          (!record.bits.opStatus) ? log_level::OK : log_level::WARNING;
648  
649      description += prefixMsgStrCreation(tid, sensorId);
650  
651      strStream << "Segment (0x" << std::setfill('0') << std::hex << std::setw(2)
652                << static_cast<uint32_t>(record.bits.segment) << "); Bus (0x"
653                << std::setw(2) << static_cast<uint32_t>(record.bits.bus)
654                << "); Device (0x" << std::setw(2)
655                << static_cast<uint32_t>(record.bits.device) << "); Function (0x"
656                << std::setw(2) << static_cast<uint32_t>(record.bits.function)
657                << "); Action (" << sAction << "); Operation status ("
658                << sOpStatus << "); Media slot number (" << std::dec
659                << static_cast<uint32_t>(record.bits.mediaSlot) << ")";
660  
661      description += strStream.str();
662  
663      // Log to Redfish event
664      sendJournalRedfish(description, logLevel);
665  }
666  
dimmTrainingFailureToMsg(uint32_t failureInfo)667  std::string OemEventManager::dimmTrainingFailureToMsg(uint32_t failureInfo)
668  {
669      std::string description;
670      DIMMTrainingFailure_t failure{failureInfo};
671  
672      if (dimmTrainingFailureTypeMap.contains(failure.bits.type))
673      {
674          auto failureInfoMap = dimmTrainingFailureTypeMap[failure.bits.type];
675  
676          description += std::get<0>(failureInfoMap);
677  
678          description += "; MCU rank index " +
679                         std::to_string(failure.bits.mcuRankIdx);
680  
681          description += "; Slice number " +
682                         std::to_string(failure.bits.sliceNum);
683  
684          description += "; Upper nibble error status: ";
685          description += (!failure.bits.upperNibbStatErr)
686                             ? "No error"
687                             : "Found no rising edge";
688  
689          description += "; Lower nibble error status: ";
690          description += (!failure.bits.lowerNibbStatErr)
691                             ? "No error"
692                             : "Found no rising edge";
693  
694          description += "; Failure syndrome 0: ";
695  
696          auto& syndromeMap = std::get<1>(failureInfoMap);
697          if (syndromeMap.contains(failure.bits.syndrome))
698          {
699              description += syndromeMap[failure.bits.syndrome];
700          }
701          else
702          {
703              description += "(Unknown syndrome)";
704          }
705      }
706      else
707      {
708          description += "Unknown training failure type " +
709                         std::to_string(failure.bits.type);
710      }
711  
712      return description;
713  }
714  
handleDIMMStatusEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)715  void OemEventManager::handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
716                                              uint32_t presentReading)
717  {
718      log_level logLevel{log_level::WARNING};
719      std::string description;
720      uint8_t byte3 = (presentReading & 0xff000000) >> 24;
721      uint32_t byte012 = presentReading & 0xffffff;
722  
723      description += prefixMsgStrCreation(tid, sensorId);
724  
725      // DIMMx_Status sensorID 4+2*index (index 0 -> maxDIMMInstantNum-1)
726      auto dimmIdx = sensorIdToDIMMIdx(sensorId);
727      if (dimmIdx >= maxDIMMIdxBitNum)
728      {
729          return;
730      }
731  
732      description += "DIMM " + std::to_string(dimmIdx) + " ";
733  
734      if (dimmStatusToMsgMap.contains(byte3))
735      {
736          if (byte3 == dimm_status::INSTALLED_NO_ERROR ||
737              byte3 == dimm_status::INSTALLED_BUT_DISABLED)
738          {
739              logLevel = log_level::OK;
740          }
741  
742          description += dimmStatusToMsgMap[byte3];
743  
744          if (byte3 == dimm_status::TRAINING_FAILURE)
745          {
746              description += "; " + dimmTrainingFailureToMsg(byte012);
747          }
748          else if (byte3 == dimm_status::PMIC_TEMP_ALERT)
749          {
750              uint8_t byte0 = (byte012 & 0xff);
751              if (byte0 < pmicTempAlertMsg.size())
752              {
753                  description += ": " + pmicTempAlertMsg[byte0];
754              }
755          }
756      }
757      else
758      {
759          switch (byte3)
760          {
761              case dimm_status::PMIC_HIGH_TEMP:
762                  if (byte012 == 0x01)
763                  {
764                      description += "has PMIC high temp condition";
765                  }
766                  break;
767              case dimm_status::TSx_HIGH_TEMP:
768                  switch (byte012)
769                  {
770                      case 0x01:
771                          description += "has TS0";
772                          break;
773                      case 0x02:
774                          description += "has TS1";
775                          break;
776                      case 0x03:
777                          description += "has TS0 and TS1";
778                          break;
779                  }
780                  description += " exceeding their high temperature threshold";
781                  break;
782              case dimm_status::SPD_HUB_HIGH_TEMP:
783                  if (byte012 == 0x01)
784                  {
785                      description += "has SPD/HUB high temp condition";
786                  }
787                  break;
788              default:
789                  description += "has unsupported status " +
790                                 std::to_string(byte3);
791                  break;
792          }
793      }
794  
795      // Log to Redfish event
796      sendJournalRedfish(description, logLevel);
797  }
798  
handleDDRStatusEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)799  void OemEventManager::handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
800                                             uint32_t presentReading)
801  {
802      log_level logLevel{log_level::WARNING};
803      std::string description;
804      uint8_t byte3 = (presentReading & 0xff000000) >> 24;
805      uint32_t byte012 = presentReading & 0xffffff;
806  
807      description += prefixMsgStrCreation(tid, sensorId);
808  
809      description += "DDR ";
810      if (ddrStatusToMsgMap.contains(byte3))
811      {
812          if (byte3 == ddr_status::NO_SYSTEM_LEVEL_ERROR)
813          {
814              logLevel = log_level::OK;
815          }
816  
817          description += ddrStatusToMsgMap[byte3];
818  
819          if (byte3 == ddr_status::CONFIGURATION_FAILURE ||
820              byte3 == ddr_status::TRAINING_FAILURE)
821          {
822              // List out failed DIMMs
823              description += dimmIdxsToString(byte012);
824          }
825      }
826      else
827      {
828          description += "has unsupported status " + std::to_string(byte3);
829      }
830  
831      // Log to Redfish event
832      sendJournalRedfish(description, logLevel);
833  }
834  
handleVRDStatusEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)835  void OemEventManager::handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
836                                             uint32_t presentReading)
837  {
838      log_level logLevel{log_level::WARNING};
839      std::string description;
840      std::stringstream strStream;
841  
842      description += prefixMsgStrCreation(tid, sensorId);
843  
844      VRDStatus_t status{presentReading};
845  
846      if (status.bits.warning && status.bits.critical)
847      {
848          description += "A VR warning and a VR critical";
849          logLevel = log_level::CRITICAL;
850      }
851      else
852      {
853          if (status.bits.warning)
854          {
855              description += "A VR warning";
856          }
857          else if (status.bits.critical)
858          {
859              description += "A VR critical";
860              logLevel = log_level::CRITICAL;
861          }
862          else
863          {
864              description += "No VR warning or critical";
865              logLevel = log_level::OK;
866          }
867      }
868      description += " condition observed";
869  
870      strStream << "; VR status byte high is 0x" << std::setfill('0') << std::hex
871                << std::setw(2)
872                << static_cast<uint32_t>(status.bits.vr_status_byte_high)
873                << "; VR status byte low is 0x" << std::setw(2)
874                << static_cast<uint32_t>(status.bits.vr_status_byte_low)
875                << "; Reading is 0x" << std::setw(2)
876                << static_cast<uint32_t>(presentReading) << ";";
877  
878      description += strStream.str();
879  
880      // Log to Redfish event
881      sendJournalRedfish(description, logLevel);
882  }
883  
handleNumericWatchdogEvent(pldm_tid_t tid,uint16_t sensorId,uint32_t presentReading)884  void OemEventManager::handleNumericWatchdogEvent(
885      pldm_tid_t tid, uint16_t sensorId, uint32_t presentReading)
886  {
887      std::string description;
888      log_level logLevel = log_level::CRITICAL;
889  
890      description += prefixMsgStrCreation(tid, sensorId);
891  
892      if (presentReading & 0x01)
893      {
894          description += "Global watchdog expired;";
895      }
896      if (presentReading & 0x02)
897      {
898          description += "Secure watchdog expired;";
899      }
900      if (presentReading & 0x04)
901      {
902          description += "Non-secure watchdog expired;";
903      }
904  
905      // Log to Redfish event
906      sendJournalRedfish(description, logLevel);
907  }
908  
processOemMsgPollEvent(pldm_tid_t tid,uint16_t eventId,const uint8_t * eventData,size_t eventDataSize)909  int OemEventManager::processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId,
910                                              const uint8_t* eventData,
911                                              size_t eventDataSize)
912  {
913      EFI_AMPERE_ERROR_DATA ampHdr;
914  
915      decodeCperRecord(eventData, eventDataSize, &ampHdr);
916  
917      addCperSELLog(tid, eventId, &ampHdr);
918  
919      /* isBert at bit 12 of TypeId */
920      if (ampHdr.TypeId & 0x0800)
921      {
922          lg2::info("Ampere SoC BERT is triggered.");
923          std::variant<std::string> value(
924              "com.ampere.CrashCapture.Trigger.TriggerAction.Bert");
925          try
926          {
927              auto& bus = pldm::utils::DBusHandler::getBus();
928              auto method =
929                  bus.new_method_call("com.ampere.CrashCapture.Trigger",
930                                      "/com/ampere/crashcapture/trigger",
931                                      pldm::utils::dbusProperties, "Set");
932              method.append("com.ampere.CrashCapture.Trigger", "TriggerActions",
933                            value);
934              bus.call_noreply(method);
935          }
936          catch (const std::exception& e)
937          {
938              lg2::error("call BERT trigger error - {ERROR}", "ERROR", e);
939          }
940      }
941  
942      return PLDM_SUCCESS;
943  }
944  
handlepldmMessagePollEvent(const pldm_msg * request,size_t payloadLength,uint8_t,pldm_tid_t tid,size_t eventDataOffset)945  int OemEventManager::handlepldmMessagePollEvent(
946      const pldm_msg* request, size_t payloadLength, uint8_t /* formatVersion */,
947      pldm_tid_t tid, size_t eventDataOffset)
948  {
949      /* This OEM event handler is only used for SoC terminus*/
950      if (!tidToSocketNameMap.contains(tid))
951      {
952          return PLDM_SUCCESS;
953      }
954  
955      auto eventData =
956          reinterpret_cast<const uint8_t*>(request->payload) + eventDataOffset;
957      auto eventDataSize = payloadLength - eventDataOffset;
958  
959      pldm_message_poll_event poll_event{};
960      auto rc = decode_pldm_message_poll_event_data(eventData, eventDataSize,
961                                                    &poll_event);
962      if (rc)
963      {
964          lg2::error("Failed to decode PldmMessagePollEvent event, error {RC} ",
965                     "RC", rc);
966          return rc;
967      }
968  
969      auto sensorID = poll_event.event_id;
970      /* The UE errors */
971      if (rasUESensorIDs.contains(sensorID))
972      {
973          pldm::utils::DBusMapping dbusMapping{
974              "/xyz/openbmc_project/led/groups/ras_ue_fault",
975              "xyz.openbmc_project.Led.Group", "Asserted", "bool"};
976          try
977          {
978              pldm::utils::DBusHandler().setDbusProperty(
979                  dbusMapping, pldm::utils::PropertyValue{bool(true)});
980          }
981          catch (const std::exception& e)
982          {
983              lg2::error(
984                  "Failed to set the RAS UE LED terminus ID {TID} sensor ID {SENSORID} - errors {ERROR}",
985                  "TID", tid, "SENSORID", sensorID, "ERROR", e);
986          }
987      }
988  
989      return PLDM_SUCCESS;
990  }
991  
oemPollForPlatformEvent(pldm_tid_t tid)992  exec::task<int> OemEventManager::oemPollForPlatformEvent(pldm_tid_t tid)
993  {
994      uint64_t t0 = 0;
995  
996      /* This OEM event handler is only used for SoC terminus */
997      if (!tidToSocketNameMap.contains(tid))
998      {
999          co_return PLDM_SUCCESS;
1000      }
1001  
1002      if (!timeStampMap.contains(tid))
1003      {
1004          sd_event_now(event.get(), CLOCK_MONOTONIC, &t0);
1005          timeStampMap.emplace(std::make_pair(tid, t0));
1006      }
1007      else
1008      {
1009          sd_event_now(event.get(), CLOCK_MONOTONIC, &t0);
1010          uint64_t elapsed = t0 - timeStampMap[tid];
1011          if (elapsed >= NORMAL_EVENT_POLLING_TIME)
1012          {
1013              co_await manager->pollForPlatformEvent(tid, 0, 0);
1014              timeStampMap[tid] = t0;
1015          }
1016      }
1017  
1018      co_return PLDM_SUCCESS;
1019  }
1020  } // namespace oem_ampere
1021  } // namespace pldm
1022