xref: /openbmc/pldm/oem/ampere/event/oem_event_manager.hpp (revision 4a5038370b1513022b41e5d99a319f627c0084c8)
1 #pragma once
2 
3 #include "libpldm/pldm.h"
4 
5 #include "common/instance_id.hpp"
6 #include "common/types.hpp"
7 #include "oem_event_manager.hpp"
8 #include "platform-mc/manager.hpp"
9 #include "requester/handler.hpp"
10 #include "requester/request.hpp"
11 
12 namespace pldm
13 {
14 namespace oem_ampere
15 {
16 using namespace pldm::pdr;
/** Interval used for normal event polling.
 *
 *  NOTE(review): this value was previously annotated "ms". A magnitude of
 *  5,000,000 looks more like microseconds (i.e. 5 s) than milliseconds --
 *  TODO confirm the unit against the code that consumes this macro.
 */
#define NORMAL_EVENT_POLLING_TIME 5000000

/** Map from an 8-bit key to a string (presumably event code -> log message;
 *  verify against the users of this alias).
 */
using EventToMsgMap_t = std::unordered_map<uint8_t, std::string>;
20 
/** Named sensor IDs referenced by the OEM event handling code.
 *
 *  The values are plain decimal sensor IDs; the enum is unscoped, so the
 *  enumerators convert implicitly to integers when compared with a raw
 *  sensor ID.
 */
enum sensor_ids
{
    DDR_STATUS = 51,
    PCP_VR_STATE = 75,
    SOC_VR_STATE = 80,
    DPHY_VR1_STATE = 85,
    DPHY_VR2_STATE = 90,
    D2D_VR_STATE = 95,
    IOC_VR1_STATE = 100,
    IOC_VR2_STATE = 105,
    PCI_D_VR_STATE = 110,
    PCI_A_VR_STATE = 115,
    PCIE_HOT_PLUG = 169,
    SOC_HEALTH_AVAILABILITY = 170,
    BOOT_OVERALL = 175,
    WATCH_DOG = 179,
    CORE_UE = 192,
    MCU_UE = 194,
    PCIE_UE = 196,
    SOC_UE = 198,
    SOC_BERT = 200,
};
43 
/** Constants describing boot progress readings. */
namespace boot
{
namespace status
{
/** Boot status codes. */
enum boot_status
{
    BOOT_STATUS_SUCCESS = 0x80,
    BOOT_STATUS_FAILURE = 0x81,
};
} // namespace status

namespace stage
{
/** Boot stage codes.
 *
 *  UEFI_STATUS_CLASS_CODE_MIN..UEFI_STATUS_CLASS_CODE_MAX delimit the range
 *  0x00-0x7f; the remaining enumerators are individual stages in the
 *  0x90-0x99 range.
 */
enum boot_stage
{
    UEFI_STATUS_CLASS_CODE_MIN = 0x00,
    UEFI_STATUS_CLASS_CODE_MAX = 0x7f,
    SECPRO = 0x90,
    MPRO = 0x91,
    ATF_BL1 = 0x92,
    ATF_BL2 = 0x93,
    DDR_INITIALIZATION = 0x94,
    DDR_TRAINING = 0x95,
    S0_DDR_TRAINING_FAILURE = 0x96,
    ATF_BL31 = 0x97,
    ATF_BL32 = 0x98,
    S1_DDR_TRAINING_FAILURE = 0x99,
};
} // namespace stage
} // namespace boot
73 
/** Severity levels accepted by the logging helpers.
 *
 *  Scoped enum with an int underlying type; enumerators are numbered
 *  consecutively from 0 (OK) to 3 (BIOSFWPANIC).
 */
enum class log_level : int
{
    OK,
    WARNING,
    CRITICAL,
    BIOSFWPANIC,
};
81 
/*
 * PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31      |   Reserved
 * Bit 30:24   |   Media slot number (0 - 63) This field can be used by UEFI
 *             |   to indicate the media slot number (such as NVMe/SSD slot)
 *             |   (7 bits)
 * Bit 23      |   Operation status: 1 = operation failed
 *             |   0 = operation successful
 * Bit 22      |   Action: 0 - Insertion 1 - Removal
 * Bit 21:18   |   Function (4 bits)
 * Bit 17:13   |   Device (5 bits)
 * Bit 12:5    |   Bus (8 bits)
 * Bit 4:0     |   Segment (5 bits)
 *
 * `value` holds the raw 32-bit reading and `bits` overlays it with the
 * fields above, declared from bit 0 upwards. Bit-field allocation order is
 * implementation-defined, so the mapping to the table relies on the
 * compiler placing the first declared field in the least significant bits.
 */
union PCIeHotPlugEventRecord_t
{
    uint32_t value;
    struct
    {
        uint32_t segment:5;
        uint32_t bus:8;
        uint32_t device:5;
        uint32_t function:4;
        uint32_t action:1;
        uint32_t opStatus:1;
        uint32_t mediaSlot:7;
        uint32_t reserved:1;
    } __attribute__((packed)) bits;
};

// The bit-field view must cover the raw reading exactly.
static_assert(sizeof(PCIeHotPlugEventRecord_t) == sizeof(uint32_t),
              "PCIeHotPlugEventRecord_t must be exactly 32 bits wide");
112 
/*
 * DIMM training failure value format (derived from the field declarations
 * below, assuming the compiler allocates bit-fields starting at bit 0)
 * FIELD       |                   COMMENT
 * Bit 31:24   |   Reserved byte (8 bits)
 * Bit 23:20   |   Reserved (4 bits)
 * Bit 19:16   |   Syndrome (4 bits)
 * Bit 15:14   |   Reserved (2 bits)
 * Bit 13      |   Lower nibble status error (1 bit)
 * Bit 12      |   Upper nibble status error (1 bit)
 * Bit 11:8    |   Slice number (4 bits)
 * Bit 7:5     |   Reserved (3 bits)
 * Bit 4:2     |   MCU rank index (3 bits)
 * Bit 1:0     |   Failure type (2 bits)
 *
 * `value` holds the raw 32-bit word and `bits` overlays it with the fields
 * above. Bit-field allocation order is implementation-defined.
 */
union DIMMTrainingFailure_t
{
    uint32_t value;
    struct
    {
        uint32_t type:2;
        uint32_t mcuRankIdx:3;
        uint32_t reserved_1:3; // byte0
        uint32_t sliceNum:4;
        uint32_t upperNibbStatErr:1;
        uint32_t lowerNibbStatErr:1;
        uint32_t reserved_2:2; // byte1
        uint32_t syndrome:4;
        uint32_t reserved_3:4; // byte2
        uint32_t reserved_byte:8;
    } __attribute__((packed)) bits;
};

// The bit-field view must cover the raw word exactly.
static_assert(sizeof(DIMMTrainingFailure_t) == sizeof(uint32_t),
              "DIMMTrainingFailure_t must be exactly 32 bits wide");
130 
/** Constants describing DDR status readings. */
namespace ddr
{
namespace status
{
/** DDR status codes. The values are not contiguous (0x02 and 0x03 are not
 *  defined here).
 */
enum ddr_status
{
    NO_SYSTEM_LEVEL_ERROR = 0x01,
    ECC_INITIALIZATION_FAILURE = 0x04,
    CONFIGURATION_FAILURE = 0x05,
    TRAINING_FAILURE = 0x06,
    OTHER_FAILURE = 0x07,
    BOOT_FAILURE_NO_VALID_CONFIG = 0x08,
    FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS = 0x09,
};
} // namespace status
} // namespace ddr
147 
/** Constants describing DIMM status readings and training failures. */
namespace dimm
{
namespace status
{
/** DIMM status codes. */
enum dimm_status
{
    INSTALLED_NO_ERROR = 0x01,
    NOT_INSTALLED = 0x02,
    OTHER_FAILURE = 0x07,
    INSTALLED_BUT_DISABLED = 0x10,
    TRAINING_FAILURE = 0x12,
    PMIC_HIGH_TEMP = 0x13,
    TSx_HIGH_TEMP = 0x14,
    SPD_HUB_HIGH_TEMP = 0x15,
    PMIC_TEMP_ALERT = 0x16,
};
} // namespace status

namespace training_failure
{
/** Kind of training failure: PHY-level or DIMM-level. */
enum dimm_training_failure_type
{
    PHY_TRAINING_FAILURE_TYPE = 0x01,
    DIMM_TRAINING_FAILURE_TYPE = 0x02,
};

namespace phy_syndrome
{
/** Syndrome codes for PHY training failures. */
enum phy_training_failure_syndrome
{
    NA = 0x00,
    PHY_TRAINING_SETUP_FAILURE = 0x01,
    CA_LEVELING = 0x02,
    PHY_WRITE_LEVEL_FAILURE = 0x03,
    PHY_READ_GATE_LEVELING_FAILURE = 0x04,
    PHY_READ_LEVEL_FAILURE = 0x05,
    WRITE_DQ_LEVELING = 0x06,
    PHY_SW_TRAINING_FAILURE = 0x07,
};
} // namespace phy_syndrome

namespace dimm_syndrome
{
/** Syndrome codes for DIMM training failures. */
enum dimm_training_failure_syndrome
{
    NA = 0x00,
    DRAM_VREFDQ_TRAINING_FAILURE = 0x01,
    LRDIMM_DB_TRAINING_FAILURE = 0x02,
    // Misspelled name ("LRDRIMM"); kept so existing users still compile.
    LRDRIMM_DB_SW_TRAINING_FAILURE = 0x03,
    // Correctly spelled alias with the same value; prefer it in new code.
    LRDIMM_DB_SW_TRAINING_FAILURE = LRDRIMM_DB_SW_TRAINING_FAILURE,
};
} // namespace dimm_syndrome
} // namespace training_failure
} // namespace dimm
201 
/*
 * PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31:30   |   Reserved (2 bits)
 * Bit 29      |   A VR Critical condition observed (1 bit)
 * Bit 28      |   A VR Warning condition observed (1 bit)
 * Bit 27:16   |   Reserved (12 bits)
 * Bit 15:8    |   VR status byte high - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (upper byte) (8 bits)
 * Bit 7:0     |   VR status byte low - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (lower byte) (8 bits)
 *
 * `value` holds the raw 32-bit reading and `bits` overlays it with the
 * fields above, declared from bit 0 upwards. Bit-field allocation order is
 * implementation-defined, so the mapping to the table relies on the
 * compiler placing the first declared field in the least significant bits.
 */
union VRDStatus_t
{
    uint32_t value;
    struct
    {
        uint32_t vr_status_byte_low:8;
        uint32_t vr_status_byte_high:8;
        uint32_t reserved_1:12;
        uint32_t warning:1;
        uint32_t critical:1;
        uint32_t reserved_2:2;
    } __attribute__((packed)) bits;
};

// The bit-field view must cover the raw reading exactly.
static_assert(sizeof(VRDStatus_t) == sizeof(uint32_t),
              "VRDStatus_t must be exactly 32 bits wide");
227 
228 /**
229  * @brief OemEventManager
230  *
231  *
232  */
233 class OemEventManager
234 {
235   public:
236     OemEventManager() = delete;
237     OemEventManager(const OemEventManager&) = delete;
238     OemEventManager(OemEventManager&&) = delete;
239     OemEventManager& operator=(const OemEventManager&) = delete;
240     OemEventManager& operator=(OemEventManager&&) = delete;
241     virtual ~OemEventManager() = default;
242 
OemEventManager(sdeventplus::Event & event,requester::Handler<requester::Request> *,pldm::InstanceIdDb &,platform_mc::Manager * platformManager)243     explicit OemEventManager(
244         sdeventplus::Event& event,
245         requester::Handler<requester::Request>* /* handler */,
246         pldm::InstanceIdDb& /* instanceIdDb */,
247         platform_mc::Manager* platformManager) :
248         event(event), manager(platformManager) {};
249 
250     /** @brief Decode sensor event messages and handle correspondingly.
251      *
252      *  @param[in] request - the request message of sensor event
253      *  @param[in] payloadLength - the payload length of sensor event
254      *  @param[in] formatVersion - the format version of sensor event
255      *  @param[in] tid - TID
256      *  @param[in] eventDataOffset - the event data offset of sensor event
257      *
258      *  @return int - returned error code
259      */
260     int handleSensorEvent(const pldm_msg* request, size_t payloadLength,
261                           uint8_t /* formatVersion */, pldm_tid_t tid,
262                           size_t eventDataOffset);
263 
264     /** @brief Handle the polled CPER (0x07, 0xFA) event class.
265      *
266      *  @param[in] tid - terminus ID
267      *  @param[out] eventId - Event ID
268      *  @param[in] eventData - event data
269      *  @param[in] eventDataSize - size of event data
270      *
271      *  @return int - PLDM completion code
272      */
273     int processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId,
274                                const uint8_t* eventData, size_t eventDataSize);
275 
276     /** @brief Decode sensor event messages and handle correspondingly.
277      *
278      *  @param[in] request - the request message of sensor event
279      *  @param[in] payloadLength - the payload length of sensor event
280      *  @param[in] formatVersion - the format version of sensor event
281      *  @param[in] tid - TID
282      *  @param[in] eventDataOffset - the event data offset of sensor event
283      *
284      *  @return int - returned error code
285      */
286     int handlepldmMessagePollEvent(
287         const pldm_msg* request, size_t payloadLength,
288         uint8_t /* formatVersion */, pldm_tid_t tid, size_t eventDataOffset);
289 
290     /** @brief A Coroutine to do OEM PollForPlatformEvent action
291      *
292      *  @param[in] tid - the destination TID
293      *  @return coroutine return_value - PLDM completion code
294      */
295     exec::task<int> oemPollForPlatformEvent(pldm_tid_t tid);
296 
297   protected:
298     /** @brief Create prefix string for logging message.
299      *
300      *  @param[in] tid - TID
301      *  @param[in] sensorId - Sensor ID
302      *
303      *  @return std::string - the prefeix string
304      */
305     std::string prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId);
306 
307     /** @brief Log the message into Redfish SEL.
308      *
309      *  @param[in] description - the logging message
310      *  @param[in] logLevel - the logging level
311      */
312     void sendJournalRedfish(const std::string& description,
313                             log_level& logLevel);
314 
315     /** @brief Convert the one-hot DIMM index byte into a string of DIMM
316      * indexes.
317      *
318      *  @param[in] dimmIdxs - the one-hot DIMM index byte
319      *
320      *  @return std::string - the string of DIMM indexes
321      */
322     std::string dimmIdxsToString(uint32_t dimmIdxs);
323 
324     /** @brief Convert sensor ID to DIMM index. Return maxDIMMInstantNum
325      * in failure.
326      *
327      *  @param[in] sensorId - sensorID
328      *
329      *  @return uint8_t - DIMM index
330      */
331     uint8_t sensorIdToDIMMIdx(const uint16_t& sensorId);
332 
333     /** @brief Convert the DIMM training failure into logging string.
334      *
335      *  @param[in] failureInfo - the one-hot DIMM index byte
336      *
337      *  @return std::string - the returned logging string
338      */
339     std::string dimmTrainingFailureToMsg(uint32_t failureInfo);
340 
341     /** @brief Handle numeric sensor event message from PCIe hot-plug sensor.
342      *
343      *  @param[in] tid - TID
344      *  @param[in] sensorId - Sensor ID
345      *  @param[in] presentReading - the present reading of the sensor
346      */
347     void handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
348                                 uint32_t presentReading);
349 
350     /** @brief Handle numeric sensor event message from boot overall sensor.
351      *
352      *  @param[in] tid - TID
353      *  @param[in] sensorId - Sensor ID
354      *  @param[in] presentReading - the present reading of the sensor
355      */
356     void handleBootOverallEvent(pldm_tid_t /*tid*/, uint16_t /*sensorId*/,
357                                 uint32_t presentReading);
358 
359     /** @brief Handle numeric sensor event message from DIMM status sensor.
360      *
361      *  @param[in] tid - TID
362      *  @param[in] sensorId - Sensor ID
363      *  @param[in] presentReading - the present reading of the sensor
364      */
365     void handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
366                                uint32_t presentReading);
367 
368     /** @brief Handle numeric sensor event message from DDR status sensor.
369      *
370      *  @param[in] tid - TID
371      *  @param[in] sensorId - Sensor ID
372      *  @param[in] presentReading - the present reading of the sensor
373      */
374     void handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
375                               uint32_t presentReading);
376 
377     /** @brief Handle numeric sensor event message from VRD status sensor.
378      *
379      *  @param[in] tid - TID
380      *  @param[in] sensorId - Sensor ID
381      *  @param[in] presentReading - the present reading of the sensor
382      */
383     void handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
384                               uint32_t presentReading);
385 
386     /** @brief Handle numeric sensor event message from Watchdog status sensor.
387      *
388      *  @param[in] tid - TID
389      *  @param[in] sensorId - Sensor ID
390      *  @param[in] presentReading - the present reading of the sensor
391      */
392     void handleNumericWatchdogEvent(pldm_tid_t tid, uint16_t sensorId,
393                                     uint32_t presentReading);
394 
395     /** @brief Handle numeric sensor event messages.
396      *
397      *  @param[in] tid - TID
398      *  @param[in] sensorId - Sensor ID
399      *  @param[in] sensorData - the sensor data
400      *  @param[in] sensorDataLength - the length of sensor data
401      *
402      *  @return int - returned error code
403      */
404     int processNumericSensorEvent(pldm_tid_t tid, uint16_t sensorId,
405                                   const uint8_t* sensorData,
406                                   size_t sensorDataLength);
407 
408     /** @brief Handle state sensor event messages.
409      *
410      *  @param[in] tid - TID
411      *  @param[in] sensorId - Sensor ID
412      *  @param[in] sensorData - the sensor data
413      *  @param[in] sensorDataLength - the length of sensor data
414      *
415      *  @return int - returned error code
416      */
417     int processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
418                                 const uint8_t* sensorData,
419                                 size_t sensorDataLength);
420 
421     /** @brief Handle op state sensor event messages.
422      *
423      *  @param[in] tid - TID
424      *  @param[in] sensorId - Sensor ID
425      *  @param[in] sensorData - the sensor data
426      *  @param[in] sensorDataLength - the length of sensor data
427      *
428      *  @return int - returned error code
429      */
430     int processSensorOpStateEvent(pldm_tid_t tid, uint16_t sensorId,
431                                   const uint8_t* sensorData,
432                                   size_t sensorDataLength);
433 
434     /** @brief reference of main event loop of pldmd, primarily used to schedule
435      *  work
436      */
437     sdeventplus::Event& event;
438 
439     /** @brief Latest OEM PollForPlatformEvent message timeStamp. */
440     std::map<pldm_tid_t, uint64_t> timeStampMap;
441 
442     /** @brief A Manager interface for calling the hook functions */
443     platform_mc::Manager* manager;
444 };
445 } // namespace oem_ampere
446 } // namespace pldm
447