xref: /openbmc/pldm/oem/ampere/event/oem_event_manager.hpp (revision 79f9ff6bfcc9f257397b857967f3a1af2c28779d)
1 #pragma once
2 
3 #include "libpldm/pldm.h"
4 
5 #include "common/instance_id.hpp"
6 #include "common/types.hpp"
7 #include "oem_event_manager.hpp"
8 #include "platform-mc/manager.hpp"
9 #include "requester/handler.hpp"
10 #include "requester/request.hpp"
11 
12 namespace pldm
13 {
14 namespace oem_ampere
15 {
// NOTE(review): a using-directive in a header makes every pldm::pdr name
// visible inside pldm::oem_ampere for all includers of this file.
using namespace pldm::pdr;

/** @brief Map type from an 8-bit key to a message string.
 *
 *  NOTE(review): judging by the name, the key is presumably an event/state
 *  value reported by a sensor - confirm against the users of this alias.
 */
using EventToMsgMap_t = std::unordered_map<uint8_t, std::string>;
19 
/** @brief Numeric IDs of the sensors this OEM event code refers to by name.
 *
 *  The enumerators are grouped below by the family their names suggest; the
 *  numeric values are unchanged.
 */
enum sensor_ids
{
    /* DDR status sensor */
    DDR_STATUS = 51,

    /* "*_VR_STATE" sensors, spaced five IDs apart */
    PCP_VR_STATE = 75,
    SOC_VR_STATE = 80,
    DPHY_VR1_STATE = 85,
    DPHY_VR2_STATE = 90,
    D2D_VR_STATE = 95,
    IOC_VR1_STATE = 100,
    IOC_VR2_STATE = 105,
    PCI_D_VR_STATE = 110,
    PCI_A_VR_STATE = 115,

    /* PCIe hot-plug, SoC health, boot progress and watchdog sensors */
    PCIE_HOT_PLUG = 169,
    SOC_HEALTH_AVAILABILITY = 170,
    BOOT_OVERALL = 175,
    WATCH_DOG = 179,

    /* "*_UE" sensors and the SoC BERT sensor */
    CORE_UE = 192,
    MCU_UE = 194,
    PCIE_UE = 196,
    SOC_UE = 198,
    SOC_BERT = 200,
};
42 
namespace boot::status
{
/** @brief Boot status codes (success / failure). */
enum boot_status
{
    BOOT_STATUS_SUCCESS = 0x80,
    BOOT_STATUS_FAILURE = 0x81,
};
} // namespace boot::status

namespace boot::stage
{
/** @brief Boot stage codes.
 *
 *  0x00-0x7f is bracketed by the UEFI_STATUS_CLASS_CODE_MIN/MAX pair; the
 *  remaining enumerators are individual stage values from 0x90 to 0x99.
 */
enum boot_stage
{
    /* UEFI status class code range (inclusive bounds) */
    UEFI_STATUS_CLASS_CODE_MIN = 0x00,
    UEFI_STATUS_CLASS_CODE_MAX = 0x7f,

    /* Individual stage codes */
    SECPRO = 0x90,
    MPRO = 0x91,
    ATF_BL1 = 0x92,
    ATF_BL2 = 0x93,
    DDR_INITIALIZATION = 0x94,
    DDR_TRAINING = 0x95,
    S0_DDR_TRAINING_FAILURE = 0x96,
    ATF_BL31 = 0x97,
    ATF_BL32 = 0x98,
    S1_DDR_TRAINING_FAILURE = 0x99,
};
} // namespace boot::stage
72 
/** @brief Severity levels accepted by the logging helper.
 *
 *  Values are consecutive starting at 0, written out explicitly.
 */
enum class log_level : int
{
    OK = 0,
    WARNING = 1,
    CRITICAL = 2,
    BIOSFWPANIC = 3,
};
80 
/*
 * PCIe hot-plug PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31      |   Reserved
 * Bit 30:24   |   Media slot number (0 - 63) This field can be used by UEFI
 *             |   to indicate the media slot number (such as NVMe/SSD slot)
 *             |   (7 bits)
 * Bit 23      |   Operation status: 1 = operation failed
 *             |   0 = operation successful
 * Bit 22      |   Action: 0 - Insertion 1 - Removal
 * Bit 21:18   |   Function (4 bits)
 * Bit 17:13   |   Device (5 bits)
 * Bit 12:5    |   Bus (8 bits)
 * Bit 4:0     |   Segment (5 bits)
 *
 * `value` holds the raw 32-bit reading and `bits` overlays it field by field.
 * The members are declared from bit 0 upwards, so the overlay matches the
 * table only on ABIs that allocate bit-fields starting at the least
 * significant bit.
 */
union PCIeHotPlugEventRecord_t
{
    uint32_t value;
    struct
    {
        uint32_t segment:5;   // bit 4:0
        uint32_t bus:8;       // bit 12:5
        uint32_t device:5;    // bit 17:13
        uint32_t function:4;  // bit 21:18
        uint32_t action:1;    // bit 22
        uint32_t opStatus:1;  // bit 23
        uint32_t mediaSlot:7; // bit 30:24
        uint32_t reserved:1;  // bit 31
    } __attribute__((packed)) bits;
};

// The overlay is only meaningful if the bit-fields occupy exactly the 32 bits
// of `value`; fail the build instead of silently mis-decoding.
static_assert(sizeof(PCIeHotPlugEventRecord_t) == sizeof(uint32_t),
              "PCIeHotPlugEventRecord_t must be exactly 32 bits wide");
111 
/*
 * DIMM training failure value format (derived from the bit-field widths
 * below, least significant bit first)
 * FIELD       |                   COMMENT
 * Bit 31:24   |   Reserved (8 bits)
 * Bit 23:20   |   Reserved (4 bits)
 * Bit 19:16   |   syndrome (4 bits)
 * Bit 15:14   |   Reserved (2 bits)
 * Bit 13      |   lowerNibbStatErr (1 bit)
 * Bit 12      |   upperNibbStatErr (1 bit)
 * Bit 11:8    |   sliceNum (4 bits)
 * Bit 7:5     |   Reserved (3 bits)
 * Bit 4:2     |   mcuRankIdx (3 bits)
 * Bit 1:0     |   type (2 bits)
 *
 * NOTE(review): `type` and `syndrome` presumably carry the
 * dimm::training_failure enumerations declared in this header - confirm
 * against the decoder.
 */
union DIMMTrainingFailure_t
{
    uint32_t value;
    struct
    {
        uint32_t type:2;
        uint32_t mcuRankIdx:3;
        uint32_t reserved_1:3; // byte0
        uint32_t sliceNum:4;
        uint32_t upperNibbStatErr:1;
        uint32_t lowerNibbStatErr:1;
        uint32_t reserved_2:2; // byte1
        uint32_t syndrome:4;
        uint32_t reserved_3:4; // byte2
        uint32_t reserved_byte:8;
    } __attribute__((packed)) bits;
};

// The overlay is only meaningful if the bit-fields occupy exactly the 32 bits
// of `value`; fail the build instead of silently mis-decoding.
static_assert(sizeof(DIMMTrainingFailure_t) == sizeof(uint32_t),
              "DIMMTrainingFailure_t must be exactly 32 bits wide");
129 
namespace ddr::status
{
/** @brief DDR status codes.
 *
 *  The values are not contiguous: 0x02 and 0x03 are not defined here.
 */
enum ddr_status
{
    NO_SYSTEM_LEVEL_ERROR = 0x01,
    ECC_INITIALIZATION_FAILURE = 0x04,
    CONFIGURATION_FAILURE = 0x05,
    TRAINING_FAILURE = 0x06,
    OTHER_FAILURE = 0x07,
    BOOT_FAILURE_NO_VALID_CONFIG = 0x08,
    FAILSAFE_ACTIVATED_NEXT_BOOT_SUCCESS = 0x09,
};
} // namespace ddr::status
146 
namespace dimm::status
{
/** @brief DIMM status codes. */
enum dimm_status
{
    INSTALLED_NO_ERROR = 0x01,
    NOT_INSTALLED = 0x02,
    OTHER_FAILURE = 0x07,
    INSTALLED_BUT_DISABLED = 0x10,
    TRAINING_FAILURE = 0x12,
    PMIC_HIGH_TEMP = 0x13,
    TSx_HIGH_TEMP = 0x14,
    SPD_HUB_HIGH_TEMP = 0x15,
    PMIC_TEMP_ALERT = 0x16,
};
} // namespace dimm::status

namespace dimm::training_failure
{
/** @brief Kind of training failure: PHY or DIMM. */
enum dimm_training_failure_type
{
    PHY_TRAINING_FAILURE_TYPE = 0x01,
    DIMM_TRAINING_FAILURE_TYPE = 0x02,
};
} // namespace dimm::training_failure

namespace dimm::training_failure::phy_syndrome
{
/** @brief Syndrome codes for the PHY training failure type. */
enum phy_training_failure_syndrome
{
    NA = 0x00,
    PHY_TRAINING_SETUP_FAILURE = 0x01,
    CA_LEVELING = 0x02,
    PHY_WRITE_LEVEL_FAILURE = 0x03,
    PHY_READ_GATE_LEVELING_FAILURE = 0x04,
    PHY_READ_LEVEL_FAILURE = 0x05,
    WRITE_DQ_LEVELING = 0x06,
    PHY_SW_TRAINING_FAILURE = 0x07,
};
} // namespace dimm::training_failure::phy_syndrome

namespace dimm::training_failure::dimm_syndrome
{
/** @brief Syndrome codes for the DIMM training failure type. */
enum dimm_training_failure_syndrome
{
    NA = 0x00,
    DRAM_VREFDQ_TRAINING_FAILURE = 0x01,
    LRDIMM_DB_TRAINING_FAILURE = 0x02,
    // NOTE(review): spelled "LRDRIMM" (sic); the name is kept as-is because
    // code outside this header may reference it.
    LRDRIMM_DB_SW_TRAINING_FAILURE = 0x03,
};
} // namespace dimm::training_failure::dimm_syndrome
200 
/*
 * VR status PresentReading value format
 * FIELD       |                   COMMENT
 * Bit 31:30   |   Reserved (2 bits)
 * Bit 29      |   A VR Critical condition observed (1 bit)
 * Bit 28      |   A VR Warning condition observed (1 bit)
 * Bit 27:16   |   Reserved (12 bits)
 * Bit 15:8    |   VR status byte high - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (upper byte) (8 bits)
 * Bit 7:0     |   VR status byte low - The bit definition is the same as the
 *             |   corresponding VR PMBUS STATUS_WORD (lower byte) (8 bits)
 *
 * `value` holds the raw 32-bit reading and `bits` overlays it field by field.
 * The members are declared from bit 0 upwards, so the overlay matches the
 * table only on ABIs that allocate bit-fields starting at the least
 * significant bit.
 */
union VRDStatus_t
{
    uint32_t value;
    struct
    {
        uint32_t vr_status_byte_low:8;  // bit 7:0
        uint32_t vr_status_byte_high:8; // bit 15:8
        uint32_t reserved_1:12;         // bit 27:16
        uint32_t warning:1;             // bit 28
        uint32_t critical:1;            // bit 29
        uint32_t reserved_2:2;          // bit 31:30
    } __attribute__((packed)) bits;
};

// The overlay is only meaningful if the bit-fields occupy exactly the 32 bits
// of `value`; fail the build instead of silently mis-decoding.
static_assert(sizeof(VRDStatus_t) == sizeof(uint32_t),
              "VRDStatus_t must be exactly 32 bits wide");
226 
227 /**
228  * @brief OemEventManager
229  *
230  *
231  */
232 class OemEventManager
233 {
234   public:
235     OemEventManager() = delete;
236     OemEventManager(const OemEventManager&) = delete;
237     OemEventManager(OemEventManager&&) = delete;
238     OemEventManager& operator=(const OemEventManager&) = delete;
239     OemEventManager& operator=(OemEventManager&&) = delete;
240     virtual ~OemEventManager() = default;
241 
OemEventManager(sdeventplus::Event & event,requester::Handler<requester::Request> *,pldm::InstanceIdDb &)242     explicit OemEventManager(
243         sdeventplus::Event& event,
244         requester::Handler<requester::Request>* /* handler */,
245         pldm::InstanceIdDb& /* instanceIdDb */) : event(event) {};
246 
247     /** @brief Decode sensor event messages and handle correspondingly.
248      *
249      *  @param[in] request - the request message of sensor event
250      *  @param[in] payloadLength - the payload length of sensor event
251      *  @param[in] formatVersion - the format version of sensor event
252      *  @param[in] tid - TID
253      *  @param[in] eventDataOffset - the event data offset of sensor event
254      *
255      *  @return int - returned error code
256      */
257     int handleSensorEvent(const pldm_msg* request, size_t payloadLength,
258                           uint8_t /* formatVersion */, pldm_tid_t tid,
259                           size_t eventDataOffset);
260 
261     /** @brief Handle the polled CPER (0x07, 0xFA) event class.
262      *
263      *  @param[in] tid - terminus ID
264      *  @param[out] eventId - Event ID
265      *  @param[in] eventData - event data
266      *  @param[in] eventDataSize - size of event data
267      *
268      *  @return int - PLDM completion code
269      */
270     int processOemMsgPollEvent(pldm_tid_t tid, uint16_t eventId,
271                                const uint8_t* eventData, size_t eventDataSize);
272 
273     /** @brief Decode sensor event messages and handle correspondingly.
274      *
275      *  @param[in] request - the request message of sensor event
276      *  @param[in] payloadLength - the payload length of sensor event
277      *  @param[in] formatVersion - the format version of sensor event
278      *  @param[in] tid - TID
279      *  @param[in] eventDataOffset - the event data offset of sensor event
280      *
281      *  @return int - returned error code
282      */
283     int handlepldmMessagePollEvent(
284         const pldm_msg* request, size_t payloadLength,
285         uint8_t /* formatVersion */, pldm_tid_t tid, size_t eventDataOffset);
286 
287   protected:
288     /** @brief Create prefix string for logging message.
289      *
290      *  @param[in] tid - TID
291      *  @param[in] sensorId - Sensor ID
292      *
293      *  @return std::string - the prefeix string
294      */
295     std::string prefixMsgStrCreation(pldm_tid_t tid, uint16_t sensorId);
296 
297     /** @brief Log the message into Redfish SEL.
298      *
299      *  @param[in] description - the logging message
300      *  @param[in] logLevel - the logging level
301      */
302     void sendJournalRedfish(const std::string& description,
303                             log_level& logLevel);
304 
305     /** @brief Convert the one-hot DIMM index byte into a string of DIMM
306      * indexes.
307      *
308      *  @param[in] dimmIdxs - the one-hot DIMM index byte
309      *
310      *  @return std::string - the string of DIMM indexes
311      */
312     std::string dimmIdxsToString(uint32_t dimmIdxs);
313 
314     /** @brief Convert sensor ID to DIMM index. Return maxDIMMInstantNum
315      * in failure.
316      *
317      *  @param[in] sensorId - sensorID
318      *
319      *  @return uint8_t - DIMM index
320      */
321     uint8_t sensorIdToDIMMIdx(const uint16_t& sensorId);
322 
323     /** @brief Convert the DIMM training failure into logging string.
324      *
325      *  @param[in] failureInfo - the one-hot DIMM index byte
326      *
327      *  @return std::string - the returned logging string
328      */
329     std::string dimmTrainingFailureToMsg(uint32_t failureInfo);
330 
331     /** @brief Handle numeric sensor event message from PCIe hot-plug sensor.
332      *
333      *  @param[in] tid - TID
334      *  @param[in] sensorId - Sensor ID
335      *  @param[in] presentReading - the present reading of the sensor
336      */
337     void handlePCIeHotPlugEvent(pldm_tid_t tid, uint16_t sensorId,
338                                 uint32_t presentReading);
339 
340     /** @brief Handle numeric sensor event message from boot overall sensor.
341      *
342      *  @param[in] tid - TID
343      *  @param[in] sensorId - Sensor ID
344      *  @param[in] presentReading - the present reading of the sensor
345      */
346     void handleBootOverallEvent(pldm_tid_t /*tid*/, uint16_t /*sensorId*/,
347                                 uint32_t presentReading);
348 
349     /** @brief Handle numeric sensor event message from DIMM status sensor.
350      *
351      *  @param[in] tid - TID
352      *  @param[in] sensorId - Sensor ID
353      *  @param[in] presentReading - the present reading of the sensor
354      */
355     void handleDIMMStatusEvent(pldm_tid_t tid, uint16_t sensorId,
356                                uint32_t presentReading);
357 
358     /** @brief Handle numeric sensor event message from DDR status sensor.
359      *
360      *  @param[in] tid - TID
361      *  @param[in] sensorId - Sensor ID
362      *  @param[in] presentReading - the present reading of the sensor
363      */
364     void handleDDRStatusEvent(pldm_tid_t tid, uint16_t sensorId,
365                               uint32_t presentReading);
366 
367     /** @brief Handle numeric sensor event message from VRD status sensor.
368      *
369      *  @param[in] tid - TID
370      *  @param[in] sensorId - Sensor ID
371      *  @param[in] presentReading - the present reading of the sensor
372      */
373     void handleVRDStatusEvent(pldm_tid_t tid, uint16_t sensorId,
374                               uint32_t presentReading);
375 
376     /** @brief Handle numeric sensor event message from Watchdog status sensor.
377      *
378      *  @param[in] tid - TID
379      *  @param[in] sensorId - Sensor ID
380      *  @param[in] presentReading - the present reading of the sensor
381      */
382     void handleNumericWatchdogEvent(pldm_tid_t tid, uint16_t sensorId,
383                                     uint32_t presentReading);
384 
385     /** @brief Handle numeric sensor event messages.
386      *
387      *  @param[in] tid - TID
388      *  @param[in] sensorId - Sensor ID
389      *  @param[in] sensorData - the sensor data
390      *  @param[in] sensorDataLength - the length of sensor data
391      *
392      *  @return int - returned error code
393      */
394     int processNumericSensorEvent(pldm_tid_t tid, uint16_t sensorId,
395                                   const uint8_t* sensorData,
396                                   size_t sensorDataLength);
397 
398     /** @brief Handle state sensor event messages.
399      *
400      *  @param[in] tid - TID
401      *  @param[in] sensorId - Sensor ID
402      *  @param[in] sensorData - the sensor data
403      *  @param[in] sensorDataLength - the length of sensor data
404      *
405      *  @return int - returned error code
406      */
407     int processStateSensorEvent(pldm_tid_t tid, uint16_t sensorId,
408                                 const uint8_t* sensorData,
409                                 size_t sensorDataLength);
410 
411     /** @brief Handle op state sensor event messages.
412      *
413      *  @param[in] tid - TID
414      *  @param[in] sensorId - Sensor ID
415      *  @param[in] sensorData - the sensor data
416      *  @param[in] sensorDataLength - the length of sensor data
417      *
418      *  @return int - returned error code
419      */
420     int processSensorOpStateEvent(pldm_tid_t tid, uint16_t sensorId,
421                                   const uint8_t* sensorData,
422                                   size_t sensorDataLength);
423 
424     /** @brief reference of main event loop of pldmd, primarily used to schedule
425      *  work
426      */
427     sdeventplus::Event& event;
428 };
429 } // namespace oem_ampere
430 } // namespace pldm
431