xref: /openbmc/openpower-occ-control/occ_manager.hpp (revision ffb6321e9e5acd2823e19174c28683dee8140d95)
1 #pragma once
2 
3 #include "occ_pass_through.hpp"
4 #include "occ_status.hpp"
5 #include "pldm.hpp"
6 
7 #ifdef PHAL_SUPPORT
8 #include <libphal.H>
9 #endif
10 #include "powercap.hpp"
11 #include "powermode.hpp"
12 #include "utils.hpp"
13 
14 #include <sdbusplus/bus.hpp>
15 #include <sdeventplus/event.hpp>
16 #include <sdeventplus/source/signal.hpp>
17 #include <sdeventplus/utility/timer.hpp>
18 #include <stdplus/signal.hpp>
19 
20 #include <cstring>
21 #include <functional>
22 #include <vector>
23 
24 namespace sdbusRule = sdbusplus::bus::match::rules;
25 namespace open_power
26 {
27 namespace occ
28 {
29 
/** @brief FRU type identifiers used to classify OCC sensors.
 *
 *  Every enumerator has an explicitly assigned value; the values 4 and 5
 *  are not assigned by this enum. Several enumerators are used as keys of
 *  the dimm sensor-name maps in Manager.
 *
 *  NOTE(review): the values presumably match the FRU type codes reported by
 *  the OCC - confirm before renumbering anything here.
 */
enum occFruType
{
    processorCore = 0,   ///< processor core
    internalMemCtlr = 1, ///< internal memory controller
    dimm = 2,            ///< DIMM
    memCtrlAndDimm = 3,  ///< memory controller and DIMM
    VRMVdd = 6,          ///< VRM Vdd
    PMIC = 7,            ///< PMIC
    memCtlrExSensor = 8, ///< memory controller external sensor
    processorIoRing = 9, ///< processor I/O ring
};
41 
/** @brief Default time, in seconds, between OCC poll commands */
constexpr unsigned int defaultPollingInterval = 5;

/** @brief D-Bus object path of the ambient temperature sensor */
constexpr const char* AMBIENT_PATH =
    "/xyz/openbmc_project/sensors/temperature/Ambient_Virtual_Temp";
/** @brief D-Bus interface that carries the ambient temperature property */
constexpr const char* AMBIENT_INTERFACE = "xyz.openbmc_project.Sensor.Value";
/** @brief Name of the ambient temperature property */
constexpr const char* AMBIENT_PROP = "Value";

/** @brief D-Bus object path of the altitude sensor */
constexpr const char* ALTITUDE_PATH =
    "/xyz/openbmc_project/sensors/altitude/Altitude";
/** @brief D-Bus interface that carries the altitude property */
constexpr const char* ALTITUDE_INTERFACE = "xyz.openbmc_project.Sensor.Value";
/** @brief Name of the altitude property */
constexpr const char* ALTITUDE_PROP = "Value";

/** @brief Extended sensor label for memory power: the ASCII characters
 *         "PWRM" written as a hex string */
constexpr const char* EXTN_LABEL_PWRM_MEMORY_POWER = "5057524d";
/** @brief Extended sensor label for processor power: the ASCII characters
 *         "PWRP" written as a hex string */
constexpr const char* EXTN_LABEL_PWRP_PROCESSOR_POWER = "50575250";
55 
56 /** @class Manager
57  *  @brief Builds and manages OCC objects
58  */
59 struct Manager
60 {
61   public:
62     Manager() = delete;
63     Manager(const Manager&) = delete;
64     Manager& operator=(const Manager&) = delete;
65     Manager(Manager&&) = delete;
66     Manager& operator=(Manager&&) = delete;
67     ~Manager() = default;
68 
69     /** @brief Adds OCC pass-through and status objects on the bus
70      *         when corresponding CPU inventory is created.
71      *
72      *  @param[in] event - Unique ptr reference to sd_event
73      */
Manageropen_power::occ::Manager74     explicit Manager(EventPtr& event) :
75         event(event), pollInterval(defaultPollingInterval),
76         sdpEvent(sdeventplus::Event::get_default()),
77         _pollTimer(
78             std::make_unique<
79                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
80                 sdpEvent, std::bind(&Manager::pollerTimerExpired, this))),
81         ambientPropChanged(
82             utils::getBus(),
83             sdbusRule::member("PropertiesChanged") +
84                 sdbusRule::path(AMBIENT_PATH) +
85                 sdbusRule::argN(0, AMBIENT_INTERFACE) +
86                 sdbusRule::interface("org.freedesktop.DBus.Properties"),
87             std::bind(&Manager::ambientCallback, this, std::placeholders::_1)),
88         discoverTimer(
89             std::make_unique<
90                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
91                 sdpEvent, std::bind(&Manager::findAndCreateObjects, this))),
92         waitForAllOccsTimer(
93             std::make_unique<
94                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
95                 sdpEvent, std::bind(&Manager::occsNotAllRunning, this))),
96         throttlePldmTraceTimer(
97             std::make_unique<
98                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
99                 sdpEvent, std::bind(&Manager::throttlePldmTraceExpired, this)))
100     {
101         findAndCreateObjects();
102 
103         readAltitude();
104     }
105 
106     void createPldmHandle();
107 
108     /** @brief Return the number of bound OCCs */
getNumOCCsopen_power::occ::Manager109     inline auto getNumOCCs() const
110     {
111         return activeCount;
112     }
113 
114     /** @brief Called by a Device to report that the SBE timed out
115      *         and appropriate action should be taken
116      *
117      * @param[in] instance - the OCC instance id
118      */
119     void sbeTimeout(unsigned int instance);
120 
121     /** @brief Return the latest ambient and altitude readings
122      *
123      *  @param[out] ambientValid - true if ambientTemp is valid
124      *  @param[out] ambient - ambient temperature in degrees C
125      *  @param[out] altitude - altitude in meters
126      */
127     void getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
128                         uint16_t& altitude) const;
129 
130     /** @brief Notify pcap object to update bounds */
131     void updatePcapBounds() const;
132 
133     /**
134      * @brief Set all sensor values of this OCC to NaN.
135      * @param[in] id - Id of the OCC.
136      * */
137     void setSensorValueToNaN(uint32_t id) const;
138 
139     /** @brief Set all sensor values of this OCC to NaN and non functional.
140      *
141      *  @param[in] id - Id of the OCC.
142      */
143     void setSensorValueToNonFunctional(uint32_t id) const;
144 
145     /** @brief Clear any state flags that need to be reset when the host state
146      * is off */
147     void hostPoweredOff();
148 
149     /** @brief Collect data to include in BMC dumps
150      *         This will get called when app receives a SIGUSR1 signal
151      */
152     void collectDumpData(sdeventplus::source::Signal&,
153                          const struct signalfd_siginfo*);
154 
155     /** @brief Name of file to put the occ-control dump data */
156     static const std::string dumpFile;
157 
158   private:
159     /** @brief Creates the OCC D-Bus objects.
160      */
161     void findAndCreateObjects();
162 
163     /** @brief Callback that responds to cpu creation in the inventory -
164      *         by creating the needed objects.
165      *
166      *  @param[in] msg - bus message
167      *
168      *  @returns 0 to indicate success
169      */
170     int cpuCreated(sdbusplus::message_t& msg);
171 
172     /** @brief Create child OCC objects.
173      *
174      *  @param[in] occ - the occ name, such as occ0.
175      */
176     void createObjects(const std::string& occ);
177 
178     /** @brief Callback handler invoked by Status object when the OccActive
179      *         property is changed. This is needed to make sure that the
180      *         error detection is started only after all the OCCs are bound.
181      *         Similarly, when one of the OCC gets its OccActive property
182      *         un-set, then the OCC error detection needs to be stopped on
183      *         all the OCCs
184      *
185      *  @param[in] status - OccActive status
186      */
187     void statusCallBack(instanceID instance, bool status);
188 
189     /** @brief Set flag that a PM Complex reset is needed (to be initiated
190      * later) */
191     void resetOccRequest(instanceID instance);
192 
193     /** @brief Initiate the request to reset the PM Complex (PLDM -> HBRT) */
194     void initiateOccRequest(instanceID instance);
195 
196     /** @brief Sends a Heartbeat command to host control command handler */
197     void sendHeartBeat();
198 
199     /** @brief reference to sd_event wrapped in unique_ptr */
200     EventPtr& event;
201 
202     /** @brief OCC pass-through objects */
203     std::vector<std::unique_ptr<PassThrough>> passThroughObjects;
204 
205     /** @brief OCC Status objects */
206     std::vector<std::unique_ptr<Status>> statusObjects;
207 
208     /** @brief Power cap monitor and occ notification object */
209     std::unique_ptr<open_power::occ::powercap::PowerCap> pcap;
210 
211     /** @brief Power mode monitor and notification object */
212     std::unique_ptr<open_power::occ::powermode::PowerMode> pmode;
213 
214     /** @brief sbdbusplus match objects */
215     std::vector<sdbusplus::bus::match_t> cpuMatches;
216 
217     /** @brief Number of OCCs that are bound */
218     uint8_t activeCount = 0;
219 
220     /** @brief Number of seconds between poll commands */
221     uint8_t pollInterval;
222 
223     /** @brief Ambient temperature of the system in degrees C */
224     uint8_t ambient = 0xFF; // default: not available
225 
226     /** @brief Altitude of the system in meters */
227     uint16_t altitude = 0xFFFF; // default: not available
228 
229     /** @brief Poll timer event */
230     sdeventplus::Event sdpEvent;
231 
232     /** @brief Flags to indicate if waiting for all of the OCC active sensors to
233      * come online */
234     bool waitingForAllOccActiveSensors = false;
235 
236     /** @brief Set containing intance numbers of any OCCs that became active
237      *         while waiting for status objects to be created */
238     std::set<uint8_t> queuedActiveState;
239 
240     /**
241      * @brief The timer to be used once the OCC goes active.  When it expires,
242      *        a POLL command will be sent to the OCC and then timer restarted.
243      */
244     std::unique_ptr<
245         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
246         _pollTimer;
247 
248     /** @brief Subscribe to ambient temperature changed events */
249     sdbusplus::bus::match_t ambientPropChanged;
250 
251     /** @brief Flag to indicate that a PM complex reset needs to happen */
252     bool resetRequired = false;
253     /** @brief Instance number of the OCC/processor that triggered the reset */
254     uint8_t resetInstance = 255;
255     /** @brief Set when a PM complex reset has been issued (to prevent multiple
256      * requests) */
257     bool resetInProgress = false;
258 
259     /** @brief Callback handler invoked by the PLDM event handler when state of
260      *         the OCC is toggled by the host. The caller passes the instance
261      *         of the OCC and state of the OCC.
262      *
263      *  @param[in] instance - instance of the OCC
264      *  @param[in] status - true when the OCC goes active and false when the OCC
265      *                      goes inactive
266      *
267      *  @return true if setting the state of OCC is successful and false if it
268      *          fails.
269      */
270     bool updateOCCActive(instanceID instance, bool status);
271 
272     /** @brief Callback handler invoked by the PLDM event handler when mode of
273      *         the OCC SAFE MODE is inacted or cleared.
274      */
275     void updateOccSafeMode(bool safeState);
276 
277     /** @brief Callback handler invoked by PLDM sensor change when
278      *         the HRESET succeeds or fails.
279      *
280      *  @param[in] instance - the SBE instance id
281      *  @param[in] success - true if the HRESET succeeded, otherwise false
282      */
283     void sbeHRESETResult(instanceID instance, bool success);
284 
285 #ifdef PHAL_SUPPORT
286     /** @brief Helper function to check whether an SBE dump should be collected
287      *         now.
288      *
289      *  @param[in] instance - the SBE instance id
290      *
291      *  @return true if an SBE dump should be collected and false if not
292      */
293     bool sbeCanDump(unsigned int instance);
294 
295     /** @brief Helper function to set the SBE state through PDBG/PHAL
296      *
297      * @param[in] instance - instance of the SBE
298      * @param[in] state - the state to which the SBE should be set
299      *
300      */
301     void setSBEState(unsigned int instance, enum sbe_state state);
302 
303     /** @brief Helper function to get the SBE instance PDBG processor target
304      *
305      * @param[in] instance - the SBE instance id
306      *
307      * @return a pointer to the PDBG target
308      */
309     struct pdbg_target* getPdbgTarget(unsigned int instance);
310 
311     /** @brief Whether pdbg_targets_init has been called */
312     bool pdbgInitialized = false;
313 #endif
314 
315     std::unique_ptr<pldm::Interface> pldmHandle = nullptr;
316 
317     /**
318      * @brief Timer used when discovering OCCs in /dev.
319      */
320     std::unique_ptr<
321         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
322         discoverTimer;
323 
324     /**
325      * @brief Used when discovering /dev/occ objects to know if
326      *        any were added since the last check.
327      */
328     std::vector<int> prevOCCSearch;
329 
330     /**
331      * @brief Timer used when waiting for OCCs to go active.
332      */
333     std::unique_ptr<
334         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
335         waitForAllOccsTimer;
336 
337     /**
338      * @brief Timer used to throttle PLDM traces when there are problems
339      determining the OCC status via pldm. Used to prevent excessive
340      journal traces.
341      */
342     std::unique_ptr<
343         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
344         throttlePldmTraceTimer;
345     /**
346      * @brief onPldmTimeoutCreatePel flag will be used to indicate if
347      *        a PEL should get created when the throttlePldmTraceTimer expires.
348      *        The first time the throttlePldmTraceTimer expires, the traces
349      *        will be throttled and then the timer gets restarted. The
350      *        next time the timer expires, a PEL will get created.
351      */
352     bool onPldmTimeoutCreatePel = false;
353 
354     /** @brief Check if all of the OCC Active sensors are available and if not
355      * restart the discoverTimer
356      */
357     void throttlePldmTraceExpired();
358 
359     /** @brief Create a PEL when the code is not able to obtain the OCC PDRs
360      * via PLDM. This is called when the throttlePldmTraceTimer expires.
361      */
362     void createPldmSensorPEL();
363 
364     /** @brief Called when code times out waiting for all OCCs to be running or
365      *         after the app is restarted (Status does not callback into
366      * Manager).
367      */
368     void occsNotAllRunning();
369 
370     /** @brief Check if all of the OCC Active sensors are available and if not
371      * restart the discoverTimer
372      */
373     void checkAllActiveSensors();
374 
375     /**
376      * @brief Called when poll timer expires and forces a POLL command to the
377      * OCC. The poll timer will then be restarted.
378      * */
379     void pollerTimerExpired();
380 
381     /**
382      * @brief Finds the OCC devices in /dev
383      *
384      * @return The IDs of the OCCs - 0, 1, etc.
385      */
386     std::vector<int> findOCCsInDev();
387 
388     /**
389      * @brief Gets the occ sensor values.
390      * @param[in] occ - pointer to OCCs Status object
391      * */
392     void getSensorValues(std::unique_ptr<Status>& occ);
393 
394     /**
395      * @brief Trigger OCC driver to read the temperature sensors.
396      * @param[in] path - path of the OCC sensors.
397      * @param[in] id - Id of the OCC.
398      * */
399     void readTempSensors(const fs::path& path, uint32_t id);
400 
401     /**
402      * @brief Trigger OCC driver to read the extended sensors.
403      * @param[in] path - path of the OCC sensors.
404      * @param[in] id - Id of the OCC.
405      * */
406     void readExtnSensors(const fs::path& path, uint32_t id);
407 
408     /**
409      * @brief Trigger OCC driver to read the power sensors.
410      * @param[in] path - path of the OCC sensors.
411      * @param[in] id - Id of the OCC.
412      * */
413     void readPowerSensors(const fs::path& path, uint32_t id);
414 
415     /** @brief Store the existing OCC sensors on D-BUS */
416     std::map<std::string, uint32_t> existingSensors;
417 
418     /** @brief Get FunctionID from the `powerX_label` file.
419      *  @param[in] value - the value of the `powerX_label` file.
420      *  @returns FunctionID of the power sensors.
421      */
422     std::optional<std::string> getPowerLabelFunctionID(
423         const std::string& value);
424 
425     /** @brief The power sensor names map */
426     const std::map<std::string, std::string> powerSensorName = {
427         {"system", "total_power"}, {"1", "p0_mem_power"},
428         {"2", "p1_mem_power"},     {"3", "p2_mem_power"},
429         {"4", "p3_mem_power"},     {"5", "p0_power"},
430         {"6", "p1_power"},         {"7", "p2_power"},
431         {"8", "p3_power"},         {"9", "p0_cache_power"},
432         {"10", "p1_cache_power"},  {"11", "p2_cache_power"},
433         {"12", "p3_cache_power"},  {"13", "io_a_power"},
434         {"14", "io_b_power"},      {"15", "io_c_power"},
435         {"16", "fans_a_power"},    {"17", "fans_b_power"},
436         {"18", "storage_a_power"}, {"19", "storage_b_power"},
437         {"23", "mem_cache_power"}, {"25", "p0_mem_0_power"},
438         {"26", "p0_mem_1_power"},  {"27", "p0_mem_2_power"},
439         {"35", "pcie_dcm0_power"}, {"36", "pcie_dcm1_power"},
440         {"37", "pcie_dcm2_power"}, {"38", "pcie_dcm3_power"},
441         {"39", "io_dcm0_power"},   {"40", "io_dcm1_power"},
442         {"41", "io_dcm2_power"},   {"42", "io_dcm3_power"},
443         {"43", "avdd_total_power"}};
444 
445     /** @brief The dimm temperature sensor names map  */
446     const std::map<uint32_t, std::string> dimmTempSensorName = {
447         {internalMemCtlr, "_intmb_temp"},
448         {dimm, "_dram_temp"},
449         {memCtrlAndDimm, "_dram_extmb_temp"},
450         {PMIC, "_pmic_temp"},
451         {memCtlrExSensor, "_extmb_temp"}};
452 
453     /** @brief The dimm DVFS temperature sensor names map  */
454     const std::map<uint32_t, std::string> dimmDVFSSensorName = {
455         {internalMemCtlr, "dimm_intmb_dvfs_temp"},
456         {dimm, "dimm_dram_dvfs_temp"},
457         {memCtrlAndDimm, "dimm_dram_extmb_dvfs_temp"},
458         {PMIC, "dimm_pmic_dvfs_temp"},
459         {memCtlrExSensor, "dimm_extmb_dvfs_temp"}};
460 
461     /** @brief Read the altitude from DBus */
462     void readAltitude();
463 
464     /** @brief Callback function when ambient temperature changes
465      *
466      *  @param[in]  msg - Data associated with subscribed signal
467      */
468     void ambientCallback(sdbusplus::message_t& msg);
469 
470     /** @brief Confirm that a single OCC master was found and start presence
471      * monitoring
472      */
473     void validateOccMaster();
474 };
475 
476 } // namespace occ
477 } // namespace open_power
478