xref: /openbmc/openpower-occ-control/occ_manager.hpp (revision 720a3841e8684f93a25953d9db66bd4d0a4c3df7)
1 #pragma once
2 
3 #include "occ_pass_through.hpp"
4 #include "occ_status.hpp"
5 #ifdef PLDM
6 #include "pldm.hpp"
7 
8 #ifdef PHAL_SUPPORT
9 #include <libphal.H>
10 #endif
11 #endif
12 #include "powercap.hpp"
13 #include "utils.hpp"
14 #ifdef POWER10
15 #include "powermode.hpp"
16 #endif
17 
18 #include <sdbusplus/bus.hpp>
19 #include <sdeventplus/event.hpp>
20 #include <sdeventplus/utility/timer.hpp>
21 
22 #include <cstring>
23 #include <functional>
24 #include <vector>
25 
// Shorthand for the sdbusplus match-rule builders (member/path/argN/interface)
// that Manager's constructor combines for its ambient temperature match.
26 namespace sdbusRule = sdbusplus::bus::match::rules;
27 namespace open_power
28 {
29 namespace occ
30 {
31 
32 #ifdef READ_OCC_SENSORS
33 enum occFruType // unscoped: enumerators convert implicitly to the uint32_t
                   // keys of Manager's dimmTempSensorName/dimmDVFSSensorName
                   // maps. Only compiled when READ_OCC_SENSORS is defined.
34 {
35     processorCore = 0,
36     internalMemCtlr = 1,
37     dimm = 2,
38     memCtrlAndDimm = 3,
39     VRMVdd = 6, // note: values 4 and 5 are not defined in this enum
40     PMIC = 7,
41     memCtlrExSensor = 8,
42     processorIoRing = 9
43 };
44 #endif
45 
46 /** @brief Default time, in seconds, between OCC poll commands
   *         (5 when POWER10 is defined, otherwise 1). Used by Manager's
   *         constructor to initialize pollInterval. */
47 #ifndef POWER10
48 constexpr unsigned int defaultPollingInterval = 1;
49 #else
50 constexpr unsigned int defaultPollingInterval = 5;
51 #endif
52 
   /** @brief D-Bus object path, interface and property name of the ambient
    *         temperature sensor. The path and interface are used by Manager's
    *         PropertiesChanged match (ambientPropChanged). */
53 constexpr auto AMBIENT_PATH =
54     "/xyz/openbmc_project/sensors/temperature/Ambient_Virtual_Temp";
55 constexpr auto AMBIENT_INTERFACE = "xyz.openbmc_project.Sensor.Value";
56 constexpr auto AMBIENT_PROP = "Value";
   /** @brief D-Bus object path, interface and property name of the altitude
    *         sensor (presumably consumed by Manager::readAltitude - confirm
    *         in the implementation). */
57 constexpr auto ALTITUDE_PATH = "/xyz/openbmc_project/sensors/altitude/Altitude";
58 constexpr auto ALTITUDE_INTERFACE = "xyz.openbmc_project.Sensor.Value";
59 constexpr auto ALTITUDE_PROP = "Value";
60 
61 /** @class Manager
62  *  @brief Builds and manages OCC objects
    *
    *  The class is non-default-constructible, non-copyable and non-movable.
    *
    *  NOTE(review): this header uses std::set, std::map, std::optional,
    *  std::string and std::unique_ptr, but the only standard headers it
    *  includes directly are <cstring>, <functional> and <vector>; the others
    *  presumably arrive through the project headers - confirm, or include
    *  them explicitly.
63  */
64 struct Manager
65 {
66   public:
67     Manager() = delete;
68     Manager(const Manager&) = delete;
69     Manager& operator=(const Manager&) = delete;
70     Manager(Manager&&) = delete;
71     Manager& operator=(Manager&&) = delete;
72     ~Manager() = default;
73 
74     /** @brief Adds OCC pass-through and status objects on the bus
75      *         when corresponding CPU inventory is created.
76      *
        *  Also creates the poll timer (callback: pollerTimerExpired) and
        *  subscribes to PropertiesChanged signals for AMBIENT_INTERFACE on
        *  AMBIENT_PATH (callback: ambientCallback). With POWER10 the
        *  discoverTimer and waitForAllOccsTimer are created as well, plus
        *  throttlePldmTraceTimer when PLDM is also defined.
        *
        *  With I2C_OCC the status objects are initialized directly through
        *  initStatusObjects(); otherwise findAndCreateObjects() is called.
        *  readAltitude() is called last.
        *
77      *  @param[in] event - Unique ptr reference to sd_event
78      */
Manageropen_power::occ::Manager79     explicit Manager(EventPtr& event) :
80         event(event), pollInterval(defaultPollingInterval),
81         sdpEvent(sdeventplus::Event::get_default()),
82         _pollTimer(
83             std::make_unique<
84                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
85                 sdpEvent, std::bind(&Manager::pollerTimerExpired, this))),
86         ambientPropChanged(
87             utils::getBus(),
88             sdbusRule::member("PropertiesChanged") +
89                 sdbusRule::path(AMBIENT_PATH) +
90                 sdbusRule::argN(0, AMBIENT_INTERFACE) +
91                 sdbusRule::interface("org.freedesktop.DBus.Properties"),
92             std::bind(&Manager::ambientCallback, this, std::placeholders::_1))
93 #ifdef POWER10
94         ,
95         discoverTimer(
96             std::make_unique<
97                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
98                 sdpEvent, std::bind(&Manager::findAndCreateObjects, this))),
99         waitForAllOccsTimer(
100             std::make_unique<
101                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
102                 sdpEvent, std::bind(&Manager::occsNotAllRunning, this)))
103 #ifdef PLDM
104         ,
105         throttlePldmTraceTimer(
106             std::make_unique<
107                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
108                 sdpEvent, std::bind(&Manager::throttlePldmTraceExpired, this)))
109 #endif
110 #endif // POWER10
111     {
112 #ifdef I2C_OCC
113         // I2C OCC status objects are initialized directly
114         initStatusObjects();
115 #else
116         findAndCreateObjects();
117 #endif
118         readAltitude();
119     }
120 
        /** @brief NOTE(review): undocumented in this header; presumably
         *         creates the pldm::Interface stored in pldmHandle - confirm
         *         in the implementation. Declared even when PLDM is not
         *         defined, although pldmHandle only exists under PLDM.
         */
121     void createPldmHandle();
122 
123     /** @brief Return the number of bound OCCs (the current activeCount) */
getNumOCCsopen_power::occ::Manager124     inline auto getNumOCCs() const
125     {
126         return activeCount;
127     }
128 
129 #ifdef PLDM
130     /** @brief Called by a Device to report that the SBE timed out
131      *         and appropriate action should be taken
132      *
133      * @param[in] instance - the OCC instance id
134      */
135     void sbeTimeout(unsigned int instance);
136 #endif
137 
138     /** @brief Return the latest ambient and altitude readings
139      *
140      *  @param[out] ambientValid - true if ambientTemp is valid
141      *  @param[out] ambientTemp - ambient temperature in degrees C
142      *  @param[out] altitude - altitude in meters
143      */
144     void getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
145                         uint16_t& altitude) const;
146 
147     /** @brief Notify pcap object to update bounds */
148     void updatePcapBounds() const;
149 
150     /**
151      * @brief Set all sensor values of this OCC to NaN.
152      * @param[in] id - Id of the OCC.
153      * */
154     void setSensorValueToNaN(uint32_t id) const;
155 
156     /** @brief Set all sensor values of this OCC to NaN and non functional.
157      *
158      *  @param[in] id - Id of the OCC.
159      */
160     void setSensorValueToNonFunctional(uint32_t id) const;
161 
162   private:
163     /** @brief Creates the OCC D-Bus objects.
         *
         *  Called from the constructor when I2C_OCC is not defined and, with
         *  POWER10, also bound as the discoverTimer callback.
164      */
165     void findAndCreateObjects();
166 
167     /** @brief Callback that responds to cpu creation in the inventory -
168      *         by creating the needed objects.
169      *
170      *  @param[in] msg - bus message
171      *
172      *  @returns 0 to indicate success
173      */
174     int cpuCreated(sdbusplus::message_t& msg);
175 
176     /** @brief Create child OCC objects.
177      *
178      *  @param[in] occ - the occ name, such as occ0.
179      */
180     void createObjects(const std::string& occ);
181 
182     /** @brief Callback handler invoked by Status object when the OccActive
183      *         property is changed. This is needed to make sure that the
184      *         error detection is started only after all the OCCs are bound.
185      *         Similarly, when one of the OCC gets its OccActive property
186      *         un-set, then the OCC error detection needs to be stopped on
187      *         all the OCCs
188      *
         *  @param[in] instance - OCC instance id (presumably that of the
         *                        Status object invoking the callback - confirm)
189      *  @param[in] status - OccActive status
190      */
191     void statusCallBack(instanceID instance, bool status);
192 
193     /** @brief Set flag that a PM Complex reset is needed (to be initiated
194      * later) */
195     void resetOccRequest(instanceID instance);
196 
197     /** @brief Initiate the request to reset the PM Complex (PLDM -> HBRT) */
198     void initiateOccRequest(instanceID instance);
199 
200     /** @brief Sends a Heartbeat command to host control command handler */
201     void sendHeartBeat();
202 
203     /** @brief reference to sd_event wrapped in unique_ptr */
204     EventPtr& event;
205 
206     /** @brief OCC pass-through objects */
207     std::vector<std::unique_ptr<PassThrough>> passThroughObjects;
208 
209     /** @brief OCC Status objects */
210     std::vector<std::unique_ptr<Status>> statusObjects;
211 
212     /** @brief Power cap monitor and occ notification object */
213     std::unique_ptr<open_power::occ::powercap::PowerCap> pcap;
214 
215 #ifdef POWER10
216     /** @brief Power mode monitor and notification object */
217     std::unique_ptr<open_power::occ::powermode::PowerMode> pmode;
218 #endif
219 
220     /** @brief sdbusplus match objects */
221     std::vector<sdbusplus::bus::match_t> cpuMatches;
222 
223     /** @brief Number of OCCs that are bound */
224     uint8_t activeCount = 0;
225 
226     /** @brief Number of seconds between poll commands
         *         (initialized to defaultPollingInterval) */
227     uint8_t pollInterval;
228 
229     /** @brief Ambient temperature of the system in degrees C */
230     uint8_t ambient = 0xFF; // default: not available
231 
232     /** @brief Altitude of the system in meters */
233     uint16_t altitude = 0xFFFF; // default: not available
234 
235     /** @brief Default sdeventplus event, used to create every timer in this
         *         class. Declared before the timers so that it is initialized
         *         first. */
236     sdeventplus::Event sdpEvent;
237 
238     /** @brief Flag to indicate if waiting for all of the OCC active sensors to
239      * come online */
240     bool waitingForAllOccActiveSensors = false;
241 
242     /** @brief Set containing instance numbers of any OCCs that became active
243      *         while waiting for status objects to be created */
244     std::set<uint8_t> queuedActiveState;
245 
246     /**
247      * @brief The timer to be used once the OCC goes active.  When it expires,
248      *        a POLL command will be sent to the OCC and then timer restarted.
         *        Its callback is pollerTimerExpired.
249      */
250     std::unique_ptr<
251         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
252         _pollTimer;
253 
254     /** @brief Subscribe to ambient temperature changed events */
255     sdbusplus::bus::match_t ambientPropChanged;
256 
257     /** @brief Flag to indicate that a PM complex reset needs to happen */
258     bool resetRequired = false;
259     /** @brief Instance number of the OCC/processor that triggered the reset */
260     uint8_t resetInstance = 255;
261     /** @brief Set when a PM complex reset has been issued (to prevent multiple
262      * requests) */
263     bool resetInProgress = false;
264 
265 #ifdef I2C_OCC
266     /** @brief Init Status objects for I2C OCC devices
267      *
268      * It iterates in /sys/bus/i2c/devices, finds all occ hwmon devices
269      * and creates status objects.
270      */
271     void initStatusObjects();
272 #endif
273 
274 #ifdef PLDM
275     /** @brief Callback handler invoked by the PLDM event handler when state of
276      *         the OCC is toggled by the host. The caller passes the instance
277      *         of the OCC and state of the OCC.
278      *
279      *  @param[in] instance - instance of the OCC
280      *  @param[in] status - true when the OCC goes active and false when the OCC
281      *                      goes inactive
282      *
283      *  @return true if setting the state of OCC is successful and false if it
284      *          fails.
285      */
286     bool updateOCCActive(instanceID instance, bool status);
287 
288     /** @brief Callback handler invoked by the PLDM event handler when the
289      *         OCC safe mode is entered or cleared.
         *
         *  @param[in] safeState - presumably true when safe mode is entered
         *                         and false when it is cleared - confirm
290      */
291     void updateOccSafeMode(bool safeState);
292 
293     /** @brief Callback handler invoked by PLDM sensor change when
294      *         the HRESET succeeds or fails.
295      *
296      *  @param[in] instance - the SBE instance id
297      *  @param[in] success - true if the HRESET succeeded, otherwise false
298      */
299     void sbeHRESETResult(instanceID instance, bool success);
300 
301 #ifdef PHAL_SUPPORT
302     /** @brief Helper function to check whether an SBE dump should be collected
303      *         now.
304      *
305      *  @param[in] instance - the SBE instance id
306      *
307      *  @return true if an SBE dump should be collected and false if not
308      */
309     bool sbeCanDump(unsigned int instance);
310 
311     /** @brief Helper function to set the SBE state through PDBG/PHAL
312      *
313      * @param[in] instance - instance of the SBE
314      * @param[in] state - the state to which the SBE should be set
315      *
316      */
317     void setSBEState(unsigned int instance, enum sbe_state state);
318 
319     /** @brief Helper function to get the SBE instance PDBG processor target
320      *
321      * @param[in] instance - the SBE instance id
322      *
323      * @return a pointer to the PDBG target
324      */
325     struct pdbg_target* getPdbgTarget(unsigned int instance);
326 
327     /** @brief Whether pdbg_targets_init has been called */
328     bool pdbgInitialized = false;
329 #endif
330 
         /** @brief PLDM interface object; null until one is created
          *         (presumably by createPldmHandle - confirm). */
331     std::unique_ptr<pldm::Interface> pldmHandle = nullptr;
332 #endif
333 
334 #ifdef POWER10
335     /**
336      * @brief Timer used when discovering OCCs in /dev.
         *        Its callback is findAndCreateObjects.
337      */
338     std::unique_ptr<
339         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
340         discoverTimer;
341 
342     /**
343      * @brief Used when discovering /dev/occ objects to know if
344      *        any were added since the last check.
345      */
346     std::vector<int> prevOCCSearch;
347 
348     /**
349      * @brief Timer used when waiting for OCCs to go active.
         *        Its callback is occsNotAllRunning.
350      */
351     std::unique_ptr<
352         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
353         waitForAllOccsTimer;
354 
355 #ifdef PLDM
356     /**
357      * @brief Timer used to throttle PLDM traces when there are problems
358      *        determining the OCC status via pldm. Used to prevent excessive
359      *        journal traces. Its callback is throttlePldmTraceExpired.
360      */
361     std::unique_ptr<
362         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
363         throttlePldmTraceTimer;
364     /**
365      * @brief onPldmTimeoutCreatePel flag will be used to indicate if
366      *        a PEL should get created when the throttlePldmTraceTimer expires.
367      *        The first time the throttlePldmTraceTimer expires, the traces
368      *        will be throttled and then the timer gets restarted. The
369      *        next time the timer expires, a PEL will get created.
370      */
371     bool onPldmTimeoutCreatePel = false;
372 
373     /** @brief Callback bound to throttlePldmTraceTimer in the constructor;
374      * runs when that timer expires (see onPldmTimeoutCreatePel).
375      */
376     void throttlePldmTraceExpired();
377 
378     /** @brief Create a PEL when the code is not able to obtain the OCC PDRs
379      * via PLDM. This is called when the throttlePldmTraceTimer expires.
380      */
381     void createPldmSensorPEL();
382 #endif
383 
384     /** @brief Called when code times out waiting for all OCCs to be running or
385      *         after the app is restarted (Status does not callback into
386      *         Manager). Bound as the waitForAllOccsTimer callback.
387      */
388     void occsNotAllRunning();
389 
390     /** @brief Check if all of the OCC Active sensors are available and if not
391      * restart the discoverTimer
392      */
393     void checkAllActiveSensors();
394 #endif // POWER10
395 
396     /**
397      * @brief Called when poll timer expires and forces a POLL command to the
398      * OCC. The poll timer will then be restarted.
399      * */
400     void pollerTimerExpired();
401 
402     /**
403      * @brief Finds the OCC devices in /dev
404      *
405      * @return The IDs of the OCCs - 0, 1, etc.
406      */
407     std::vector<int> findOCCsInDev();
408 
409 #ifdef READ_OCC_SENSORS
410     /**
411      * @brief Gets the occ sensor values.
412      * @param[in] occ - pointer to OCCs Status object
413      * */
414     void getSensorValues(std::unique_ptr<Status>& occ);
415 
416     /**
417      * @brief Trigger OCC driver to read the temperature sensors.
418      * @param[in] path - path of the OCC sensors.
419      * @param[in] id - Id of the OCC.
420      * */
421     void readTempSensors(const fs::path& path, uint32_t id);
422 
423     /**
424      * @brief Trigger OCC driver to read the power sensors.
425      * @param[in] path - path of the OCC sensors.
426      * @param[in] id - Id of the OCC.
427      * */
428     void readPowerSensors(const fs::path& path, uint32_t id);
429 
430     /** @brief Store the existing OCC sensors on D-BUS */
431     std::map<std::string, uint32_t> existingSensors;
432 
433     /** @brief Get FunctionID from the `powerX_label` file.
434      *  @param[in] value - the value of the `powerX_label` file.
435      *  @returns FunctionID of the power sensors.
436      */
437     std::optional<std::string>
438         getPowerLabelFunctionID(const std::string& value);
439 
440     /** @brief The power sensor names map (presumably keyed by the FunctionID
         *         string returned by getPowerLabelFunctionID - confirm) */
441     const std::map<std::string, std::string> powerSensorName = {
442         {"system", "total_power"}, {"1", "p0_mem_power"},
443         {"2", "p1_mem_power"},     {"3", "p2_mem_power"},
444         {"4", "p3_mem_power"},     {"5", "p0_power"},
445         {"6", "p1_power"},         {"7", "p2_power"},
446         {"8", "p3_power"},         {"9", "p0_cache_power"},
447         {"10", "p1_cache_power"},  {"11", "p2_cache_power"},
448         {"12", "p3_cache_power"},  {"13", "io_a_power"},
449         {"14", "io_b_power"},      {"15", "io_c_power"},
450         {"16", "fans_a_power"},    {"17", "fans_b_power"},
451         {"18", "storage_a_power"}, {"19", "storage_b_power"},
452         {"23", "mem_cache_power"}, {"25", "p0_mem_0_power"},
453         {"26", "p0_mem_1_power"},  {"27", "p0_mem_2_power"},
454         {"35", "pcie_dcm0_power"}, {"36", "pcie_dcm1_power"},
455         {"37", "pcie_dcm2_power"}, {"38", "pcie_dcm3_power"},
456         {"39", "io_dcm0_power"},   {"40", "io_dcm1_power"},
457         {"41", "io_dcm2_power"},   {"42", "io_dcm3_power"},
458         {"43", "avdd_total_power"}};
459 
460     /** @brief The dimm temperature sensor names map, keyed by occFruType */
461     const std::map<uint32_t, std::string> dimmTempSensorName = {
462         {internalMemCtlr, "_intmb_temp"},
463         {dimm, "_dram_temp"},
464         {memCtrlAndDimm, "_dram_extmb_temp"},
465         {PMIC, "_pmic_temp"},
466         {memCtlrExSensor, "_extmb_temp"}};
467 
468     /** @brief The dimm DVFS temperature sensor names map, keyed by occFruType */
469     const std::map<uint32_t, std::string> dimmDVFSSensorName = {
470         {internalMemCtlr, "dimm_intmb_dvfs_temp"},
471         {dimm, "dimm_dram_dvfs_temp"},
472         {memCtrlAndDimm, "dimm_dram_extmb_dvfs_temp"},
473         {PMIC, "dimm_pmic_dvfs_temp"},
474         {memCtlrExSensor, "dimm_extmb_dvfs_temp"}};
475 #endif
476 
477     /** @brief Read the altitude from DBus (called at the end of construction) */
478     void readAltitude();
479 
480     /** @brief Callback function when ambient temperature changes
481      *
482      *  @param[in]  msg - Data associated with subscribed signal
483      */
484     void ambientCallback(sdbusplus::message_t& msg);
485 
486     /** @brief Confirm that a single OCC master was found and start presence
487      * monitoring
488      */
489     void validateOccMaster();
490 };
491 
492 } // namespace occ
493 } // namespace open_power
494