xref: /openbmc/openpower-occ-control/occ_manager.hpp (revision c488bac124fbbcb0dbe83e48922c3087a5aaa7bd)
1 #pragma once
2 
#include "occ_pass_through.hpp"
#include "occ_status.hpp"
#ifdef PLDM
#include "pldm.hpp"

#ifdef PHAL_SUPPORT
#include <libphal.H>
#endif
#endif
#include "powercap.hpp"
#include "utils.hpp"
#ifdef POWER10
#include "powermode.hpp"
#endif

#include <sdbusplus/bus.hpp>
#include <sdbusplus/bus/match.hpp>
#include <sdeventplus/event.hpp>
#include <sdeventplus/utility/timer.hpp>

#include <cstdint>
#include <cstring>
#include <functional>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <vector>
25 
26 namespace sdbusRule = sdbusplus::bus::match::rules;
27 namespace open_power
28 {
29 namespace occ
30 {
31 
32 #ifdef READ_OCC_SENSORS
/** @brief FRU type identifiers used to classify OCC sensors.
 *
 *  The values are explicit and non-contiguous (4 and 5 are not defined here).
 *  The enum is intentionally unscoped: the DIMM sensor-name maps in Manager
 *  use these enumerators directly as uint32_t keys.
 *
 *  NOTE(review): the numeric values presumably mirror the FRU type codes
 *  reported by the OCC - confirm against the OCC interface specification
 *  before adding or renumbering entries.
 */
enum occFruType
{
    processorCore = 0,
    internalMemCtlr = 1,
    dimm = 2,
    memCtrlAndDimm = 3,
    VRMVdd = 6,
    PMIC = 7,
    memCtlrExSensor = 8,
    processorIoRing = 9
};
44 #endif
45 
/** @brief Default time, in seconds, between OCC poll commands
 *         (1 second unless built with POWER10, in which case 5 seconds)
 */
#ifndef POWER10
constexpr unsigned int defaultPollingInterval = 1;
#else
constexpr unsigned int defaultPollingInterval = 5;
#endif

/** @brief D-Bus object path, interface and property of the ambient
 *         temperature sensor (Manager subscribes to PropertiesChanged on it)
 */
constexpr auto AMBIENT_PATH =
    "/xyz/openbmc_project/sensors/temperature/Ambient_Virtual_Temp";
constexpr auto AMBIENT_INTERFACE = "xyz.openbmc_project.Sensor.Value";
constexpr auto AMBIENT_PROP = "Value";

/** @brief D-Bus object path, interface and property of the altitude sensor */
constexpr auto ALTITUDE_PATH = "/xyz/openbmc_project/sensors/altitude/Altitude";
constexpr auto ALTITUDE_INTERFACE = "xyz.openbmc_project.Sensor.Value";
constexpr auto ALTITUDE_PROP = "Value";

/** @brief Sensor labels given as hex-encoded ASCII:
 *         "5057524d" decodes to "PWRM" and "50575250" decodes to "PWRP".
 *
 *  NOTE(review): presumably compared against labels of the OCC extended
 *  sensors - confirm against the sensor-reading code.
 */
constexpr auto EXTN_LABEL_PWRM_MEMORY_POWER = "5057524d";
constexpr auto EXTN_LABEL_PWRP_PROCESSOR_POWER = "50575250";
63 
64 /** @class Manager
65  *  @brief Builds and manages OCC objects
66  */
67 struct Manager
68 {
69   public:
70     Manager() = delete;
71     Manager(const Manager&) = delete;
72     Manager& operator=(const Manager&) = delete;
73     Manager(Manager&&) = delete;
74     Manager& operator=(Manager&&) = delete;
75     ~Manager() = default;
76 
77     /** @brief Adds OCC pass-through and status objects on the bus
78      *         when corresponding CPU inventory is created.
79      *
80      *  @param[in] event - Unique ptr reference to sd_event
81      */
Manageropen_power::occ::Manager82     explicit Manager(EventPtr& event) :
83         event(event), pollInterval(defaultPollingInterval),
84         sdpEvent(sdeventplus::Event::get_default()),
85         _pollTimer(
86             std::make_unique<
87                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
88                 sdpEvent, std::bind(&Manager::pollerTimerExpired, this))),
89         ambientPropChanged(
90             utils::getBus(),
91             sdbusRule::member("PropertiesChanged") +
92                 sdbusRule::path(AMBIENT_PATH) +
93                 sdbusRule::argN(0, AMBIENT_INTERFACE) +
94                 sdbusRule::interface("org.freedesktop.DBus.Properties"),
95             std::bind(&Manager::ambientCallback, this, std::placeholders::_1))
96 #ifdef POWER10
97         ,
98         discoverTimer(
99             std::make_unique<
100                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
101                 sdpEvent, std::bind(&Manager::findAndCreateObjects, this))),
102         waitForAllOccsTimer(
103             std::make_unique<
104                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
105                 sdpEvent, std::bind(&Manager::occsNotAllRunning, this)))
106 #ifdef PLDM
107         ,
108         throttlePldmTraceTimer(
109             std::make_unique<
110                 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
111                 sdpEvent, std::bind(&Manager::throttlePldmTraceExpired, this)))
112 #endif
113 #endif // POWER10
114     {
115 #ifdef I2C_OCC
116         // I2C OCC status objects are initialized directly
117         initStatusObjects();
118 #else
119         findAndCreateObjects();
120 #endif
121         readAltitude();
122     }
123 
124     void createPldmHandle();
125 
126     /** @brief Return the number of bound OCCs */
getNumOCCsopen_power::occ::Manager127     inline auto getNumOCCs() const
128     {
129         return activeCount;
130     }
131 
132 #ifdef PLDM
133     /** @brief Called by a Device to report that the SBE timed out
134      *         and appropriate action should be taken
135      *
136      * @param[in] instance - the OCC instance id
137      */
138     void sbeTimeout(unsigned int instance);
139 #endif
140 
141     /** @brief Return the latest ambient and altitude readings
142      *
143      *  @param[out] ambientValid - true if ambientTemp is valid
144      *  @param[out] ambient - ambient temperature in degrees C
145      *  @param[out] altitude - altitude in meters
146      */
147     void getAmbientData(bool& ambientValid, uint8_t& ambientTemp,
148                         uint16_t& altitude) const;
149 
150     /** @brief Notify pcap object to update bounds */
151     void updatePcapBounds() const;
152 
153     /**
154      * @brief Set all sensor values of this OCC to NaN.
155      * @param[in] id - Id of the OCC.
156      * */
157     void setSensorValueToNaN(uint32_t id) const;
158 
159     /** @brief Set all sensor values of this OCC to NaN and non functional.
160      *
161      *  @param[in] id - Id of the OCC.
162      */
163     void setSensorValueToNonFunctional(uint32_t id) const;
164 
165     /** @brief Clear any state flags that need to be reset when the host state
166      * is off */
167     void hostPoweredOff();
168 
169   private:
170     /** @brief Creates the OCC D-Bus objects.
171      */
172     void findAndCreateObjects();
173 
174     /** @brief Callback that responds to cpu creation in the inventory -
175      *         by creating the needed objects.
176      *
177      *  @param[in] msg - bus message
178      *
179      *  @returns 0 to indicate success
180      */
181     int cpuCreated(sdbusplus::message_t& msg);
182 
183     /** @brief Create child OCC objects.
184      *
185      *  @param[in] occ - the occ name, such as occ0.
186      */
187     void createObjects(const std::string& occ);
188 
189     /** @brief Callback handler invoked by Status object when the OccActive
190      *         property is changed. This is needed to make sure that the
191      *         error detection is started only after all the OCCs are bound.
192      *         Similarly, when one of the OCC gets its OccActive property
193      *         un-set, then the OCC error detection needs to be stopped on
194      *         all the OCCs
195      *
196      *  @param[in] status - OccActive status
197      */
198     void statusCallBack(instanceID instance, bool status);
199 
200     /** @brief Set flag that a PM Complex reset is needed (to be initiated
201      * later) */
202     void resetOccRequest(instanceID instance);
203 
204     /** @brief Initiate the request to reset the PM Complex (PLDM -> HBRT) */
205     void initiateOccRequest(instanceID instance);
206 
207     /** @brief Sends a Heartbeat command to host control command handler */
208     void sendHeartBeat();
209 
210     /** @brief reference to sd_event wrapped in unique_ptr */
211     EventPtr& event;
212 
213     /** @brief OCC pass-through objects */
214     std::vector<std::unique_ptr<PassThrough>> passThroughObjects;
215 
216     /** @brief OCC Status objects */
217     std::vector<std::unique_ptr<Status>> statusObjects;
218 
219     /** @brief Power cap monitor and occ notification object */
220     std::unique_ptr<open_power::occ::powercap::PowerCap> pcap;
221 
222 #ifdef POWER10
223     /** @brief Power mode monitor and notification object */
224     std::unique_ptr<open_power::occ::powermode::PowerMode> pmode;
225 #endif
226 
227     /** @brief sbdbusplus match objects */
228     std::vector<sdbusplus::bus::match_t> cpuMatches;
229 
230     /** @brief Number of OCCs that are bound */
231     uint8_t activeCount = 0;
232 
233     /** @brief Number of seconds between poll commands */
234     uint8_t pollInterval;
235 
236     /** @brief Ambient temperature of the system in degrees C */
237     uint8_t ambient = 0xFF; // default: not available
238 
239     /** @brief Altitude of the system in meters */
240     uint16_t altitude = 0xFFFF; // default: not available
241 
242     /** @brief Poll timer event */
243     sdeventplus::Event sdpEvent;
244 
245     /** @brief Flags to indicate if waiting for all of the OCC active sensors to
246      * come online */
247     bool waitingForAllOccActiveSensors = false;
248 
249     /** @brief Set containing intance numbers of any OCCs that became active
250      *         while waiting for status objects to be created */
251     std::set<uint8_t> queuedActiveState;
252 
253     /**
254      * @brief The timer to be used once the OCC goes active.  When it expires,
255      *        a POLL command will be sent to the OCC and then timer restarted.
256      */
257     std::unique_ptr<
258         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
259         _pollTimer;
260 
261     /** @brief Subscribe to ambient temperature changed events */
262     sdbusplus::bus::match_t ambientPropChanged;
263 
264     /** @brief Flag to indicate that a PM complex reset needs to happen */
265     bool resetRequired = false;
266     /** @brief Instance number of the OCC/processor that triggered the reset */
267     uint8_t resetInstance = 255;
268     /** @brief Set when a PM complex reset has been issued (to prevent multiple
269      * requests) */
270     bool resetInProgress = false;
271 
272 #ifdef I2C_OCC
273     /** @brief Init Status objects for I2C OCC devices
274      *
275      * It iterates in /sys/bus/i2c/devices, finds all occ hwmon devices
276      * and creates status objects.
277      */
278     void initStatusObjects();
279 #endif
280 
281 #ifdef PLDM
282     /** @brief Callback handler invoked by the PLDM event handler when state of
283      *         the OCC is toggled by the host. The caller passes the instance
284      *         of the OCC and state of the OCC.
285      *
286      *  @param[in] instance - instance of the OCC
287      *  @param[in] status - true when the OCC goes active and false when the OCC
288      *                      goes inactive
289      *
290      *  @return true if setting the state of OCC is successful and false if it
291      *          fails.
292      */
293     bool updateOCCActive(instanceID instance, bool status);
294 
295     /** @brief Callback handler invoked by the PLDM event handler when mode of
296      *         the OCC SAFE MODE is inacted or cleared.
297      */
298     void updateOccSafeMode(bool safeState);
299 
300     /** @brief Callback handler invoked by PLDM sensor change when
301      *         the HRESET succeeds or fails.
302      *
303      *  @param[in] instance - the SBE instance id
304      *  @param[in] success - true if the HRESET succeeded, otherwise false
305      */
306     void sbeHRESETResult(instanceID instance, bool success);
307 
308 #ifdef PHAL_SUPPORT
309     /** @brief Helper function to check whether an SBE dump should be collected
310      *         now.
311      *
312      *  @param[in] instance - the SBE instance id
313      *
314      *  @return true if an SBE dump should be collected and false if not
315      */
316     bool sbeCanDump(unsigned int instance);
317 
318     /** @brief Helper function to set the SBE state through PDBG/PHAL
319      *
320      * @param[in] instance - instance of the SBE
321      * @param[in] state - the state to which the SBE should be set
322      *
323      */
324     void setSBEState(unsigned int instance, enum sbe_state state);
325 
326     /** @brief Helper function to get the SBE instance PDBG processor target
327      *
328      * @param[in] instance - the SBE instance id
329      *
330      * @return a pointer to the PDBG target
331      */
332     struct pdbg_target* getPdbgTarget(unsigned int instance);
333 
334     /** @brief Whether pdbg_targets_init has been called */
335     bool pdbgInitialized = false;
336 #endif
337 
338     std::unique_ptr<pldm::Interface> pldmHandle = nullptr;
339 #endif
340 
341 #ifdef POWER10
342     /**
343      * @brief Timer used when discovering OCCs in /dev.
344      */
345     std::unique_ptr<
346         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
347         discoverTimer;
348 
349     /**
350      * @brief Used when discovering /dev/occ objects to know if
351      *        any were added since the last check.
352      */
353     std::vector<int> prevOCCSearch;
354 
355     /**
356      * @brief Timer used when waiting for OCCs to go active.
357      */
358     std::unique_ptr<
359         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
360         waitForAllOccsTimer;
361 
362 #ifdef PLDM
363     /**
364      * @brief Timer used to throttle PLDM traces when there are problems
365      determining the OCC status via pldm. Used to prevent excessive
366      journal traces.
367      */
368     std::unique_ptr<
369         sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>
370         throttlePldmTraceTimer;
371     /**
372      * @brief onPldmTimeoutCreatePel flag will be used to indicate if
373      *        a PEL should get created when the throttlePldmTraceTimer expires.
374      *        The first time the throttlePldmTraceTimer expires, the traces
375      *        will be throttled and then the timer gets restarted. The
376      *        next time the timer expires, a PEL will get created.
377      */
378     bool onPldmTimeoutCreatePel = false;
379 
380     /** @brief Check if all of the OCC Active sensors are available and if not
381      * restart the discoverTimer
382      */
383     void throttlePldmTraceExpired();
384 
385     /** @brief Create a PEL when the code is not able to obtain the OCC PDRs
386      * via PLDM. This is called when the throttlePldmTraceTimer expires.
387      */
388     void createPldmSensorPEL();
389 #endif
390 
391     /** @brief Called when code times out waiting for all OCCs to be running or
392      *         after the app is restarted (Status does not callback into
393      * Manager).
394      */
395     void occsNotAllRunning();
396 
397     /** @brief Check if all of the OCC Active sensors are available and if not
398      * restart the discoverTimer
399      */
400     void checkAllActiveSensors();
401 #endif // POWER10
402 
403     /**
404      * @brief Called when poll timer expires and forces a POLL command to the
405      * OCC. The poll timer will then be restarted.
406      * */
407     void pollerTimerExpired();
408 
409     /**
410      * @brief Finds the OCC devices in /dev
411      *
412      * @return The IDs of the OCCs - 0, 1, etc.
413      */
414     std::vector<int> findOCCsInDev();
415 
416 #ifdef READ_OCC_SENSORS
417     /**
418      * @brief Gets the occ sensor values.
419      * @param[in] occ - pointer to OCCs Status object
420      * */
421     void getSensorValues(std::unique_ptr<Status>& occ);
422 
423     /**
424      * @brief Trigger OCC driver to read the temperature sensors.
425      * @param[in] path - path of the OCC sensors.
426      * @param[in] id - Id of the OCC.
427      * */
428     void readTempSensors(const fs::path& path, uint32_t id);
429 
430     /**
431      * @brief Trigger OCC driver to read the extended sensors.
432      * @param[in] path - path of the OCC sensors.
433      * @param[in] id - Id of the OCC.
434      * */
435     void readExtnSensors(const fs::path& path, uint32_t id);
436 
437     /**
438      * @brief Trigger OCC driver to read the power sensors.
439      * @param[in] path - path of the OCC sensors.
440      * @param[in] id - Id of the OCC.
441      * */
442     void readPowerSensors(const fs::path& path, uint32_t id);
443 
444     /** @brief Store the existing OCC sensors on D-BUS */
445     std::map<std::string, uint32_t> existingSensors;
446 
447     /** @brief Get FunctionID from the `powerX_label` file.
448      *  @param[in] value - the value of the `powerX_label` file.
449      *  @returns FunctionID of the power sensors.
450      */
451     std::optional<std::string> getPowerLabelFunctionID(
452         const std::string& value);
453 
454     /** @brief The power sensor names map */
455     const std::map<std::string, std::string> powerSensorName = {
456         {"system", "total_power"}, {"1", "p0_mem_power"},
457         {"2", "p1_mem_power"},     {"3", "p2_mem_power"},
458         {"4", "p3_mem_power"},     {"5", "p0_power"},
459         {"6", "p1_power"},         {"7", "p2_power"},
460         {"8", "p3_power"},         {"9", "p0_cache_power"},
461         {"10", "p1_cache_power"},  {"11", "p2_cache_power"},
462         {"12", "p3_cache_power"},  {"13", "io_a_power"},
463         {"14", "io_b_power"},      {"15", "io_c_power"},
464         {"16", "fans_a_power"},    {"17", "fans_b_power"},
465         {"18", "storage_a_power"}, {"19", "storage_b_power"},
466         {"23", "mem_cache_power"}, {"25", "p0_mem_0_power"},
467         {"26", "p0_mem_1_power"},  {"27", "p0_mem_2_power"},
468         {"35", "pcie_dcm0_power"}, {"36", "pcie_dcm1_power"},
469         {"37", "pcie_dcm2_power"}, {"38", "pcie_dcm3_power"},
470         {"39", "io_dcm0_power"},   {"40", "io_dcm1_power"},
471         {"41", "io_dcm2_power"},   {"42", "io_dcm3_power"},
472         {"43", "avdd_total_power"}};
473 
474     /** @brief The dimm temperature sensor names map  */
475     const std::map<uint32_t, std::string> dimmTempSensorName = {
476         {internalMemCtlr, "_intmb_temp"},
477         {dimm, "_dram_temp"},
478         {memCtrlAndDimm, "_dram_extmb_temp"},
479         {PMIC, "_pmic_temp"},
480         {memCtlrExSensor, "_extmb_temp"}};
481 
482     /** @brief The dimm DVFS temperature sensor names map  */
483     const std::map<uint32_t, std::string> dimmDVFSSensorName = {
484         {internalMemCtlr, "dimm_intmb_dvfs_temp"},
485         {dimm, "dimm_dram_dvfs_temp"},
486         {memCtrlAndDimm, "dimm_dram_extmb_dvfs_temp"},
487         {PMIC, "dimm_pmic_dvfs_temp"},
488         {memCtlrExSensor, "dimm_extmb_dvfs_temp"}};
489 #endif
490 
491     /** @brief Read the altitude from DBus */
492     void readAltitude();
493 
494     /** @brief Callback function when ambient temperature changes
495      *
496      *  @param[in]  msg - Data associated with subscribed signal
497      */
498     void ambientCallback(sdbusplus::message_t& msg);
499 
500     /** @brief Confirm that a single OCC master was found and start presence
501      * monitoring
502      */
503     void validateOccMaster();
504 };
505 
506 } // namespace occ
507 } // namespace open_power
508