1 #pragma once
2 
#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"

#include <sdbusplus/bus.hpp>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
14 
15 namespace phosphor
16 {
17 namespace power
18 {
19 
// Identifies one fault that was called out against one part:
// the extra-analysis (error) type paired with the callout string.
// Used to remember which (error type, part) pairs were already reported.
using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
22 
23 /**
24  * @class UCD90160
25  *
26  * This class implements fault analysis for the UCD90160
27  * power sequencer device.
28  *
29  */
30 class UCD90160 : public Device
31 {
32   public:
33     UCD90160() = delete;
34     ~UCD90160() = default;
35     UCD90160(const UCD90160&) = delete;
36     UCD90160& operator=(const UCD90160&) = delete;
37     UCD90160(UCD90160&&) = default;
38     UCD90160& operator=(UCD90160&&) = default;
39 
40     /**
41      * Constructor
42      *
43      * @param[in] instance - the device instance number
44      * @param[in] bus - D-Bus bus object
45      */
46     UCD90160(size_t instance, sdbusplus::bus::bus& bus);
47 
48     /**
49      * Analyzes the device for errors when the device is
50      * known to be in an error state.  A log will be created.
51      */
52     void onFailure() override;
53 
54     /**
55      * Checks the device for errors and only creates a log
56      * if one is found.
57      */
58     void analyze() override;
59 
60     /**
61      * Clears faults in the device
62      */
63     void clearFaults() override
64     {
65     }
66 
67   private:
68     /**
69      * Reports an error for a GPU PGOOD failure
70      *
71      * @param[in] callout - the GPU callout string
72      */
73     void gpuPGOODError(const std::string& callout);
74 
75     /**
76      * Reports an error for a GPU OverTemp failure
77      *
78      * @param[in] callout - the GPU callout string
79      */
80     void gpuOverTempError(const std::string& callout);
81 
82     /**
83      * Reports an error for a MEM_GOODx failure.
84      *
85      * @param[in] callout - The MEM callout string
86      */
87     void memGoodError(const std::string& callout);
88 
89     /**
90      * Given the device path for a chip, find its gpiochip
91      * path
92      *
93      * @param[in] path - device path, like
94      *                   /sys/devices/.../i2c-11/11-0064
95      *
96      * @return fs::path - The gpiochip path, like
97      *                   /dev/gpiochip1
98      */
99     static std::filesystem::path
100         findGPIODevice(const std::filesystem::path& path);
101 
102     /**
103      * Checks for VOUT faults on the device.
104      *
105      * This device can monitor voltages of its dependent
106      * devices, and VOUT faults are voltage faults
107      * on these devices.
108      *
109      * @return bool - true if an error log was created
110      */
111     bool checkVOUTFaults();
112 
113     /**
114      * Checks for PGOOD faults on the device.
115      *
116      * This device can monitor the PGOOD signals of its dependent
117      * devices, and this check will look for faults of
118      * those PGOODs.
119      *
120      * @param[in] polling - If this is running while polling for errors,
121      *                      as opposing to analyzing a fail condition.
122      *
123      * @return bool - true if an error log was created
124      */
125     bool checkPGOODFaults(bool polling);
126 
127     /**
128      * Creates an error log when the device has an error
129      * but it isn't a PGOOD or voltage failure.
130      */
131     void createPowerFaultLog();
132 
133     /**
134      * Reads the status_word register
135      *
136      * @return uint16_t - the register contents
137      */
138     uint16_t readStatusWord();
139 
140     /**
141      * Reads the mfr_status register
142      *
143      * @return uint32_t - the register contents
144      */
145     uint32_t readMFRStatus();
146 
147     /**
148      * Does any additional fault analysis based on the
149      * value of the extraAnalysisType field in the GPIOConfig
150      * entry.
151      *
152      * Used to get better callouts.
153      *
154      * @param[in] config - the GPIOConfig entry to use
155      *
156      * @return bool - true if a HW error was found, false else
157      */
158     bool doExtraAnalysis(const ucd90160::GPIConfig& config);
159 
160     /**
161      * Does additional fault analysis using GPIOs to
162      * specifically identify the failing part.
163      *
164      * Used when there are too many PGOOD inputs for
165      * the UCD90160 to handle, so just a summary bit
166      * is wired into the chip, and then the specific
167      * fault GPIOs are off of a different GPIO device,
168      * like an IO expander.
169      *
170      * @param[in] type - the type of analysis to do
171      *
172      * @return bool - true if a HW error was found, false else
173      */
174     bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
175 
176     /**
177      * Says if we've already logged a Vout fault
178      *
179      * The policy is only 1 of the same error will
180      * be logged for the duration of a class instance.
181      *
182      * @param[in] page - the page to check
183      *
184      * @return bool - if we've already logged a fault against
185      *                this page
186      */
187     inline bool isVoutFaultLogged(uint32_t page) const
188     {
189         return std::find(voutErrors.begin(), voutErrors.end(), page) !=
190                voutErrors.end();
191     }
192 
193     /**
194      * Saves that a Vout fault has been logged
195      *
196      * @param[in] page - the page the error was logged against
197      */
198     inline void setVoutFaultLogged(uint32_t page)
199     {
200         voutErrors.push_back(page);
201     }
202 
203     /**
204      * Says if we've already logged a PGOOD fault
205      *
206      * The policy is only 1 of the same errors will
207      * be logged for the duration of a class instance.
208      *
209      * @param[in] input - the input to check
210      *
211      * @return bool - if we've already logged a fault against
212      *                this input
213      */
214     inline bool isPGOODFaultLogged(uint32_t input) const
215     {
216         return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
217                pgoodErrors.end();
218     }
219 
220     /**
221      * Says if we've already logged a specific fault
222      * against a specific part
223      *
224      * @param[in] callout - error type and name tuple
225      *
226      * @return bool - if we've already logged this fault
227      *                against this part
228      */
229     inline bool isPartCalledOut(const PartCallout& callout) const
230     {
231         return std::find(callouts.begin(), callouts.end(), callout) !=
232                callouts.end();
233     }
234 
235     /**
236      * Saves that a PGOOD fault has been logged
237      *
238      * @param[in] input - the input the error was logged against
239      */
240     inline void setPGOODFaultLogged(uint32_t input)
241     {
242         pgoodErrors.push_back(input);
243     }
244 
245     /**
246      * Saves that a specific fault on a specific part has been done
247      *
248      * @param[in] callout - error type and name tuple
249      */
250     inline void setPartCallout(const PartCallout& callout)
251     {
252         callouts.push_back(callout);
253     }
254 
255     /**
256      * List of pages that Vout errors have
257      * already been logged against
258      */
259     std::vector<uint32_t> voutErrors;
260 
261     /**
262      * List of inputs that PGOOD errors have
263      * already been logged against
264      */
265     std::vector<uint32_t> pgoodErrors;
266 
267     /**
268      * List of callouts that already been done
269      */
270     std::vector<PartCallout> callouts;
271 
272     /**
273      * The read/write interface to this hardware
274      */
275     pmbus::PMBus interface;
276 
277     /**
278      * A map of GPI pin IDs to the GPIO object
279      * used to access them
280      */
281     std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
282 
283     /**
284      * Keeps track of device access errors to avoid repeatedly
285      * logging errors for bad hardware
286      */
287     bool accessError = false;
288 
289     /**
290      * Keeps track of GPIO access errors when doing the in depth
291      * PGOOD fault analysis to avoid repeatedly logging errors
292      * for bad hardware
293      */
294     bool gpioAccessError = false;
295 
296     /**
297      * The path to the GPIO device used to read
298      * the GPI (PGOOD) status
299      */
300     std::filesystem::path gpioDevice;
301 
302     /**
303      * The D-Bus bus object
304      */
305     sdbusplus::bus::bus& bus;
306 
307     /**
308      * Map of device instance to the instance specific data
309      */
310     static const ucd90160::DeviceMap deviceMap;
311 };
312 
313 } // namespace power
314 } // namespace phosphor
315