1 #pragma once
2 
3 #include "device.hpp"
4 #include "gpio.hpp"
5 #include "pmbus.hpp"
6 #include "types.hpp"
7 
8 #include <sdbusplus/bus.hpp>
9 
10 #include <algorithm>
11 #include <filesystem>
12 #include <map>
13 #include <vector>
14 
15 namespace witherspoon
16 {
17 namespace power
18 {
19 
// Identifies one reported fault against one part: the first element is the
// extra-analysis (error) type, the second is the callout string naming the
// part.  UCD90160 stores these to remember which type/part combinations
// have already been called out.
using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
22 
23 /**
24  * @class UCD90160
25  *
26  * This class implements fault analysis for the UCD90160
27  * power sequencer device.
28  *
29  */
30 class UCD90160 : public Device
31 {
32   public:
33     UCD90160() = delete;
34     ~UCD90160() = default;
35     UCD90160(const UCD90160&) = delete;
36     UCD90160& operator=(const UCD90160&) = delete;
37     UCD90160(UCD90160&&) = default;
38     UCD90160& operator=(UCD90160&&) = default;
39 
40     /**
41      * Constructor
42      *
43      * @param[in] instance - the device instance number
44      * @param[in] bus - D-Bus bus object
45      */
46     UCD90160(size_t instance, sdbusplus::bus::bus& bus);
47 
48     /**
49      * Analyzes the device for errors when the device is
50      * known to be in an error state.  A log will be created.
51      */
52     void onFailure() override;
53 
54     /**
55      * Checks the device for errors and only creates a log
56      * if one is found.
57      */
58     void analyze() override;
59 
60     /**
61      * Clears faults in the device
62      */
63     void clearFaults() override
64     {}
65 
66   private:
67     /**
68      * Reports an error for a GPU PGOOD failure
69      *
70      * @param[in] callout - the GPU callout string
71      */
72     void gpuPGOODError(const std::string& callout);
73 
74     /**
75      * Reports an error for a GPU OverTemp failure
76      *
77      * @param[in] callout - the GPU callout string
78      */
79     void gpuOverTempError(const std::string& callout);
80 
81     /**
82      * Reports an error for a MEM_GOODx failure.
83      *
84      * @param[in] callout - The MEM callout string
85      */
86     void memGoodError(const std::string& callout);
87 
88     /**
89      * Given the device path for a chip, find its gpiochip
90      * path
91      *
92      * @param[in] path - device path, like
93      *                   /sys/devices/.../i2c-11/11-0064
94      *
95      * @return fs::path - The gpiochip path, like
96      *                   /dev/gpiochip1
97      */
98     static std::filesystem::path
99         findGPIODevice(const std::filesystem::path& path);
100 
101     /**
102      * Checks for VOUT faults on the device.
103      *
104      * This device can monitor voltages of its dependent
105      * devices, and VOUT faults are voltage faults
106      * on these devices.
107      *
108      * @return bool - true if an error log was created
109      */
110     bool checkVOUTFaults();
111 
112     /**
113      * Checks for PGOOD faults on the device.
114      *
115      * This device can monitor the PGOOD signals of its dependent
116      * devices, and this check will look for faults of
117      * those PGOODs.
118      *
119      * @param[in] polling - If this is running while polling for errors,
120      *                      as opposing to analyzing a fail condition.
121      *
122      * @return bool - true if an error log was created
123      */
124     bool checkPGOODFaults(bool polling);
125 
126     /**
127      * Creates an error log when the device has an error
128      * but it isn't a PGOOD or voltage failure.
129      */
130     void createPowerFaultLog();
131 
132     /**
133      * Reads the status_word register
134      *
135      * @return uint16_t - the register contents
136      */
137     uint16_t readStatusWord();
138 
139     /**
140      * Reads the mfr_status register
141      *
142      * @return uint32_t - the register contents
143      */
144     uint32_t readMFRStatus();
145 
146     /**
147      * Does any additional fault analysis based on the
148      * value of the extraAnalysisType field in the GPIOConfig
149      * entry.
150      *
151      * Used to get better callouts.
152      *
153      * @param[in] config - the GPIOConfig entry to use
154      *
155      * @return bool - true if a HW error was found, false else
156      */
157     bool doExtraAnalysis(const ucd90160::GPIConfig& config);
158 
159     /**
160      * Does additional fault analysis using GPIOs to
161      * specifically identify the failing part.
162      *
163      * Used when there are too many PGOOD inputs for
164      * the UCD90160 to handle, so just a summary bit
165      * is wired into the chip, and then the specific
166      * fault GPIOs are off of a different GPIO device,
167      * like an IO expander.
168      *
169      * @param[in] type - the type of analysis to do
170      *
171      * @return bool - true if a HW error was found, false else
172      */
173     bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
174 
175     /**
176      * Says if we've already logged a Vout fault
177      *
178      * The policy is only 1 of the same error will
179      * be logged for the duration of a class instance.
180      *
181      * @param[in] page - the page to check
182      *
183      * @return bool - if we've already logged a fault against
184      *                this page
185      */
186     inline bool isVoutFaultLogged(uint32_t page) const
187     {
188         return std::find(voutErrors.begin(), voutErrors.end(), page) !=
189                voutErrors.end();
190     }
191 
192     /**
193      * Saves that a Vout fault has been logged
194      *
195      * @param[in] page - the page the error was logged against
196      */
197     inline void setVoutFaultLogged(uint32_t page)
198     {
199         voutErrors.push_back(page);
200     }
201 
202     /**
203      * Says if we've already logged a PGOOD fault
204      *
205      * The policy is only 1 of the same errors will
206      * be logged for the duration of a class instance.
207      *
208      * @param[in] input - the input to check
209      *
210      * @return bool - if we've already logged a fault against
211      *                this input
212      */
213     inline bool isPGOODFaultLogged(uint32_t input) const
214     {
215         return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
216                pgoodErrors.end();
217     }
218 
219     /**
220      * Says if we've already logged a specific fault
221      * against a specific part
222      *
223      * @param[in] callout - error type and name tuple
224      *
225      * @return bool - if we've already logged this fault
226      *                against this part
227      */
228     inline bool isPartCalledOut(const PartCallout& callout) const
229     {
230         return std::find(callouts.begin(), callouts.end(), callout) !=
231                callouts.end();
232     }
233 
234     /**
235      * Saves that a PGOOD fault has been logged
236      *
237      * @param[in] input - the input the error was logged against
238      */
239     inline void setPGOODFaultLogged(uint32_t input)
240     {
241         pgoodErrors.push_back(input);
242     }
243 
244     /**
245      * Saves that a specific fault on a specific part has been done
246      *
247      * @param[in] callout - error type and name tuple
248      */
249     inline void setPartCallout(const PartCallout& callout)
250     {
251         callouts.push_back(callout);
252     }
253 
254     /**
255      * List of pages that Vout errors have
256      * already been logged against
257      */
258     std::vector<uint32_t> voutErrors;
259 
260     /**
261      * List of inputs that PGOOD errors have
262      * already been logged against
263      */
264     std::vector<uint32_t> pgoodErrors;
265 
266     /**
267      * List of callouts that already been done
268      */
269     std::vector<PartCallout> callouts;
270 
271     /**
272      * The read/write interface to this hardware
273      */
274     pmbus::PMBus interface;
275 
276     /**
277      * A map of GPI pin IDs to the GPIO object
278      * used to access them
279      */
280     std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
281 
282     /**
283      * Keeps track of device access errors to avoid repeatedly
284      * logging errors for bad hardware
285      */
286     bool accessError = false;
287 
288     /**
289      * Keeps track of GPIO access errors when doing the in depth
290      * PGOOD fault analysis to avoid repeatedly logging errors
291      * for bad hardware
292      */
293     bool gpioAccessError = false;
294 
295     /**
296      * The path to the GPIO device used to read
297      * the GPI (PGOOD) status
298      */
299     std::filesystem::path gpioDevice;
300 
301     /**
302      * The D-Bus bus object
303      */
304     sdbusplus::bus::bus& bus;
305 
306     /**
307      * Map of device instance to the instance specific data
308      */
309     static const ucd90160::DeviceMap deviceMap;
310 };
311 
312 } // namespace power
313 } // namespace witherspoon
314