1 #pragma once
2 
#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <sdbusplus/bus.hpp>
#include <string>
#include <tuple>
#include <vector>
13 
14 namespace witherspoon
15 {
16 namespace power
17 {
18 
19 // Error type, callout
20 using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
21 
22 /**
23  * @class UCD90160
24  *
25  * This class implements fault analysis for the UCD90160
26  * power sequencer device.
27  *
28  */
29 class UCD90160 : public Device
30 {
31   public:
32     UCD90160() = delete;
33     ~UCD90160() = default;
34     UCD90160(const UCD90160&) = delete;
35     UCD90160& operator=(const UCD90160&) = delete;
36     UCD90160(UCD90160&&) = default;
37     UCD90160& operator=(UCD90160&&) = default;
38 
39     /**
40      * Constructor
41      *
42      * @param[in] instance - the device instance number
43      * @param[in] bus - D-Bus bus object
44      */
45     UCD90160(size_t instance, sdbusplus::bus::bus& bus);
46 
47     /**
48      * Analyzes the device for errors when the device is
49      * known to be in an error state.  A log will be created.
50      */
51     void onFailure() override;
52 
53     /**
54      * Checks the device for errors and only creates a log
55      * if one is found.
56      */
57     void analyze() override;
58 
59     /**
60      * Clears faults in the device
61      */
62     void clearFaults() override
63     {
64     }
65 
66   private:
67     /**
68      * Reports an error for a GPU PGOOD failure
69      *
70      * @param[in] callout - the GPU callout string
71      */
72     void gpuPGOODError(const std::string& callout);
73 
74     /**
75      * Reports an error for a GPU OverTemp failure
76      *
77      * @param[in] callout - the GPU callout string
78      */
79     void gpuOverTempError(const std::string& callout);
80 
81     /**
82      * Given the device path for a chip, find its gpiochip
83      * path
84      *
85      * @param[in] path - device path, like
86      *                   /sys/devices/.../i2c-11/11-0064
87      *
88      * @return fs::path - The gpiochip path, like
89      *                   /dev/gpiochip1
90      */
91     static std::filesystem::path
92         findGPIODevice(const std::filesystem::path& path);
93 
94     /**
95      * Checks for VOUT faults on the device.
96      *
97      * This device can monitor voltages of its dependent
98      * devices, and VOUT faults are voltage faults
99      * on these devices.
100      *
101      * @return bool - true if an error log was created
102      */
103     bool checkVOUTFaults();
104 
105     /**
106      * Checks for PGOOD faults on the device.
107      *
108      * This device can monitor the PGOOD signals of its dependent
109      * devices, and this check will look for faults of
110      * those PGOODs.
111      *
112      * @param[in] polling - If this is running while polling for errors,
113      *                      as opposing to analyzing a fail condition.
114      *
115      * @return bool - true if an error log was created
116      */
117     bool checkPGOODFaults(bool polling);
118 
119     /**
120      * Creates an error log when the device has an error
121      * but it isn't a PGOOD or voltage failure.
122      */
123     void createPowerFaultLog();
124 
125     /**
126      * Reads the status_word register
127      *
128      * @return uint16_t - the register contents
129      */
130     uint16_t readStatusWord();
131 
132     /**
133      * Reads the mfr_status register
134      *
135      * @return uint32_t - the register contents
136      */
137     uint32_t readMFRStatus();
138 
139     /**
140      * Does any additional fault analysis based on the
141      * value of the extraAnalysisType field in the GPIOConfig
142      * entry.
143      *
144      * Used to get better callouts.
145      *
146      * @param[in] config - the GPIOConfig entry to use
147      *
148      * @return bool - true if a HW error was found, false else
149      */
150     bool doExtraAnalysis(const ucd90160::GPIConfig& config);
151 
152     /**
153      * Does additional fault analysis using GPIOs to
154      * specifically identify the failing part.
155      *
156      * Used when there are too many PGOOD inputs for
157      * the UCD90160 to handle, so just a summary bit
158      * is wired into the chip, and then the specific
159      * fault GPIOs are off of a different GPIO device,
160      * like an IO expander.
161      *
162      * @param[in] type - the type of analysis to do
163      *
164      * @return bool - true if a HW error was found, false else
165      */
166     bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
167 
168     /**
169      * Says if we've already logged a Vout fault
170      *
171      * The policy is only 1 of the same error will
172      * be logged for the duration of a class instance.
173      *
174      * @param[in] page - the page to check
175      *
176      * @return bool - if we've already logged a fault against
177      *                this page
178      */
179     inline bool isVoutFaultLogged(uint32_t page) const
180     {
181         return std::find(voutErrors.begin(), voutErrors.end(), page) !=
182                voutErrors.end();
183     }
184 
185     /**
186      * Saves that a Vout fault has been logged
187      *
188      * @param[in] page - the page the error was logged against
189      */
190     inline void setVoutFaultLogged(uint32_t page)
191     {
192         voutErrors.push_back(page);
193     }
194 
195     /**
196      * Says if we've already logged a PGOOD fault
197      *
198      * The policy is only 1 of the same errors will
199      * be logged for the duration of a class instance.
200      *
201      * @param[in] input - the input to check
202      *
203      * @return bool - if we've already logged a fault against
204      *                this input
205      */
206     inline bool isPGOODFaultLogged(uint32_t input) const
207     {
208         return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
209                pgoodErrors.end();
210     }
211 
212     /**
213      * Says if we've already logged a specific fault
214      * against a specific part
215      *
216      * @param[in] callout - error type and name tuple
217      *
218      * @return bool - if we've already logged this fault
219      *                against this part
220      */
221     inline bool isPartCalledOut(const PartCallout& callout) const
222     {
223         return std::find(callouts.begin(), callouts.end(), callout) !=
224                callouts.end();
225     }
226 
227     /**
228      * Saves that a PGOOD fault has been logged
229      *
230      * @param[in] input - the input the error was logged against
231      */
232     inline void setPGOODFaultLogged(uint32_t input)
233     {
234         pgoodErrors.push_back(input);
235     }
236 
237     /**
238      * Saves that a specific fault on a specific part has been done
239      *
240      * @param[in] callout - error type and name tuple
241      */
242     inline void setPartCallout(const PartCallout& callout)
243     {
244         callouts.push_back(callout);
245     }
246 
247     /**
248      * List of pages that Vout errors have
249      * already been logged against
250      */
251     std::vector<uint32_t> voutErrors;
252 
253     /**
254      * List of inputs that PGOOD errors have
255      * already been logged against
256      */
257     std::vector<uint32_t> pgoodErrors;
258 
259     /**
260      * List of callouts that already been done
261      */
262     std::vector<PartCallout> callouts;
263 
264     /**
265      * The read/write interface to this hardware
266      */
267     pmbus::PMBus interface;
268 
269     /**
270      * A map of GPI pin IDs to the GPIO object
271      * used to access them
272      */
273     std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
274 
275     /**
276      * Keeps track of device access errors to avoid repeatedly
277      * logging errors for bad hardware
278      */
279     bool accessError = false;
280 
281     /**
282      * Keeps track of GPIO access errors when doing the in depth
283      * PGOOD fault analysis to avoid repeatedly logging errors
284      * for bad hardware
285      */
286     bool gpioAccessError = false;
287 
288     /**
289      * The path to the GPIO device used to read
290      * the GPI (PGOOD) status
291      */
292     std::filesystem::path gpioDevice;
293 
294     /**
295      * The D-Bus bus object
296      */
297     sdbusplus::bus::bus& bus;
298 
299     /**
300      * Map of device instance to the instance specific data
301      */
302     static const ucd90160::DeviceMap deviceMap;
303 };
304 
305 } // namespace power
306 } // namespace witherspoon
307