1 #pragma once
2 
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"
11 
12 namespace witherspoon
13 {
14 namespace power
15 {
16 
17 //Error type, callout
18 using PartCallout =
19         std::tuple<ucd90160::extraAnalysisType, std::string>;
20 
21 /**
22  * @class UCD90160
23  *
24  * This class implements fault analysis for the UCD90160
25  * power sequencer device.
26  *
27  */
28 class UCD90160 : public Device
29 {
30     public:
31 
32         UCD90160() = delete;
33         ~UCD90160() = default;
34         UCD90160(const UCD90160&) = delete;
35         UCD90160& operator=(const UCD90160&) = delete;
36         UCD90160(UCD90160&&) = default;
37         UCD90160& operator=(UCD90160&&) = default;
38 
39         /**
40          * Constructor
41          *
42          * @param[in] instance - the device instance number
43          */
44         UCD90160(size_t instance);
45 
46         /**
47          * Analyzes the device for errors when the device is
48          * known to be in an error state.  A log will be created.
49          */
50         void onFailure() override;
51 
52         /**
53          * Checks the device for errors and only creates a log
54          * if one is found.
55          */
56         void analyze() override;
57 
58         /**
59          * Clears faults in the device
60          */
61         void clearFaults() override
62         {
63         }
64 
65     private:
66 
67         /**
68          * Reports an error for a GPU PGOOD failure
69          *
70          * @param[in] callout - the GPU callout string
71          */
72         void gpuPGOODError(const std::string& callout);
73 
74         /**
75          * Reports an error for a GPU OverTemp failure
76          *
77          * @param[in] callout - the GPU callout string
78          */
79         void gpuOverTempError(const std::string& callout);
80 
81         /**
82          * Given the device path for a chip, find its gpiochip
83          * path
84          *
85          * @param[in] path - device path, like
86          *                   /sys/devices/.../i2c-11/11-0064
87          *
88          * @return fs::path - The gpiochip path, like
89          *                   /dev/gpiochip1
90          */
91         static std::experimental::filesystem::path findGPIODevice(
92                 const std::experimental::filesystem::path& path);
93 
94         /**
95          * Checks for VOUT faults on the device.
96          *
97          * This device can monitor voltages of its dependent
98          * devices, and VOUT faults are voltage faults
99          * on these devices.
100          *
101          * @return bool - true if an error log was created
102          */
103         bool checkVOUTFaults();
104 
105         /**
106          * Checks for PGOOD faults on the device.
107          *
108          * This device can monitor the PGOOD signals of its dependent
109          * devices, and this check will look for faults of
110          * those PGOODs.
111          *
112          * @param[in] polling - If this is running while polling for errors,
113          *                      as opposing to analyzing a fail condition.
114          *
115          * @return bool - true if an error log was created
116          */
117          bool checkPGOODFaults(bool polling);
118 
119         /**
120          * Creates an error log when the device has an error
121          * but it isn't a PGOOD or voltage failure.
122          */
123         void createPowerFaultLog();
124 
125         /**
126          * Reads the status_word register
127          *
128          * @return uint16_t - the register contents
129          */
130         uint16_t readStatusWord();
131 
132         /**
133          * Reads the mfr_status register
134          *
135          * @return uint32_t - the register contents
136          */
137         uint32_t readMFRStatus();
138 
139         /**
140          * Does any additional fault analysis based on the
141          * value of the extraAnalysisType field in the GPIOConfig
142          * entry.
143          *
144          * Used to get better callouts.
145          *
146          * @param[in] config - the GPIOConfig entry to use
147          *
148          * @return bool - true if a HW error was found, false else
149          */
150         bool doExtraAnalysis(const ucd90160::GPIConfig& config);
151 
152         /**
153          * Does additional fault analysis using GPIOs to
154          * specifically identify the failing part.
155          *
156          * Used when there are too many PGOOD inputs for
157          * the UCD90160 to handle, so just a summary bit
158          * is wired into the chip, and then the specific
159          * fault GPIOs are off of a different GPIO device,
160          * like an IO expander.
161          *
162          * @param[in] type - the type of analysis to do
163          *
164          * @return bool - true if a HW error was found, false else
165          */
166         bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
167 
168         /**
169          * Says if we've already logged a Vout fault
170          *
171          * The policy is only 1 of the same error will
172          * be logged for the duration of a class instance.
173          *
174          * @param[in] page - the page to check
175          *
176          * @return bool - if we've already logged a fault against
177          *                this page
178          */
179         inline bool isVoutFaultLogged(uint32_t page) const
180         {
181             return std::find(voutErrors.begin(),
182                              voutErrors.end(),
183                              page) != voutErrors.end();
184         }
185 
186         /**
187          * Saves that a Vout fault has been logged
188          *
189          * @param[in] page - the page the error was logged against
190          */
191         inline void setVoutFaultLogged(uint32_t page)
192         {
193             voutErrors.push_back(page);
194         }
195 
196         /**
197          * Says if we've already logged a PGOOD fault
198          *
199          * The policy is only 1 of the same errors will
200          * be logged for the duration of a class instance.
201          *
202          * @param[in] input - the input to check
203          *
204          * @return bool - if we've already logged a fault against
205          *                this input
206          */
207         inline bool isPGOODFaultLogged(uint32_t input) const
208         {
209             return std::find(pgoodErrors.begin(),
210                              pgoodErrors.end(),
211                              input) != pgoodErrors.end();
212         }
213 
214         /**
215          * Says if we've already logged a specific fault
216          * against a specific part
217          *
218          * @param[in] callout - error type and name tuple
219          *
220          * @return bool - if we've already logged this fault
221          *                against this part
222          */
223         inline bool isPartCalledOut(const PartCallout& callout) const
224         {
225             return std::find(callouts.begin(),
226                              callouts.end(),
227                              callout) != callouts.end();
228         }
229 
230         /**
231          * Saves that a PGOOD fault has been logged
232          *
233          * @param[in] input - the input the error was logged against
234          */
235         inline void setPGOODFaultLogged(uint32_t input)
236         {
237             pgoodErrors.push_back(input);
238         }
239 
240         /**
241          * Saves that a specific fault on a specific part has been done
242          *
243          * @param[in] callout - error type and name tuple
244          */
245         inline void setPartCallout(const PartCallout& callout)
246         {
247             callouts.push_back(callout);
248         }
249 
250         /**
251          * List of pages that Vout errors have
252          * already been logged against
253          */
254         std::vector<uint32_t> voutErrors;
255 
256         /**
257          * List of inputs that PGOOD errors have
258          * already been logged against
259          */
260         std::vector<uint32_t> pgoodErrors;
261 
262         /**
263          * List of callouts that already been done
264          */
265         std::vector<PartCallout> callouts;
266 
267         /**
268          * The read/write interface to this hardware
269          */
270         pmbus::PMBus interface;
271 
272         /**
273          * A map of GPI pin IDs to the GPIO object
274          * used to access them
275          */
276         std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
277 
278         /**
279          * Keeps track of device access errors to avoid repeatedly
280          * logging errors for bad hardware
281          */
282         bool accessError = false;
283 
284         /**
285          * Keeps track of GPIO access errors when doing the in depth
286          * PGOOD fault analysis to avoid repeatedly logging errors
287          * for bad hardware
288          */
289         bool gpioAccessError = false;
290 
291         /**
292          * The path to the GPIO device used to read
293          * the GPI (PGOOD) status
294          */
295         std::experimental::filesystem::path gpioDevice;
296 
297         /**
298          * Map of device instance to the instance specific data
299          */
300         static const ucd90160::DeviceMap deviceMap;
301 };
302 
303 }
304 }
305