1 #pragma once
2 
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <map>
#include <memory>
#include <sdbusplus/bus.hpp>
#include <string>
#include <tuple>
#include <vector>
#include "device.hpp"
#include "gpio.hpp"
#include "pmbus.hpp"
#include "types.hpp"
12 
13 namespace witherspoon
14 {
15 namespace power
16 {
17 
18 //Error type, callout
19 using PartCallout =
20         std::tuple<ucd90160::extraAnalysisType, std::string>;
21 
22 /**
23  * @class UCD90160
24  *
25  * This class implements fault analysis for the UCD90160
26  * power sequencer device.
27  *
28  */
29 class UCD90160 : public Device
30 {
31     public:
32 
33         UCD90160() = delete;
34         ~UCD90160() = default;
35         UCD90160(const UCD90160&) = delete;
36         UCD90160& operator=(const UCD90160&) = delete;
37         UCD90160(UCD90160&&) = default;
38         UCD90160& operator=(UCD90160&&) = default;
39 
40         /**
41          * Constructor
42          *
43          * @param[in] instance - the device instance number
44          * @param[in] bus - D-Bus bus object
45          */
46         UCD90160(size_t instance, sdbusplus::bus::bus& bus);
47 
48         /**
49          * Analyzes the device for errors when the device is
50          * known to be in an error state.  A log will be created.
51          */
52         void onFailure() override;
53 
54         /**
55          * Checks the device for errors and only creates a log
56          * if one is found.
57          */
58         void analyze() override;
59 
60         /**
61          * Clears faults in the device
62          */
63         void clearFaults() override
64         {
65         }
66 
67     private:
68 
69         /**
70          * Reports an error for a GPU PGOOD failure
71          *
72          * @param[in] callout - the GPU callout string
73          */
74         void gpuPGOODError(const std::string& callout);
75 
76         /**
77          * Reports an error for a GPU OverTemp failure
78          *
79          * @param[in] callout - the GPU callout string
80          */
81         void gpuOverTempError(const std::string& callout);
82 
83         /**
84          * Given the device path for a chip, find its gpiochip
85          * path
86          *
87          * @param[in] path - device path, like
88          *                   /sys/devices/.../i2c-11/11-0064
89          *
90          * @return fs::path - The gpiochip path, like
91          *                   /dev/gpiochip1
92          */
93         static std::experimental::filesystem::path findGPIODevice(
94                 const std::experimental::filesystem::path& path);
95 
96         /**
97          * Checks for VOUT faults on the device.
98          *
99          * This device can monitor voltages of its dependent
100          * devices, and VOUT faults are voltage faults
101          * on these devices.
102          *
103          * @return bool - true if an error log was created
104          */
105         bool checkVOUTFaults();
106 
107         /**
108          * Checks for PGOOD faults on the device.
109          *
110          * This device can monitor the PGOOD signals of its dependent
111          * devices, and this check will look for faults of
112          * those PGOODs.
113          *
114          * @param[in] polling - If this is running while polling for errors,
115          *                      as opposing to analyzing a fail condition.
116          *
117          * @return bool - true if an error log was created
118          */
119          bool checkPGOODFaults(bool polling);
120 
121         /**
122          * Creates an error log when the device has an error
123          * but it isn't a PGOOD or voltage failure.
124          */
125         void createPowerFaultLog();
126 
127         /**
128          * Reads the status_word register
129          *
130          * @return uint16_t - the register contents
131          */
132         uint16_t readStatusWord();
133 
134         /**
135          * Reads the mfr_status register
136          *
137          * @return uint32_t - the register contents
138          */
139         uint32_t readMFRStatus();
140 
141         /**
142          * Does any additional fault analysis based on the
143          * value of the extraAnalysisType field in the GPIOConfig
144          * entry.
145          *
146          * Used to get better callouts.
147          *
148          * @param[in] config - the GPIOConfig entry to use
149          *
150          * @return bool - true if a HW error was found, false else
151          */
152         bool doExtraAnalysis(const ucd90160::GPIConfig& config);
153 
154         /**
155          * Does additional fault analysis using GPIOs to
156          * specifically identify the failing part.
157          *
158          * Used when there are too many PGOOD inputs for
159          * the UCD90160 to handle, so just a summary bit
160          * is wired into the chip, and then the specific
161          * fault GPIOs are off of a different GPIO device,
162          * like an IO expander.
163          *
164          * @param[in] type - the type of analysis to do
165          *
166          * @return bool - true if a HW error was found, false else
167          */
168         bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
169 
170         /**
171          * Says if we've already logged a Vout fault
172          *
173          * The policy is only 1 of the same error will
174          * be logged for the duration of a class instance.
175          *
176          * @param[in] page - the page to check
177          *
178          * @return bool - if we've already logged a fault against
179          *                this page
180          */
181         inline bool isVoutFaultLogged(uint32_t page) const
182         {
183             return std::find(voutErrors.begin(),
184                              voutErrors.end(),
185                              page) != voutErrors.end();
186         }
187 
188         /**
189          * Saves that a Vout fault has been logged
190          *
191          * @param[in] page - the page the error was logged against
192          */
193         inline void setVoutFaultLogged(uint32_t page)
194         {
195             voutErrors.push_back(page);
196         }
197 
198         /**
199          * Says if we've already logged a PGOOD fault
200          *
201          * The policy is only 1 of the same errors will
202          * be logged for the duration of a class instance.
203          *
204          * @param[in] input - the input to check
205          *
206          * @return bool - if we've already logged a fault against
207          *                this input
208          */
209         inline bool isPGOODFaultLogged(uint32_t input) const
210         {
211             return std::find(pgoodErrors.begin(),
212                              pgoodErrors.end(),
213                              input) != pgoodErrors.end();
214         }
215 
216         /**
217          * Says if we've already logged a specific fault
218          * against a specific part
219          *
220          * @param[in] callout - error type and name tuple
221          *
222          * @return bool - if we've already logged this fault
223          *                against this part
224          */
225         inline bool isPartCalledOut(const PartCallout& callout) const
226         {
227             return std::find(callouts.begin(),
228                              callouts.end(),
229                              callout) != callouts.end();
230         }
231 
232         /**
233          * Saves that a PGOOD fault has been logged
234          *
235          * @param[in] input - the input the error was logged against
236          */
237         inline void setPGOODFaultLogged(uint32_t input)
238         {
239             pgoodErrors.push_back(input);
240         }
241 
242         /**
243          * Saves that a specific fault on a specific part has been done
244          *
245          * @param[in] callout - error type and name tuple
246          */
247         inline void setPartCallout(const PartCallout& callout)
248         {
249             callouts.push_back(callout);
250         }
251 
252         /**
253          * List of pages that Vout errors have
254          * already been logged against
255          */
256         std::vector<uint32_t> voutErrors;
257 
258         /**
259          * List of inputs that PGOOD errors have
260          * already been logged against
261          */
262         std::vector<uint32_t> pgoodErrors;
263 
264         /**
265          * List of callouts that already been done
266          */
267         std::vector<PartCallout> callouts;
268 
269         /**
270          * The read/write interface to this hardware
271          */
272         pmbus::PMBus interface;
273 
274         /**
275          * A map of GPI pin IDs to the GPIO object
276          * used to access them
277          */
278         std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
279 
280         /**
281          * Keeps track of device access errors to avoid repeatedly
282          * logging errors for bad hardware
283          */
284         bool accessError = false;
285 
286         /**
287          * Keeps track of GPIO access errors when doing the in depth
288          * PGOOD fault analysis to avoid repeatedly logging errors
289          * for bad hardware
290          */
291         bool gpioAccessError = false;
292 
293         /**
294          * The path to the GPIO device used to read
295          * the GPI (PGOOD) status
296          */
297         std::experimental::filesystem::path gpioDevice;
298 
299         /**
300          * The D-Bus bus object
301          */
302         sdbusplus::bus::bus& bus;
303 
304         /**
305          * Map of device instance to the instance specific data
306          */
307         static const ucd90160::DeviceMap deviceMap;
308 };
309 
310 }
311 }
312