1 #pragma once
2 
3 #include "device.hpp"
4 #include "gpio.hpp"
5 #include "pmbus.hpp"
6 #include "types.hpp"
7 
8 #include <sdbusplus/bus.hpp>
9 
10 #include <algorithm>
11 #include <filesystem>
12 #include <map>
13 #include <vector>
14 
15 namespace phosphor
16 {
17 namespace power
18 {
19 
20 // Error type, callout
21 using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>;
22 
23 /**
24  * @class UCD90160
25  *
26  * This class implements fault analysis for the UCD90160
27  * power sequencer device.
28  *
29  */
30 class UCD90160 : public Device
31 {
32   public:
33     UCD90160() = delete;
34     ~UCD90160() = default;
35     UCD90160(const UCD90160&) = delete;
36     UCD90160& operator=(const UCD90160&) = delete;
37     UCD90160(UCD90160&&) = default;
38     UCD90160& operator=(UCD90160&&) = default;
39 
40     /**
41      * Constructor
42      *
43      * @param[in] instance - the device instance number
44      * @param[in] bus - D-Bus bus object
45      */
46     UCD90160(size_t instance, sdbusplus::bus_t& bus);
47 
48     /**
49      * Analyzes the device for errors when the device is
50      * known to be in an error state.  A log will be created.
51      */
52     void onFailure() override;
53 
54     /**
55      * Checks the device for errors and only creates a log
56      * if one is found.
57      */
58     void analyze() override;
59 
60     /**
61      * Clears faults in the device
62      */
clearFaults()63     void clearFaults() override {}
64 
65   private:
66     /**
67      * Reports an error for a GPU PGOOD failure
68      *
69      * @param[in] callout - the GPU callout string
70      */
71     void gpuPGOODError(const std::string& callout);
72 
73     /**
74      * Reports an error for a GPU OverTemp failure
75      *
76      * @param[in] callout - the GPU callout string
77      */
78     void gpuOverTempError(const std::string& callout);
79 
80     /**
81      * Reports an error for a MEM_GOODx failure.
82      *
83      * @param[in] callout - The MEM callout string
84      */
85     void memGoodError(const std::string& callout);
86 
87     /**
88      * Given the device path for a chip, find its gpiochip
89      * path
90      *
91      * @param[in] path - device path, like
92      *                   /sys/devices/.../i2c-11/11-0064
93      *
94      * @return fs::path - The gpiochip path, like
95      *                   /dev/gpiochip1
96      */
97     static std::filesystem::path
98         findGPIODevice(const std::filesystem::path& path);
99 
100     /**
101      * Checks for VOUT faults on the device.
102      *
103      * This device can monitor voltages of its dependent
104      * devices, and VOUT faults are voltage faults
105      * on these devices.
106      *
107      * @return bool - true if an error log was created
108      */
109     bool checkVOUTFaults();
110 
111     /**
112      * Checks for PGOOD faults on the device.
113      *
114      * This device can monitor the PGOOD signals of its dependent
115      * devices, and this check will look for faults of
116      * those PGOODs.
117      *
118      * @param[in] polling - If this is running while polling for errors,
119      *                      as opposing to analyzing a fail condition.
120      *
121      * @return bool - true if an error log was created
122      */
123     bool checkPGOODFaults(bool polling);
124 
125     /**
126      * Creates an error log when the device has an error
127      * but it isn't a PGOOD or voltage failure.
128      */
129     void createPowerFaultLog();
130 
131     /**
132      * Reads the status_word register
133      *
134      * @return uint16_t - the register contents
135      */
136     uint16_t readStatusWord();
137 
138     /**
139      * Reads the mfr_status register
140      *
141      * @return uint32_t - the register contents
142      */
143     uint32_t readMFRStatus();
144 
145     /**
146      * Does any additional fault analysis based on the
147      * value of the extraAnalysisType field in the GPIOConfig
148      * entry.
149      *
150      * Used to get better callouts.
151      *
152      * @param[in] config - the GPIOConfig entry to use
153      *
154      * @return bool - true if a HW error was found, false else
155      */
156     bool doExtraAnalysis(const ucd90160::GPIConfig& config);
157 
158     /**
159      * Does additional fault analysis using GPIOs to
160      * specifically identify the failing part.
161      *
162      * Used when there are too many PGOOD inputs for
163      * the UCD90160 to handle, so just a summary bit
164      * is wired into the chip, and then the specific
165      * fault GPIOs are off of a different GPIO device,
166      * like an IO expander.
167      *
168      * @param[in] type - the type of analysis to do
169      *
170      * @return bool - true if a HW error was found, false else
171      */
172     bool doGPIOAnalysis(ucd90160::extraAnalysisType type);
173 
174     /**
175      * Says if we've already logged a Vout fault
176      *
177      * The policy is only 1 of the same error will
178      * be logged for the duration of a class instance.
179      *
180      * @param[in] page - the page to check
181      *
182      * @return bool - if we've already logged a fault against
183      *                this page
184      */
isVoutFaultLogged(uint32_t page) const185     inline bool isVoutFaultLogged(uint32_t page) const
186     {
187         return std::find(voutErrors.begin(), voutErrors.end(), page) !=
188                voutErrors.end();
189     }
190 
191     /**
192      * Saves that a Vout fault has been logged
193      *
194      * @param[in] page - the page the error was logged against
195      */
setVoutFaultLogged(uint32_t page)196     inline void setVoutFaultLogged(uint32_t page)
197     {
198         voutErrors.push_back(page);
199     }
200 
201     /**
202      * Says if we've already logged a PGOOD fault
203      *
204      * The policy is only 1 of the same errors will
205      * be logged for the duration of a class instance.
206      *
207      * @param[in] input - the input to check
208      *
209      * @return bool - if we've already logged a fault against
210      *                this input
211      */
isPGOODFaultLogged(uint32_t input) const212     inline bool isPGOODFaultLogged(uint32_t input) const
213     {
214         return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) !=
215                pgoodErrors.end();
216     }
217 
218     /**
219      * Says if we've already logged a specific fault
220      * against a specific part
221      *
222      * @param[in] callout - error type and name tuple
223      *
224      * @return bool - if we've already logged this fault
225      *                against this part
226      */
isPartCalledOut(const PartCallout & callout) const227     inline bool isPartCalledOut(const PartCallout& callout) const
228     {
229         return std::find(callouts.begin(), callouts.end(), callout) !=
230                callouts.end();
231     }
232 
233     /**
234      * Saves that a PGOOD fault has been logged
235      *
236      * @param[in] input - the input the error was logged against
237      */
setPGOODFaultLogged(uint32_t input)238     inline void setPGOODFaultLogged(uint32_t input)
239     {
240         pgoodErrors.push_back(input);
241     }
242 
243     /**
244      * Saves that a specific fault on a specific part has been done
245      *
246      * @param[in] callout - error type and name tuple
247      */
setPartCallout(const PartCallout & callout)248     inline void setPartCallout(const PartCallout& callout)
249     {
250         callouts.push_back(callout);
251     }
252 
253     /**
254      * List of pages that Vout errors have
255      * already been logged against
256      */
257     std::vector<uint32_t> voutErrors;
258 
259     /**
260      * List of inputs that PGOOD errors have
261      * already been logged against
262      */
263     std::vector<uint32_t> pgoodErrors;
264 
265     /**
266      * List of callouts that already been done
267      */
268     std::vector<PartCallout> callouts;
269 
270     /**
271      * The read/write interface to this hardware
272      */
273     pmbus::PMBus interface;
274 
275     /**
276      * A map of GPI pin IDs to the GPIO object
277      * used to access them
278      */
279     std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios;
280 
281     /**
282      * Keeps track of device access errors to avoid repeatedly
283      * logging errors for bad hardware
284      */
285     bool accessError = false;
286 
287     /**
288      * Keeps track of GPIO access errors when doing the in depth
289      * PGOOD fault analysis to avoid repeatedly logging errors
290      * for bad hardware
291      */
292     bool gpioAccessError = false;
293 
294     /**
295      * The path to the GPIO device used to read
296      * the GPI (PGOOD) status
297      */
298     std::filesystem::path gpioDevice;
299 
300     /**
301      * The D-Bus bus object
302      */
303     sdbusplus::bus_t& bus;
304 
305     /**
306      * Map of device instance to the instance specific data
307      */
308     static const ucd90160::DeviceMap deviceMap;
309 };
310 
311 } // namespace power
312 } // namespace phosphor
313