1 #pragma once 2 3 #include <algorithm> 4 #include <experimental/filesystem> 5 #include <map> 6 #include <sdbusplus/bus.hpp> 7 #include <vector> 8 #include "device.hpp" 9 #include "gpio.hpp" 10 #include "pmbus.hpp" 11 #include "types.hpp" 12 13 namespace witherspoon 14 { 15 namespace power 16 { 17 18 //Error type, callout 19 using PartCallout = 20 std::tuple<ucd90160::extraAnalysisType, std::string>; 21 22 /** 23 * @class UCD90160 24 * 25 * This class implements fault analysis for the UCD90160 26 * power sequencer device. 27 * 28 */ 29 class UCD90160 : public Device 30 { 31 public: 32 33 UCD90160() = delete; 34 ~UCD90160() = default; 35 UCD90160(const UCD90160&) = delete; 36 UCD90160& operator=(const UCD90160&) = delete; 37 UCD90160(UCD90160&&) = default; 38 UCD90160& operator=(UCD90160&&) = default; 39 40 /** 41 * Constructor 42 * 43 * @param[in] instance - the device instance number 44 * @param[in] bus - D-Bus bus object 45 */ 46 UCD90160(size_t instance, sdbusplus::bus::bus& bus); 47 48 /** 49 * Analyzes the device for errors when the device is 50 * known to be in an error state. A log will be created. 51 */ 52 void onFailure() override; 53 54 /** 55 * Checks the device for errors and only creates a log 56 * if one is found. 57 */ 58 void analyze() override; 59 60 /** 61 * Clears faults in the device 62 */ 63 void clearFaults() override 64 { 65 } 66 67 private: 68 69 /** 70 * Reports an error for a GPU PGOOD failure 71 * 72 * @param[in] callout - the GPU callout string 73 */ 74 void gpuPGOODError(const std::string& callout); 75 76 /** 77 * Reports an error for a GPU OverTemp failure 78 * 79 * @param[in] callout - the GPU callout string 80 */ 81 void gpuOverTempError(const std::string& callout); 82 83 /** 84 * Given the device path for a chip, find its gpiochip 85 * path 86 * 87 * @param[in] path - device path, like 88 * /sys/devices/.../i2c-11/11-0064 89 * 90 * @return fs::path - The gpiochip path, like 91 * /dev/gpiochip1 92 */ 93 static std::experimental::filesystem::path findGPIODevice( 94 const std::experimental::filesystem::path& path); 95 96 /** 97 * Checks for VOUT faults on the device. 98 * 99 * This device can monitor voltages of its dependent 100 * devices, and VOUT faults are voltage faults 101 * on these devices. 102 * 103 * @return bool - true if an error log was created 104 */ 105 bool checkVOUTFaults(); 106 107 /** 108 * Checks for PGOOD faults on the device. 109 * 110 * This device can monitor the PGOOD signals of its dependent 111 * devices, and this check will look for faults of 112 * those PGOODs. 113 * 114 * @param[in] polling - If this is running while polling for errors, 115 * as opposing to analyzing a fail condition. 116 * 117 * @return bool - true if an error log was created 118 */ 119 bool checkPGOODFaults(bool polling); 120 121 /** 122 * Creates an error log when the device has an error 123 * but it isn't a PGOOD or voltage failure. 124 */ 125 void createPowerFaultLog(); 126 127 /** 128 * Reads the status_word register 129 * 130 * @return uint16_t - the register contents 131 */ 132 uint16_t readStatusWord(); 133 134 /** 135 * Reads the mfr_status register 136 * 137 * @return uint32_t - the register contents 138 */ 139 uint32_t readMFRStatus(); 140 141 /** 142 * Does any additional fault analysis based on the 143 * value of the extraAnalysisType field in the GPIOConfig 144 * entry. 145 * 146 * Used to get better callouts. 147 * 148 * @param[in] config - the GPIOConfig entry to use 149 * 150 * @return bool - true if a HW error was found, false else 151 */ 152 bool doExtraAnalysis(const ucd90160::GPIConfig& config); 153 154 /** 155 * Does additional fault analysis using GPIOs to 156 * specifically identify the failing part. 157 * 158 * Used when there are too many PGOOD inputs for 159 * the UCD90160 to handle, so just a summary bit 160 * is wired into the chip, and then the specific 161 * fault GPIOs are off of a different GPIO device, 162 * like an IO expander. 163 * 164 * @param[in] type - the type of analysis to do 165 * 166 * @return bool - true if a HW error was found, false else 167 */ 168 bool doGPIOAnalysis(ucd90160::extraAnalysisType type); 169 170 /** 171 * Says if we've already logged a Vout fault 172 * 173 * The policy is only 1 of the same error will 174 * be logged for the duration of a class instance. 175 * 176 * @param[in] page - the page to check 177 * 178 * @return bool - if we've already logged a fault against 179 * this page 180 */ 181 inline bool isVoutFaultLogged(uint32_t page) const 182 { 183 return std::find(voutErrors.begin(), 184 voutErrors.end(), 185 page) != voutErrors.end(); 186 } 187 188 /** 189 * Saves that a Vout fault has been logged 190 * 191 * @param[in] page - the page the error was logged against 192 */ 193 inline void setVoutFaultLogged(uint32_t page) 194 { 195 voutErrors.push_back(page); 196 } 197 198 /** 199 * Says if we've already logged a PGOOD fault 200 * 201 * The policy is only 1 of the same errors will 202 * be logged for the duration of a class instance. 203 * 204 * @param[in] input - the input to check 205 * 206 * @return bool - if we've already logged a fault against 207 * this input 208 */ 209 inline bool isPGOODFaultLogged(uint32_t input) const 210 { 211 return std::find(pgoodErrors.begin(), 212 pgoodErrors.end(), 213 input) != pgoodErrors.end(); 214 } 215 216 /** 217 * Says if we've already logged a specific fault 218 * against a specific part 219 * 220 * @param[in] callout - error type and name tuple 221 * 222 * @return bool - if we've already logged this fault 223 * against this part 224 */ 225 inline bool isPartCalledOut(const PartCallout& callout) const 226 { 227 return std::find(callouts.begin(), 228 callouts.end(), 229 callout) != callouts.end(); 230 } 231 232 /** 233 * Saves that a PGOOD fault has been logged 234 * 235 * @param[in] input - the input the error was logged against 236 */ 237 inline void setPGOODFaultLogged(uint32_t input) 238 { 239 pgoodErrors.push_back(input); 240 } 241 242 /** 243 * Saves that a specific fault on a specific part has been done 244 * 245 * @param[in] callout - error type and name tuple 246 */ 247 inline void setPartCallout(const PartCallout& callout) 248 { 249 callouts.push_back(callout); 250 } 251 252 /** 253 * List of pages that Vout errors have 254 * already been logged against 255 */ 256 std::vector<uint32_t> voutErrors; 257 258 /** 259 * List of inputs that PGOOD errors have 260 * already been logged against 261 */ 262 std::vector<uint32_t> pgoodErrors; 263 264 /** 265 * List of callouts that already been done 266 */ 267 std::vector<PartCallout> callouts; 268 269 /** 270 * The read/write interface to this hardware 271 */ 272 pmbus::PMBus interface; 273 274 /** 275 * A map of GPI pin IDs to the GPIO object 276 * used to access them 277 */ 278 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios; 279 280 /** 281 * Keeps track of device access errors to avoid repeatedly 282 * logging errors for bad hardware 283 */ 284 bool accessError = false; 285 286 /** 287 * Keeps track of GPIO access errors when doing the in depth 288 * PGOOD fault analysis to avoid repeatedly logging errors 289 * for bad hardware 290 */ 291 bool gpioAccessError = false; 292 293 /** 294 * The path to the GPIO device used to read 295 * the GPI (PGOOD) status 296 */ 297 std::experimental::filesystem::path gpioDevice; 298 299 /** 300 * The D-Bus bus object 301 */ 302 sdbusplus::bus::bus& bus; 303 304 /** 305 * Map of device instance to the instance specific data 306 */ 307 static const ucd90160::DeviceMap deviceMap; 308 }; 309 310 } 311 } 312