1 #pragma once 2 3 #include "device.hpp" 4 #include "gpio.hpp" 5 #include "pmbus.hpp" 6 #include "types.hpp" 7 8 #include <algorithm> 9 #include <filesystem> 10 #include <map> 11 #include <sdbusplus/bus.hpp> 12 #include <vector> 13 14 namespace witherspoon 15 { 16 namespace power 17 { 18 19 // Error type, callout 20 using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>; 21 22 /** 23 * @class UCD90160 24 * 25 * This class implements fault analysis for the UCD90160 26 * power sequencer device. 27 * 28 */ 29 class UCD90160 : public Device 30 { 31 public: 32 UCD90160() = delete; 33 ~UCD90160() = default; 34 UCD90160(const UCD90160&) = delete; 35 UCD90160& operator=(const UCD90160&) = delete; 36 UCD90160(UCD90160&&) = default; 37 UCD90160& operator=(UCD90160&&) = default; 38 39 /** 40 * Constructor 41 * 42 * @param[in] instance - the device instance number 43 * @param[in] bus - D-Bus bus object 44 */ 45 UCD90160(size_t instance, sdbusplus::bus::bus& bus); 46 47 /** 48 * Analyzes the device for errors when the device is 49 * known to be in an error state. A log will be created. 50 */ 51 void onFailure() override; 52 53 /** 54 * Checks the device for errors and only creates a log 55 * if one is found. 56 */ 57 void analyze() override; 58 59 /** 60 * Clears faults in the device 61 */ 62 void clearFaults() override 63 { 64 } 65 66 private: 67 /** 68 * Reports an error for a GPU PGOOD failure 69 * 70 * @param[in] callout - the GPU callout string 71 */ 72 void gpuPGOODError(const std::string& callout); 73 74 /** 75 * Reports an error for a GPU OverTemp failure 76 * 77 * @param[in] callout - the GPU callout string 78 */ 79 void gpuOverTempError(const std::string& callout); 80 81 /** 82 * Given the device path for a chip, find its gpiochip 83 * path 84 * 85 * @param[in] path - device path, like 86 * /sys/devices/.../i2c-11/11-0064 87 * 88 * @return fs::path - The gpiochip path, like 89 * /dev/gpiochip1 90 */ 91 static std::filesystem::path 92 findGPIODevice(const std::filesystem::path& path); 93 94 /** 95 * Checks for VOUT faults on the device. 96 * 97 * This device can monitor voltages of its dependent 98 * devices, and VOUT faults are voltage faults 99 * on these devices. 100 * 101 * @return bool - true if an error log was created 102 */ 103 bool checkVOUTFaults(); 104 105 /** 106 * Checks for PGOOD faults on the device. 107 * 108 * This device can monitor the PGOOD signals of its dependent 109 * devices, and this check will look for faults of 110 * those PGOODs. 111 * 112 * @param[in] polling - If this is running while polling for errors, 113 * as opposing to analyzing a fail condition. 114 * 115 * @return bool - true if an error log was created 116 */ 117 bool checkPGOODFaults(bool polling); 118 119 /** 120 * Creates an error log when the device has an error 121 * but it isn't a PGOOD or voltage failure. 122 */ 123 void createPowerFaultLog(); 124 125 /** 126 * Reads the status_word register 127 * 128 * @return uint16_t - the register contents 129 */ 130 uint16_t readStatusWord(); 131 132 /** 133 * Reads the mfr_status register 134 * 135 * @return uint32_t - the register contents 136 */ 137 uint32_t readMFRStatus(); 138 139 /** 140 * Does any additional fault analysis based on the 141 * value of the extraAnalysisType field in the GPIOConfig 142 * entry. 143 * 144 * Used to get better callouts. 145 * 146 * @param[in] config - the GPIOConfig entry to use 147 * 148 * @return bool - true if a HW error was found, false else 149 */ 150 bool doExtraAnalysis(const ucd90160::GPIConfig& config); 151 152 /** 153 * Does additional fault analysis using GPIOs to 154 * specifically identify the failing part. 155 * 156 * Used when there are too many PGOOD inputs for 157 * the UCD90160 to handle, so just a summary bit 158 * is wired into the chip, and then the specific 159 * fault GPIOs are off of a different GPIO device, 160 * like an IO expander. 161 * 162 * @param[in] type - the type of analysis to do 163 * 164 * @return bool - true if a HW error was found, false else 165 */ 166 bool doGPIOAnalysis(ucd90160::extraAnalysisType type); 167 168 /** 169 * Says if we've already logged a Vout fault 170 * 171 * The policy is only 1 of the same error will 172 * be logged for the duration of a class instance. 173 * 174 * @param[in] page - the page to check 175 * 176 * @return bool - if we've already logged a fault against 177 * this page 178 */ 179 inline bool isVoutFaultLogged(uint32_t page) const 180 { 181 return std::find(voutErrors.begin(), voutErrors.end(), page) != 182 voutErrors.end(); 183 } 184 185 /** 186 * Saves that a Vout fault has been logged 187 * 188 * @param[in] page - the page the error was logged against 189 */ 190 inline void setVoutFaultLogged(uint32_t page) 191 { 192 voutErrors.push_back(page); 193 } 194 195 /** 196 * Says if we've already logged a PGOOD fault 197 * 198 * The policy is only 1 of the same errors will 199 * be logged for the duration of a class instance. 200 * 201 * @param[in] input - the input to check 202 * 203 * @return bool - if we've already logged a fault against 204 * this input 205 */ 206 inline bool isPGOODFaultLogged(uint32_t input) const 207 { 208 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) != 209 pgoodErrors.end(); 210 } 211 212 /** 213 * Says if we've already logged a specific fault 214 * against a specific part 215 * 216 * @param[in] callout - error type and name tuple 217 * 218 * @return bool - if we've already logged this fault 219 * against this part 220 */ 221 inline bool isPartCalledOut(const PartCallout& callout) const 222 { 223 return std::find(callouts.begin(), callouts.end(), callout) != 224 callouts.end(); 225 } 226 227 /** 228 * Saves that a PGOOD fault has been logged 229 * 230 * @param[in] input - the input the error was logged against 231 */ 232 inline void setPGOODFaultLogged(uint32_t input) 233 { 234 pgoodErrors.push_back(input); 235 } 236 237 /** 238 * Saves that a specific fault on a specific part has been done 239 * 240 * @param[in] callout - error type and name tuple 241 */ 242 inline void setPartCallout(const PartCallout& callout) 243 { 244 callouts.push_back(callout); 245 } 246 247 /** 248 * List of pages that Vout errors have 249 * already been logged against 250 */ 251 std::vector<uint32_t> voutErrors; 252 253 /** 254 * List of inputs that PGOOD errors have 255 * already been logged against 256 */ 257 std::vector<uint32_t> pgoodErrors; 258 259 /** 260 * List of callouts that already been done 261 */ 262 std::vector<PartCallout> callouts; 263 264 /** 265 * The read/write interface to this hardware 266 */ 267 pmbus::PMBus interface; 268 269 /** 270 * A map of GPI pin IDs to the GPIO object 271 * used to access them 272 */ 273 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios; 274 275 /** 276 * Keeps track of device access errors to avoid repeatedly 277 * logging errors for bad hardware 278 */ 279 bool accessError = false; 280 281 /** 282 * Keeps track of GPIO access errors when doing the in depth 283 * PGOOD fault analysis to avoid repeatedly logging errors 284 * for bad hardware 285 */ 286 bool gpioAccessError = false; 287 288 /** 289 * The path to the GPIO device used to read 290 * the GPI (PGOOD) status 291 */ 292 std::filesystem::path gpioDevice; 293 294 /** 295 * The D-Bus bus object 296 */ 297 sdbusplus::bus::bus& bus; 298 299 /** 300 * Map of device instance to the instance specific data 301 */ 302 static const ucd90160::DeviceMap deviceMap; 303 }; 304 305 } // namespace power 306 } // namespace witherspoon 307