1 #pragma once 2 3 #include "device.hpp" 4 #include "gpio.hpp" 5 #include "pmbus.hpp" 6 #include "types.hpp" 7 8 #include <sdbusplus/bus.hpp> 9 10 #include <algorithm> 11 #include <filesystem> 12 #include <map> 13 #include <vector> 14 15 namespace phosphor 16 { 17 namespace power 18 { 19 20 // Error type, callout 21 using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>; 22 23 /** 24 * @class UCD90160 25 * 26 * This class implements fault analysis for the UCD90160 27 * power sequencer device. 28 * 29 */ 30 class UCD90160 : public Device 31 { 32 public: 33 UCD90160() = delete; 34 ~UCD90160() = default; 35 UCD90160(const UCD90160&) = delete; 36 UCD90160& operator=(const UCD90160&) = delete; 37 UCD90160(UCD90160&&) = default; 38 UCD90160& operator=(UCD90160&&) = default; 39 40 /** 41 * Constructor 42 * 43 * @param[in] instance - the device instance number 44 * @param[in] bus - D-Bus bus object 45 */ 46 UCD90160(size_t instance, sdbusplus::bus_t& bus); 47 48 /** 49 * Analyzes the device for errors when the device is 50 * known to be in an error state. A log will be created. 51 */ 52 void onFailure() override; 53 54 /** 55 * Checks the device for errors and only creates a log 56 * if one is found. 57 */ 58 void analyze() override; 59 60 /** 61 * Clears faults in the device 62 */ clearFaults()63 void clearFaults() override {} 64 65 private: 66 /** 67 * Reports an error for a GPU PGOOD failure 68 * 69 * @param[in] callout - the GPU callout string 70 */ 71 void gpuPGOODError(const std::string& callout); 72 73 /** 74 * Reports an error for a GPU OverTemp failure 75 * 76 * @param[in] callout - the GPU callout string 77 */ 78 void gpuOverTempError(const std::string& callout); 79 80 /** 81 * Reports an error for a MEM_GOODx failure. 82 * 83 * @param[in] callout - The MEM callout string 84 */ 85 void memGoodError(const std::string& callout); 86 87 /** 88 * Given the device path for a chip, find its gpiochip 89 * path 90 * 91 * @param[in] path - device path, like 92 * /sys/devices/.../i2c-11/11-0064 93 * 94 * @return fs::path - The gpiochip path, like 95 * /dev/gpiochip1 96 */ 97 static std::filesystem::path findGPIODevice( 98 const std::filesystem::path& path); 99 100 /** 101 * Checks for VOUT faults on the device. 102 * 103 * This device can monitor voltages of its dependent 104 * devices, and VOUT faults are voltage faults 105 * on these devices. 106 * 107 * @return bool - true if an error log was created 108 */ 109 bool checkVOUTFaults(); 110 111 /** 112 * Checks for PGOOD faults on the device. 113 * 114 * This device can monitor the PGOOD signals of its dependent 115 * devices, and this check will look for faults of 116 * those PGOODs. 117 * 118 * @param[in] polling - If this is running while polling for errors, 119 * as opposing to analyzing a fail condition. 120 * 121 * @return bool - true if an error log was created 122 */ 123 bool checkPGOODFaults(bool polling); 124 125 /** 126 * Creates an error log when the device has an error 127 * but it isn't a PGOOD or voltage failure. 128 */ 129 void createPowerFaultLog(); 130 131 /** 132 * Reads the status_word register 133 * 134 * @return uint16_t - the register contents 135 */ 136 uint16_t readStatusWord(); 137 138 /** 139 * Reads the mfr_status register 140 * 141 * @return uint32_t - the register contents 142 */ 143 uint32_t readMFRStatus(); 144 145 /** 146 * Does any additional fault analysis based on the 147 * value of the extraAnalysisType field in the GPIOConfig 148 * entry. 149 * 150 * Used to get better callouts. 151 * 152 * @param[in] config - the GPIOConfig entry to use 153 * 154 * @return bool - true if a HW error was found, false else 155 */ 156 bool doExtraAnalysis(const ucd90160::GPIConfig& config); 157 158 /** 159 * Does additional fault analysis using GPIOs to 160 * specifically identify the failing part. 161 * 162 * Used when there are too many PGOOD inputs for 163 * the UCD90160 to handle, so just a summary bit 164 * is wired into the chip, and then the specific 165 * fault GPIOs are off of a different GPIO device, 166 * like an IO expander. 167 * 168 * @param[in] type - the type of analysis to do 169 * 170 * @return bool - true if a HW error was found, false else 171 */ 172 bool doGPIOAnalysis(ucd90160::extraAnalysisType type); 173 174 /** 175 * Says if we've already logged a Vout fault 176 * 177 * The policy is only 1 of the same error will 178 * be logged for the duration of a class instance. 179 * 180 * @param[in] page - the page to check 181 * 182 * @return bool - if we've already logged a fault against 183 * this page 184 */ isVoutFaultLogged(uint32_t page) const185 inline bool isVoutFaultLogged(uint32_t page) const 186 { 187 return std::find(voutErrors.begin(), voutErrors.end(), page) != 188 voutErrors.end(); 189 } 190 191 /** 192 * Saves that a Vout fault has been logged 193 * 194 * @param[in] page - the page the error was logged against 195 */ setVoutFaultLogged(uint32_t page)196 inline void setVoutFaultLogged(uint32_t page) 197 { 198 voutErrors.push_back(page); 199 } 200 201 /** 202 * Says if we've already logged a PGOOD fault 203 * 204 * The policy is only 1 of the same errors will 205 * be logged for the duration of a class instance. 206 * 207 * @param[in] input - the input to check 208 * 209 * @return bool - if we've already logged a fault against 210 * this input 211 */ isPGOODFaultLogged(uint32_t input) const212 inline bool isPGOODFaultLogged(uint32_t input) const 213 { 214 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) != 215 pgoodErrors.end(); 216 } 217 218 /** 219 * Says if we've already logged a specific fault 220 * against a specific part 221 * 222 * @param[in] callout - error type and name tuple 223 * 224 * @return bool - if we've already logged this fault 225 * against this part 226 */ isPartCalledOut(const PartCallout & callout) const227 inline bool isPartCalledOut(const PartCallout& callout) const 228 { 229 return std::find(callouts.begin(), callouts.end(), callout) != 230 callouts.end(); 231 } 232 233 /** 234 * Saves that a PGOOD fault has been logged 235 * 236 * @param[in] input - the input the error was logged against 237 */ setPGOODFaultLogged(uint32_t input)238 inline void setPGOODFaultLogged(uint32_t input) 239 { 240 pgoodErrors.push_back(input); 241 } 242 243 /** 244 * Saves that a specific fault on a specific part has been done 245 * 246 * @param[in] callout - error type and name tuple 247 */ setPartCallout(const PartCallout & callout)248 inline void setPartCallout(const PartCallout& callout) 249 { 250 callouts.push_back(callout); 251 } 252 253 /** 254 * List of pages that Vout errors have 255 * already been logged against 256 */ 257 std::vector<uint32_t> voutErrors; 258 259 /** 260 * List of inputs that PGOOD errors have 261 * already been logged against 262 */ 263 std::vector<uint32_t> pgoodErrors; 264 265 /** 266 * List of callouts that already been done 267 */ 268 std::vector<PartCallout> callouts; 269 270 /** 271 * The read/write interface to this hardware 272 */ 273 pmbus::PMBus interface; 274 275 /** 276 * A map of GPI pin IDs to the GPIO object 277 * used to access them 278 */ 279 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios; 280 281 /** 282 * Keeps track of device access errors to avoid repeatedly 283 * logging errors for bad hardware 284 */ 285 bool accessError = false; 286 287 /** 288 * Keeps track of GPIO access errors when doing the in depth 289 * PGOOD fault analysis to avoid repeatedly logging errors 290 * for bad hardware 291 */ 292 bool gpioAccessError = false; 293 294 /** 295 * The path to the GPIO device used to read 296 * the GPI (PGOOD) status 297 */ 298 std::filesystem::path gpioDevice; 299 300 /** 301 * The D-Bus bus object 302 */ 303 sdbusplus::bus_t& bus; 304 305 /** 306 * Map of device instance to the instance specific data 307 */ 308 static const ucd90160::DeviceMap deviceMap; 309 }; 310 311 } // namespace power 312 } // namespace phosphor 313