1 #pragma once 2 3 #include "device.hpp" 4 #include "gpio.hpp" 5 #include "pmbus.hpp" 6 #include "types.hpp" 7 8 #include <sdbusplus/bus.hpp> 9 10 #include <algorithm> 11 #include <filesystem> 12 #include <map> 13 #include <vector> 14 15 namespace phosphor 16 { 17 namespace power 18 { 19 20 // Error type, callout 21 using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>; 22 23 /** 24 * @class UCD90160 25 * 26 * This class implements fault analysis for the UCD90160 27 * power sequencer device. 28 * 29 */ 30 class UCD90160 : public Device 31 { 32 public: 33 UCD90160() = delete; 34 ~UCD90160() = default; 35 UCD90160(const UCD90160&) = delete; 36 UCD90160& operator=(const UCD90160&) = delete; 37 UCD90160(UCD90160&&) = default; 38 UCD90160& operator=(UCD90160&&) = default; 39 40 /** 41 * Constructor 42 * 43 * @param[in] instance - the device instance number 44 * @param[in] bus - D-Bus bus object 45 */ 46 UCD90160(size_t instance, sdbusplus::bus::bus& bus); 47 48 /** 49 * Analyzes the device for errors when the device is 50 * known to be in an error state. A log will be created. 51 */ 52 void onFailure() override; 53 54 /** 55 * Checks the device for errors and only creates a log 56 * if one is found. 57 */ 58 void analyze() override; 59 60 /** 61 * Clears faults in the device 62 */ 63 void clearFaults() override 64 { 65 } 66 67 private: 68 /** 69 * Reports an error for a GPU PGOOD failure 70 * 71 * @param[in] callout - the GPU callout string 72 */ 73 void gpuPGOODError(const std::string& callout); 74 75 /** 76 * Reports an error for a GPU OverTemp failure 77 * 78 * @param[in] callout - the GPU callout string 79 */ 80 void gpuOverTempError(const std::string& callout); 81 82 /** 83 * Reports an error for a MEM_GOODx failure. 84 * 85 * @param[in] callout - The MEM callout string 86 */ 87 void memGoodError(const std::string& callout); 88 89 /** 90 * Given the device path for a chip, find its gpiochip 91 * path 92 * 93 * @param[in] path - device path, like 94 * /sys/devices/.../i2c-11/11-0064 95 * 96 * @return fs::path - The gpiochip path, like 97 * /dev/gpiochip1 98 */ 99 static std::filesystem::path 100 findGPIODevice(const std::filesystem::path& path); 101 102 /** 103 * Checks for VOUT faults on the device. 104 * 105 * This device can monitor voltages of its dependent 106 * devices, and VOUT faults are voltage faults 107 * on these devices. 108 * 109 * @return bool - true if an error log was created 110 */ 111 bool checkVOUTFaults(); 112 113 /** 114 * Checks for PGOOD faults on the device. 115 * 116 * This device can monitor the PGOOD signals of its dependent 117 * devices, and this check will look for faults of 118 * those PGOODs. 119 * 120 * @param[in] polling - If this is running while polling for errors, 121 * as opposing to analyzing a fail condition. 122 * 123 * @return bool - true if an error log was created 124 */ 125 bool checkPGOODFaults(bool polling); 126 127 /** 128 * Creates an error log when the device has an error 129 * but it isn't a PGOOD or voltage failure. 130 */ 131 void createPowerFaultLog(); 132 133 /** 134 * Reads the status_word register 135 * 136 * @return uint16_t - the register contents 137 */ 138 uint16_t readStatusWord(); 139 140 /** 141 * Reads the mfr_status register 142 * 143 * @return uint32_t - the register contents 144 */ 145 uint32_t readMFRStatus(); 146 147 /** 148 * Does any additional fault analysis based on the 149 * value of the extraAnalysisType field in the GPIOConfig 150 * entry. 151 * 152 * Used to get better callouts. 153 * 154 * @param[in] config - the GPIOConfig entry to use 155 * 156 * @return bool - true if a HW error was found, false else 157 */ 158 bool doExtraAnalysis(const ucd90160::GPIConfig& config); 159 160 /** 161 * Does additional fault analysis using GPIOs to 162 * specifically identify the failing part. 163 * 164 * Used when there are too many PGOOD inputs for 165 * the UCD90160 to handle, so just a summary bit 166 * is wired into the chip, and then the specific 167 * fault GPIOs are off of a different GPIO device, 168 * like an IO expander. 169 * 170 * @param[in] type - the type of analysis to do 171 * 172 * @return bool - true if a HW error was found, false else 173 */ 174 bool doGPIOAnalysis(ucd90160::extraAnalysisType type); 175 176 /** 177 * Says if we've already logged a Vout fault 178 * 179 * The policy is only 1 of the same error will 180 * be logged for the duration of a class instance. 181 * 182 * @param[in] page - the page to check 183 * 184 * @return bool - if we've already logged a fault against 185 * this page 186 */ 187 inline bool isVoutFaultLogged(uint32_t page) const 188 { 189 return std::find(voutErrors.begin(), voutErrors.end(), page) != 190 voutErrors.end(); 191 } 192 193 /** 194 * Saves that a Vout fault has been logged 195 * 196 * @param[in] page - the page the error was logged against 197 */ 198 inline void setVoutFaultLogged(uint32_t page) 199 { 200 voutErrors.push_back(page); 201 } 202 203 /** 204 * Says if we've already logged a PGOOD fault 205 * 206 * The policy is only 1 of the same errors will 207 * be logged for the duration of a class instance. 208 * 209 * @param[in] input - the input to check 210 * 211 * @return bool - if we've already logged a fault against 212 * this input 213 */ 214 inline bool isPGOODFaultLogged(uint32_t input) const 215 { 216 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) != 217 pgoodErrors.end(); 218 } 219 220 /** 221 * Says if we've already logged a specific fault 222 * against a specific part 223 * 224 * @param[in] callout - error type and name tuple 225 * 226 * @return bool - if we've already logged this fault 227 * against this part 228 */ 229 inline bool isPartCalledOut(const PartCallout& callout) const 230 { 231 return std::find(callouts.begin(), callouts.end(), callout) != 232 callouts.end(); 233 } 234 235 /** 236 * Saves that a PGOOD fault has been logged 237 * 238 * @param[in] input - the input the error was logged against 239 */ 240 inline void setPGOODFaultLogged(uint32_t input) 241 { 242 pgoodErrors.push_back(input); 243 } 244 245 /** 246 * Saves that a specific fault on a specific part has been done 247 * 248 * @param[in] callout - error type and name tuple 249 */ 250 inline void setPartCallout(const PartCallout& callout) 251 { 252 callouts.push_back(callout); 253 } 254 255 /** 256 * List of pages that Vout errors have 257 * already been logged against 258 */ 259 std::vector<uint32_t> voutErrors; 260 261 /** 262 * List of inputs that PGOOD errors have 263 * already been logged against 264 */ 265 std::vector<uint32_t> pgoodErrors; 266 267 /** 268 * List of callouts that already been done 269 */ 270 std::vector<PartCallout> callouts; 271 272 /** 273 * The read/write interface to this hardware 274 */ 275 pmbus::PMBus interface; 276 277 /** 278 * A map of GPI pin IDs to the GPIO object 279 * used to access them 280 */ 281 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios; 282 283 /** 284 * Keeps track of device access errors to avoid repeatedly 285 * logging errors for bad hardware 286 */ 287 bool accessError = false; 288 289 /** 290 * Keeps track of GPIO access errors when doing the in depth 291 * PGOOD fault analysis to avoid repeatedly logging errors 292 * for bad hardware 293 */ 294 bool gpioAccessError = false; 295 296 /** 297 * The path to the GPIO device used to read 298 * the GPI (PGOOD) status 299 */ 300 std::filesystem::path gpioDevice; 301 302 /** 303 * The D-Bus bus object 304 */ 305 sdbusplus::bus::bus& bus; 306 307 /** 308 * Map of device instance to the instance specific data 309 */ 310 static const ucd90160::DeviceMap deviceMap; 311 }; 312 313 } // namespace power 314 } // namespace phosphor 315