1 #pragma once 2 3 #include "device.hpp" 4 #include "gpio.hpp" 5 #include "pmbus.hpp" 6 #include "types.hpp" 7 8 #include <algorithm> 9 #include <filesystem> 10 #include <map> 11 #include <sdbusplus/bus.hpp> 12 #include <vector> 13 14 namespace phosphor 15 { 16 namespace power 17 { 18 19 // Error type, callout 20 using PartCallout = std::tuple<ucd90160::extraAnalysisType, std::string>; 21 22 /** 23 * @class UCD90160 24 * 25 * This class implements fault analysis for the UCD90160 26 * power sequencer device. 27 * 28 */ 29 class UCD90160 : public Device 30 { 31 public: 32 UCD90160() = delete; 33 ~UCD90160() = default; 34 UCD90160(const UCD90160&) = delete; 35 UCD90160& operator=(const UCD90160&) = delete; 36 UCD90160(UCD90160&&) = default; 37 UCD90160& operator=(UCD90160&&) = default; 38 39 /** 40 * Constructor 41 * 42 * @param[in] instance - the device instance number 43 * @param[in] bus - D-Bus bus object 44 */ 45 UCD90160(size_t instance, sdbusplus::bus::bus& bus); 46 47 /** 48 * Analyzes the device for errors when the device is 49 * known to be in an error state. A log will be created. 50 */ 51 void onFailure() override; 52 53 /** 54 * Checks the device for errors and only creates a log 55 * if one is found. 56 */ 57 void analyze() override; 58 59 /** 60 * Clears faults in the device 61 */ 62 void clearFaults() override 63 { 64 } 65 66 private: 67 /** 68 * Reports an error for a GPU PGOOD failure 69 * 70 * @param[in] callout - the GPU callout string 71 */ 72 void gpuPGOODError(const std::string& callout); 73 74 /** 75 * Reports an error for a GPU OverTemp failure 76 * 77 * @param[in] callout - the GPU callout string 78 */ 79 void gpuOverTempError(const std::string& callout); 80 81 /** 82 * Reports an error for a MEM_GOODx failure. 83 * 84 * @param[in] callout - The MEM callout string 85 */ 86 void memGoodError(const std::string& callout); 87 88 /** 89 * Given the device path for a chip, find its gpiochip 90 * path 91 * 92 * @param[in] path - device path, like 93 * /sys/devices/.../i2c-11/11-0064 94 * 95 * @return fs::path - The gpiochip path, like 96 * /dev/gpiochip1 97 */ 98 static std::filesystem::path 99 findGPIODevice(const std::filesystem::path& path); 100 101 /** 102 * Checks for VOUT faults on the device. 103 * 104 * This device can monitor voltages of its dependent 105 * devices, and VOUT faults are voltage faults 106 * on these devices. 107 * 108 * @return bool - true if an error log was created 109 */ 110 bool checkVOUTFaults(); 111 112 /** 113 * Checks for PGOOD faults on the device. 114 * 115 * This device can monitor the PGOOD signals of its dependent 116 * devices, and this check will look for faults of 117 * those PGOODs. 118 * 119 * @param[in] polling - If this is running while polling for errors, 120 * as opposing to analyzing a fail condition. 121 * 122 * @return bool - true if an error log was created 123 */ 124 bool checkPGOODFaults(bool polling); 125 126 /** 127 * Creates an error log when the device has an error 128 * but it isn't a PGOOD or voltage failure. 129 */ 130 void createPowerFaultLog(); 131 132 /** 133 * Reads the status_word register 134 * 135 * @return uint16_t - the register contents 136 */ 137 uint16_t readStatusWord(); 138 139 /** 140 * Reads the mfr_status register 141 * 142 * @return uint32_t - the register contents 143 */ 144 uint32_t readMFRStatus(); 145 146 /** 147 * Does any additional fault analysis based on the 148 * value of the extraAnalysisType field in the GPIOConfig 149 * entry. 150 * 151 * Used to get better callouts. 152 * 153 * @param[in] config - the GPIOConfig entry to use 154 * 155 * @return bool - true if a HW error was found, false else 156 */ 157 bool doExtraAnalysis(const ucd90160::GPIConfig& config); 158 159 /** 160 * Does additional fault analysis using GPIOs to 161 * specifically identify the failing part. 162 * 163 * Used when there are too many PGOOD inputs for 164 * the UCD90160 to handle, so just a summary bit 165 * is wired into the chip, and then the specific 166 * fault GPIOs are off of a different GPIO device, 167 * like an IO expander. 168 * 169 * @param[in] type - the type of analysis to do 170 * 171 * @return bool - true if a HW error was found, false else 172 */ 173 bool doGPIOAnalysis(ucd90160::extraAnalysisType type); 174 175 /** 176 * Says if we've already logged a Vout fault 177 * 178 * The policy is only 1 of the same error will 179 * be logged for the duration of a class instance. 180 * 181 * @param[in] page - the page to check 182 * 183 * @return bool - if we've already logged a fault against 184 * this page 185 */ 186 inline bool isVoutFaultLogged(uint32_t page) const 187 { 188 return std::find(voutErrors.begin(), voutErrors.end(), page) != 189 voutErrors.end(); 190 } 191 192 /** 193 * Saves that a Vout fault has been logged 194 * 195 * @param[in] page - the page the error was logged against 196 */ 197 inline void setVoutFaultLogged(uint32_t page) 198 { 199 voutErrors.push_back(page); 200 } 201 202 /** 203 * Says if we've already logged a PGOOD fault 204 * 205 * The policy is only 1 of the same errors will 206 * be logged for the duration of a class instance. 207 * 208 * @param[in] input - the input to check 209 * 210 * @return bool - if we've already logged a fault against 211 * this input 212 */ 213 inline bool isPGOODFaultLogged(uint32_t input) const 214 { 215 return std::find(pgoodErrors.begin(), pgoodErrors.end(), input) != 216 pgoodErrors.end(); 217 } 218 219 /** 220 * Says if we've already logged a specific fault 221 * against a specific part 222 * 223 * @param[in] callout - error type and name tuple 224 * 225 * @return bool - if we've already logged this fault 226 * against this part 227 */ 228 inline bool isPartCalledOut(const PartCallout& callout) const 229 { 230 return std::find(callouts.begin(), callouts.end(), callout) != 231 callouts.end(); 232 } 233 234 /** 235 * Saves that a PGOOD fault has been logged 236 * 237 * @param[in] input - the input the error was logged against 238 */ 239 inline void setPGOODFaultLogged(uint32_t input) 240 { 241 pgoodErrors.push_back(input); 242 } 243 244 /** 245 * Saves that a specific fault on a specific part has been done 246 * 247 * @param[in] callout - error type and name tuple 248 */ 249 inline void setPartCallout(const PartCallout& callout) 250 { 251 callouts.push_back(callout); 252 } 253 254 /** 255 * List of pages that Vout errors have 256 * already been logged against 257 */ 258 std::vector<uint32_t> voutErrors; 259 260 /** 261 * List of inputs that PGOOD errors have 262 * already been logged against 263 */ 264 std::vector<uint32_t> pgoodErrors; 265 266 /** 267 * List of callouts that already been done 268 */ 269 std::vector<PartCallout> callouts; 270 271 /** 272 * The read/write interface to this hardware 273 */ 274 pmbus::PMBus interface; 275 276 /** 277 * A map of GPI pin IDs to the GPIO object 278 * used to access them 279 */ 280 std::map<size_t, std::unique_ptr<gpio::GPIO>> gpios; 281 282 /** 283 * Keeps track of device access errors to avoid repeatedly 284 * logging errors for bad hardware 285 */ 286 bool accessError = false; 287 288 /** 289 * Keeps track of GPIO access errors when doing the in depth 290 * PGOOD fault analysis to avoid repeatedly logging errors 291 * for bad hardware 292 */ 293 bool gpioAccessError = false; 294 295 /** 296 * The path to the GPIO device used to read 297 * the GPI (PGOOD) status 298 */ 299 std::filesystem::path gpioDevice; 300 301 /** 302 * The D-Bus bus object 303 */ 304 sdbusplus::bus::bus& bus; 305 306 /** 307 * Map of device instance to the instance specific data 308 */ 309 static const ucd90160::DeviceMap deviceMap; 310 }; 311 312 } // namespace power 313 } // namespace phosphor 314