1 /**
2  * Copyright © 2017 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <map>
17 #include <memory>
18 #include <phosphor-logging/elog.hpp>
19 #include <phosphor-logging/log.hpp>
20 #include <elog-errors.hpp>
21 #include <org/open_power/Witherspoon/Fault/error.hpp>
22 #include <xyz/openbmc_project/Common/Device/error.hpp>
23 #include "names_values.hpp"
24 #include "ucd90160.hpp"
25 #include "utility.hpp"
26 
27 namespace witherspoon
28 {
29 namespace power
30 {
31 
using namespace std::string_literals;

//Name handed to interface.read() by readMFRStatus().
const std::string MFR_STATUS{"mfr_status"};

//Name passed to the Device base class by the constructor.
const std::string DEVICE_NAME{"UCD90160"};

//Driver name passed to the interface member by the constructor.
const std::string DRIVER_NAME{"ucd9000"};

//Upper bound (exclusive) of the page loop in checkVOUTFaults().
constexpr int NUM_PAGES = 16;
39 
40 namespace fs = std::experimental::filesystem;
41 using namespace gpio;
42 using namespace pmbus;
43 using namespace phosphor::logging;
44 
45 namespace device_error = sdbusplus::xyz::openbmc_project::
46         Common::Device::Error;
47 namespace power_error = sdbusplus::org::open_power::
48         Witherspoon::Fault::Error;
49 
50 UCD90160::UCD90160(size_t instance, sdbusplus::bus::bus& bus) :
51         Device(DEVICE_NAME, instance),
52         interface(std::get<ucd90160::pathField>(
53                           deviceMap.find(instance)->second),
54                   DRIVER_NAME,
55                   instance),
56         gpioDevice(findGPIODevice(interface.path())),
57         bus(bus)
58 {
59 }
60 
61 void UCD90160::onFailure()
62 {
63     try
64     {
65         auto voutError = checkVOUTFaults();
66 
67         auto pgoodError = checkPGOODFaults(false);
68 
69         //Not a voltage or PGOOD fault, but we know something
70         //failed so still create an error log.
71         if (!voutError && !pgoodError)
72         {
73             createPowerFaultLog();
74         }
75     }
76     catch (device_error::ReadFailure& e)
77     {
78         if (!accessError)
79         {
80             commit<device_error::ReadFailure>();
81             accessError = true;
82         }
83     }
84 }
85 
86 void UCD90160::analyze()
87 {
88     try
89     {
90         //Note: Voltage faults are always fatal, so they just
91         //need to be analyzed in onFailure().
92 
93         checkPGOODFaults(true);
94     }
95     catch (device_error::ReadFailure& e)
96     {
97         if (!accessError)
98         {
99             commit<device_error::ReadFailure>();
100             accessError = true;
101         }
102     }
103 }
104 
105 uint16_t UCD90160::readStatusWord()
106 {
107     return interface.read(STATUS_WORD, Type::Debug);
108 }
109 
110 uint32_t UCD90160::readMFRStatus()
111 {
112     return interface.read(MFR_STATUS, Type::DeviceDebug);
113 }
114 
115 bool UCD90160::checkVOUTFaults()
116 {
117     bool errorCreated = false;
118     auto statusWord = readStatusWord();
119 
120     //The status_word register has a summary bit to tell us
121     //if each page even needs to be checked
122     if (!(statusWord & status_word::VOUT_FAULT))
123     {
124         return errorCreated;
125     }
126 
127     for (size_t page = 0; page < NUM_PAGES; page++)
128     {
129         if (isVoutFaultLogged(page))
130         {
131             continue;
132         }
133 
134         auto statusVout = interface.insertPageNum(STATUS_VOUT, page);
135         uint8_t vout = interface.read(statusVout, Type::Debug);
136 
137         //If any bits are on log them, though some are just
138         //warnings so they won't cause errors
139         if (vout)
140         {
141             log<level::INFO>("A voltage rail has bits on in STATUS_VOUT",
142                     entry("STATUS_VOUT=0x%X", vout),
143                     entry("PAGE=%d", page));
144         }
145 
146         //Log errors if any non-warning bits on
147         if (vout & ~status_vout::WARNING_MASK)
148         {
149             auto& railNames = std::get<ucd90160::railNamesField>(
150                     deviceMap.find(getInstance())->second);
151             auto railName = railNames.at(page);
152 
153             util::NamesValues nv;
154             nv.add("STATUS_WORD", statusWord);
155             nv.add("STATUS_VOUT", vout);
156             nv.add("MFR_STATUS", readMFRStatus());
157 
158             using metadata = org::open_power::Witherspoon::Fault::
159                     PowerSequencerVoltageFault;
160 
161             report<power_error::PowerSequencerVoltageFault>(
162                     metadata::RAIL(page),
163                     metadata::RAIL_NAME(railName.c_str()),
164                     metadata::RAW_STATUS(nv.get().c_str()));
165 
166             setVoutFaultLogged(page);
167             errorCreated = true;
168         }
169     }
170 
171     return errorCreated;
172 }
173 
174 bool UCD90160::checkPGOODFaults(bool polling)
175 {
176     bool errorCreated = false;
177 
178     //While PGOOD faults could show up in MFR_STATUS (and we could then
179     //check the summary bit in STATUS_WORD first), they are edge triggered,
180     //and as the device driver sends a clear faults command every time we
181     //do a read, we will never see them.  So, we'll have to just read the
182     //real time GPI status GPIO.
183 
184     //Check only the GPIs configured on this system.
185     auto& gpiConfigs = std::get<ucd90160::gpiConfigField>(
186             deviceMap.find(getInstance())->second);
187 
188     for (const auto& gpiConfig : gpiConfigs)
189     {
190         auto gpiNum = std::get<ucd90160::gpiNumField>(gpiConfig);
191         auto doPoll = std::get<ucd90160::pollField>(gpiConfig);
192 
193         //Can skip this one if there is already an error on this input,
194         //or we are polling and these inputs don't need to be polled
195         //(because errors on them are fatal).
196         if (isPGOODFaultLogged(gpiNum) || (polling && !doPoll))
197         {
198             continue;
199         }
200 
201         //The real time status is read via the pin ID
202         auto pinID = std::get<ucd90160::pinIDField>(gpiConfig);
203         auto gpio = gpios.find(pinID);
204         Value gpiStatus;
205 
206         try
207         {
208             //The first time through, create the GPIO objects
209             if (gpio == gpios.end())
210             {
211                 gpios.emplace(
212                         pinID,
213                         std::make_unique<GPIO>(
214                                 gpioDevice, pinID, Direction::input));
215                 gpio = gpios.find(pinID);
216             }
217 
218             gpiStatus = gpio->second->read();
219         }
220         catch (std::exception& e)
221         {
222             if (!accessError)
223             {
224                 log<level::ERR>(e.what());
225                 accessError = true;
226             }
227             continue;
228         }
229 
230         if (gpiStatus == Value::low)
231         {
232             //There may be some extra analysis we can do to narrow the
233             //error down further.  Note that finding an error here won't
234             //prevent us from checking this GPI again.
235             errorCreated = doExtraAnalysis(gpiConfig);
236 
237             if (errorCreated)
238             {
239                 continue;
240             }
241 
242             auto& gpiName = std::get<ucd90160::gpiNameField>(gpiConfig);
243             auto status = (gpiStatus == Value::low) ? 0 : 1;
244 
245             util::NamesValues nv;
246             nv.add("STATUS_WORD", readStatusWord());
247             nv.add("MFR_STATUS", readMFRStatus());
248             nv.add("INPUT_STATUS", status);
249 
250             using metadata =  org::open_power::Witherspoon::Fault::
251                     PowerSequencerPGOODFault;
252 
253             report<power_error::PowerSequencerPGOODFault>(
254                     metadata::INPUT_NUM(gpiNum),
255                     metadata::INPUT_NAME(gpiName.c_str()),
256                     metadata::RAW_STATUS(nv.get().c_str()));
257 
258             setPGOODFaultLogged(gpiNum);
259             errorCreated = true;
260         }
261     }
262 
263     return errorCreated;
264 }
265 
266 void UCD90160::createPowerFaultLog()
267 {
268     util::NamesValues nv;
269     nv.add("STATUS_WORD", readStatusWord());
270     nv.add("MFR_STATUS", readMFRStatus());
271 
272     using metadata = org::open_power::Witherspoon::Fault::
273         PowerSequencerFault;
274 
275     report<power_error::PowerSequencerFault>(
276             metadata::RAW_STATUS(nv.get().c_str()));
277 }
278 
279 fs::path UCD90160::findGPIODevice(const fs::path& path)
280 {
281     fs::path gpioDevicePath;
282 
283     //In the driver directory, look for a subdirectory
284     //named gpiochipX, where X is some number.  Then
285     //we'll access the GPIO at /dev/gpiochipX.
286     if (fs::is_directory(path))
287     {
288         for (auto& f : fs::directory_iterator(path))
289         {
290             if (f.path().filename().string().find("gpiochip") !=
291                     std::string::npos)
292             {
293                 gpioDevicePath = "/dev" / f.path().filename();
294                 break;
295             }
296         }
297     }
298 
299     if (gpioDevicePath.empty())
300     {
301         log<level::ERR>("Could not find GPIO device path",
302                 entry("BASE_PATH=%s", path.c_str()));
303     }
304 
305     return gpioDevicePath;
306 }
307 
308 bool UCD90160::doExtraAnalysis(const ucd90160::GPIConfig& config)
309 {
310 
311     auto type = std::get<ucd90160::extraAnalysisField>(config);
312     if (type == ucd90160::extraAnalysisType::none)
313     {
314         return false;
315     }
316 
317     //Currently the only extra analysis to do is to check other GPIOs.
318     return doGPIOAnalysis(type);
319 }
320 
321 bool UCD90160::doGPIOAnalysis(ucd90160::extraAnalysisType type)
322 {
323     bool errorFound = false;
324     bool shutdown = false;
325 
326     const auto& analysisConfig = std::get<ucd90160::gpioAnalysisField>(
327             deviceMap.find(getInstance())->second);
328 
329     auto gpioConfig = analysisConfig.find(type);
330     if (gpioConfig == analysisConfig.end())
331     {
332         return errorFound;
333     }
334 
335     auto path = std::get<ucd90160::gpioDevicePathField>(
336             gpioConfig->second);
337 
338     //The /dev/gpiochipX device
339     auto device = findGPIODevice(path);
340 
341     //The GPIO value of the fault condition
342     auto polarity = std::get<ucd90160::gpioPolarityField>(
343             gpioConfig->second);
344 
345     //The GPIOs to check
346     auto& gpios = std::get<ucd90160::gpioDefinitionField>(
347             gpioConfig->second);
348 
349     for (const auto& gpio : gpios)
350     {
351         gpio::Value value;
352 
353         try
354         {
355             GPIO g{device,
356                    std::get<ucd90160::gpioNumField>(gpio),
357                    Direction::input};
358 
359             value = g.read();
360         }
361         catch (std::exception& e)
362         {
363             if (!gpioAccessError)
364             {
365                 //GPIO only throws InternalErrors - not worth committing.
366                 log<level::ERR>(
367                         "GPIO read failed while analyzing a power fault",
368                         entry("CHIP_PATH=%s", path.c_str()));
369 
370                 gpioAccessError = true;
371             }
372             continue;
373         }
374 
375         if (value == polarity)
376         {
377             errorFound = true;
378 
379             auto part = std::get<ucd90160::gpioCalloutField>(gpio);
380             PartCallout callout{type, part};
381 
382             if (isPartCalledOut(callout))
383             {
384                 continue;
385             }
386 
387             //Look up and call the error creation function
388             auto logError = std::get<ucd90160::errorFunctionField>(
389                     gpioConfig->second);
390 
391             logError(*this, part);
392 
393             //Save the part callout so we don't call it out again
394             setPartCallout(callout);
395 
396             //Some errors (like overtemps) require a shutdown
397             auto actions = static_cast<uint32_t>(
398                     std::get<ucd90160::optionFlagsField>(gpioConfig->second));
399 
400             if (actions & static_cast<decltype(actions)>(
401                         ucd90160::optionFlags::shutdownOnFault))
402             {
403                 shutdown = true;
404             }
405         }
406     }
407 
408     if (shutdown)
409     {
410         //Will be replaced with a GPU specific error in a future commit
411         util::powerOff<power_error::Shutdown>(bus);
412     }
413 
414     return errorFound;
415 }
416 
417 void UCD90160::gpuPGOODError(const std::string& callout)
418 {
419     util::NamesValues nv;
420     nv.add("STATUS_WORD", readStatusWord());
421     nv.add("MFR_STATUS", readMFRStatus());
422 
423     using metadata = org::open_power::Witherspoon::Fault::GPUPowerFault;
424 
425     report<power_error::GPUPowerFault>(
426             metadata::RAW_STATUS(nv.get().c_str()),
427             metadata::GPU(callout.c_str()));
428 }
429 
430 void UCD90160::gpuOverTempError(const std::string& callout)
431 {
432     util::NamesValues nv;
433     nv.add("STATUS_WORD", readStatusWord());
434     nv.add("MFR_STATUS", readMFRStatus());
435 
436     using metadata = org::open_power::Witherspoon::Fault::GPUOverTemp;
437 
438     report<power_error::GPUOverTemp>(
439             metadata::RAW_STATUS(nv.get().c_str()),
440             metadata::GPU(callout.c_str()));
441 }
442 
443 }
444 }
445