1 /**
2  * Copyright © 2017 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "power_supply.hpp"
19 
20 #include "elog-errors.hpp"
21 #include "gpio.hpp"
22 #include "names_values.hpp"
23 #include "pmbus.hpp"
24 #include "types.hpp"
25 #include "utility.hpp"
26 
27 #include <org/open_power/Witherspoon/Fault/error.hpp>
28 #include <phosphor-logging/log.hpp>
29 #include <xyz/openbmc_project/Common/Device/error.hpp>
30 
31 #include <functional>
32 
33 namespace phosphor
34 {
35 namespace power
36 {
37 namespace psu
38 {
39 
40 using namespace phosphor::logging;
41 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error;
42 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error;
43 
44 PowerSupply::PowerSupply(const std::string& name, size_t inst,
45                          const std::string& objpath, const std::string& invpath,
46                          sdbusplus::bus::bus& bus, const sdeventplus::Event& e,
47                          std::chrono::seconds& t, std::chrono::seconds& p) :
48     Device(name, inst),
49     monitorPath(objpath), pmbusIntf(objpath),
50     inventoryPath(INVENTORY_OBJ_PATH + invpath), bus(bus), presentInterval(p),
51     presentTimer(e, std::bind([this]() {
52                      // The hwmon path may have changed.
53                      pmbusIntf.findHwmonDir();
54                      this->present = true;
55 
56                      // Sync the INPUT_HISTORY data for all PSs
57                      syncHistory();
58 
59                      // Update the inventory for the new device
60                      updateInventory();
61                  })),
62     powerOnInterval(t),
63     powerOnTimer(e, std::bind([this]() { this->powerOn = true; }))
64 {
65     getAccessType();
66 
67     using namespace sdbusplus::bus;
68     using namespace phosphor::pmbus;
69     std::uint16_t statusWord = 0;
70     try
71     {
72         // Read the 2 byte STATUS_WORD value to check for faults.
73         statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug);
74         if (!((statusWord & status_word::INPUT_FAULT_WARN) ||
75               (statusWord & status_word::VIN_UV_FAULT)))
76         {
77             resolveError(inventoryPath,
78                          std::string(PowerSupplyInputFault::errName));
79         }
80     }
81     catch (ReadFailure& e)
82     {
83         log<level::INFO>("Unable to read the 2 byte STATUS_WORD value to check "
84                          "for power-supply input faults.");
85     }
86     presentMatch = std::make_unique<match_t>(
87         bus, match::rules::propertiesChanged(inventoryPath, INVENTORY_IFACE),
88         [this](auto& msg) { this->inventoryChanged(msg); });
89     // Get initial presence state.
90     updatePresence();
91 
92     // Write the SN, PN, etc to the inventory
93     updateInventory();
94 
95     // Subscribe to power state changes
96     powerOnMatch = std::make_unique<match_t>(
97         bus, match::rules::propertiesChanged(POWER_OBJ_PATH, POWER_IFACE),
98         [this](auto& msg) { this->powerStateChanged(msg); });
99     // Get initial power state.
100     updatePowerState();
101 }
102 
103 void PowerSupply::getAccessType()
104 {
105     using namespace phosphor::power::util;
106     fruJson = loadJSONFromFile(PSU_JSON_PATH);
107     if (fruJson == nullptr)
108     {
109         log<level::ERR>("InternalFailure when parsing the JSON file");
110         return;
111     }
112     inventoryPMBusAccessType = getPMBusAccessType(fruJson);
113 }
114 
115 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd,
116                              phosphor::pmbus::Type type)
117 {
118     if (pmbusIntf.exists(cmd, type))
119     {
120         try
121         {
122             auto val = pmbusIntf.read(cmd, type);
123             nv.add(cmd, val);
124         }
125         catch (std::exception& e)
126         {
127             log<level::INFO>("Unable to capture metadata",
128                              entry("CMD=%s", cmd.c_str()));
129         }
130     }
131 }
132 
133 void PowerSupply::analyze()
134 {
135     using namespace phosphor::pmbus;
136 
137     try
138     {
139         if (present)
140         {
141             std::uint16_t statusWord = 0;
142 
143             // Read the 2 byte STATUS_WORD value to check for faults.
144             statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug);
145             readFail = 0;
146 
147             checkInputFault(statusWord);
148 
149             if (powerOn && (inputFault == 0) && !faultFound)
150             {
151                 checkFanFault(statusWord);
152                 checkTemperatureFault(statusWord);
153                 checkOutputOvervoltageFault(statusWord);
154                 checkCurrentOutOverCurrentFault(statusWord);
155                 checkPGOrUnitOffFault(statusWord);
156             }
157 
158             updateHistory();
159         }
160     }
161     catch (ReadFailure& e)
162     {
163         if (readFail < FAULT_COUNT)
164         {
165             readFail++;
166         }
167 
168         if (!readFailLogged && readFail >= FAULT_COUNT)
169         {
170             commit<ReadFailure>();
171             readFailLogged = true;
172         }
173     }
174 
175     return;
176 }
177 
178 void PowerSupply::inventoryChanged(sdbusplus::message::message& msg)
179 {
180     std::string msgSensor;
181     std::map<std::string, sdbusplus::message::variant<uint32_t, bool>> msgData;
182     msg.read(msgSensor, msgData);
183 
184     // Check if it was the Present property that changed.
185     auto valPropMap = msgData.find(PRESENT_PROP);
186     if (valPropMap != msgData.end())
187     {
188         if (sdbusplus::message::variant_ns::get<bool>(valPropMap->second))
189         {
190             clearFaults();
191             presentTimer.restartOnce(presentInterval);
192         }
193         else
194         {
195             present = false;
196             presentTimer.setEnabled(false);
197 
198             // Clear out the now outdated inventory properties
199             updateInventory();
200         }
201     }
202 
203     return;
204 }
205 
206 void PowerSupply::updatePresence()
207 {
208     // Use getProperty utility function to get presence status.
209     std::string service = "xyz.openbmc_project.Inventory.Manager";
210     util::getProperty(INVENTORY_IFACE, PRESENT_PROP, inventoryPath, service,
211                       bus, this->present);
212 }
213 
214 void PowerSupply::powerStateChanged(sdbusplus::message::message& msg)
215 {
216     int32_t state = 0;
217     std::string msgSensor;
218     std::map<std::string, sdbusplus::message::variant<int32_t>> msgData;
219     msg.read(msgSensor, msgData);
220 
221     // Check if it was the Present property that changed.
222     auto valPropMap = msgData.find("state");
223     if (valPropMap != msgData.end())
224     {
225         state =
226             sdbusplus::message::variant_ns::get<int32_t>(valPropMap->second);
227 
228         // Power is on when state=1. Set the fault logged variables to false
229         // and start the power on timer when the state changes to 1.
230         if (state)
231         {
232             clearFaults();
233             powerOnTimer.restartOnce(powerOnInterval);
234         }
235         else
236         {
237             powerOnTimer.setEnabled(false);
238             powerOn = false;
239         }
240     }
241 }
242 
243 void PowerSupply::updatePowerState()
244 {
245     powerOn = util::isPoweredOn(bus);
246 }
247 
248 void PowerSupply::checkInputFault(const uint16_t statusWord)
249 {
250     using namespace phosphor::pmbus;
251 
252     if ((inputFault < FAULT_COUNT) &&
253         ((statusWord & status_word::INPUT_FAULT_WARN) ||
254          (statusWord & status_word::VIN_UV_FAULT)))
255     {
256         if (inputFault == 0)
257         {
258             log<level::INFO>("INPUT or VIN_UV fault",
259                              entry("STATUS_WORD=0x%04X", statusWord));
260         }
261 
262         inputFault++;
263     }
264     else
265     {
266         if ((inputFault > 0) && !(statusWord & status_word::INPUT_FAULT_WARN) &&
267             !(statusWord & status_word::VIN_UV_FAULT))
268         {
269             inputFault = 0;
270             faultFound = false;
271             // When an input fault occurs, the power supply cannot be on.
272             // However, the check for the case where the power supply should be
273             // on will stop when there is a fault found.
274             // Clear the powerOnFault when the inputFault is cleared to reset
275             // the powerOnFault de-glitching.
276             powerOnFault = 0;
277 
278             log<level::INFO>("INPUT_FAULT_WARN cleared",
279                              entry("POWERSUPPLY=%s", inventoryPath.c_str()));
280 
281             resolveError(inventoryPath,
282                          std::string(PowerSupplyInputFault::errName));
283 
284             if (powerOn)
285             {
286                 // The power supply will not be immediately powered on after
287                 // the input power is restored.
288                 powerOn = false;
289                 // Start up the timer that will set the state to indicate we
290                 // are ready for the powered on fault checks.
291                 powerOnTimer.restartOnce(powerOnInterval);
292             }
293         }
294     }
295 
296     if (!faultFound && (inputFault >= FAULT_COUNT))
297     {
298         // If the power is on, report the fault in an error log entry.
299         if (powerOn)
300         {
301             util::NamesValues nv;
302             nv.add("STATUS_WORD", statusWord);
303             captureCmd(nv, STATUS_INPUT, Type::Debug);
304 
305             using metadata =
306                 org::open_power::Witherspoon::Fault::PowerSupplyInputFault;
307 
308             report<PowerSupplyInputFault>(
309                 metadata::RAW_STATUS(nv.get().c_str()),
310                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
311 
312             faultFound = true;
313         }
314     }
315 }
316 
317 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord)
318 {
319     using namespace phosphor::pmbus;
320 
321     if (powerOnFault < FAULT_COUNT)
322     {
323         // Check PG# and UNIT_IS_OFF
324         if ((statusWord & status_word::POWER_GOOD_NEGATED) ||
325             (statusWord & status_word::UNIT_IS_OFF))
326         {
327             log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad",
328                              entry("STATUS_WORD=0x%04X", statusWord));
329             powerOnFault++;
330         }
331         else
332         {
333             if (powerOnFault > 0)
334             {
335                 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good");
336                 powerOnFault = 0;
337             }
338         }
339 
340         if (!faultFound && (powerOnFault >= FAULT_COUNT))
341         {
342             faultFound = true;
343 
344             util::NamesValues nv;
345             nv.add("STATUS_WORD", statusWord);
346             captureCmd(nv, STATUS_INPUT, Type::Debug);
347             auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0);
348             captureCmd(nv, status0Vout, Type::Debug);
349             captureCmd(nv, STATUS_IOUT, Type::Debug);
350             captureCmd(nv, STATUS_MFR, Type::Debug);
351 
352             using metadata =
353                 org::open_power::Witherspoon::Fault::PowerSupplyShouldBeOn;
354 
355             // A power supply is OFF (or pgood low) but should be on.
356             report<PowerSupplyShouldBeOn>(
357                 metadata::RAW_STATUS(nv.get().c_str()),
358                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
359         }
360     }
361 }
362 
363 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord)
364 {
365     using namespace phosphor::pmbus;
366 
367     if (outputOCFault < FAULT_COUNT)
368     {
369         // Check for an output overcurrent fault.
370         if ((statusWord & status_word::IOUT_OC_FAULT))
371         {
372             outputOCFault++;
373         }
374         else
375         {
376             if (outputOCFault > 0)
377             {
378                 outputOCFault = 0;
379             }
380         }
381 
382         if (!faultFound && (outputOCFault >= FAULT_COUNT))
383         {
384             util::NamesValues nv;
385             nv.add("STATUS_WORD", statusWord);
386             captureCmd(nv, STATUS_INPUT, Type::Debug);
387             auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0);
388             captureCmd(nv, status0Vout, Type::Debug);
389             captureCmd(nv, STATUS_IOUT, Type::Debug);
390             captureCmd(nv, STATUS_MFR, Type::Debug);
391 
392             using metadata = org::open_power::Witherspoon::Fault::
393                 PowerSupplyOutputOvercurrent;
394 
395             report<PowerSupplyOutputOvercurrent>(
396                 metadata::RAW_STATUS(nv.get().c_str()),
397                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
398 
399             faultFound = true;
400         }
401     }
402 }
403 
404 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord)
405 {
406     using namespace phosphor::pmbus;
407 
408     if (outputOVFault < FAULT_COUNT)
409     {
410         // Check for an output overvoltage fault.
411         if (statusWord & status_word::VOUT_OV_FAULT)
412         {
413             outputOVFault++;
414         }
415         else
416         {
417             if (outputOVFault > 0)
418             {
419                 outputOVFault = 0;
420             }
421         }
422 
423         if (!faultFound && (outputOVFault >= FAULT_COUNT))
424         {
425             util::NamesValues nv;
426             nv.add("STATUS_WORD", statusWord);
427             captureCmd(nv, STATUS_INPUT, Type::Debug);
428             auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0);
429             captureCmd(nv, status0Vout, Type::Debug);
430             captureCmd(nv, STATUS_IOUT, Type::Debug);
431             captureCmd(nv, STATUS_MFR, Type::Debug);
432 
433             using metadata = org::open_power::Witherspoon::Fault::
434                 PowerSupplyOutputOvervoltage;
435 
436             report<PowerSupplyOutputOvervoltage>(
437                 metadata::RAW_STATUS(nv.get().c_str()),
438                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
439 
440             faultFound = true;
441         }
442     }
443 }
444 
445 void PowerSupply::checkFanFault(const uint16_t statusWord)
446 {
447     using namespace phosphor::pmbus;
448 
449     if (fanFault < FAULT_COUNT)
450     {
451         // Check for a fan fault or warning condition
452         if (statusWord & status_word::FAN_FAULT)
453         {
454             fanFault++;
455         }
456         else
457         {
458             if (fanFault > 0)
459             {
460                 fanFault = 0;
461             }
462         }
463 
464         if (!faultFound && (fanFault >= FAULT_COUNT))
465         {
466             util::NamesValues nv;
467             nv.add("STATUS_WORD", statusWord);
468             captureCmd(nv, STATUS_MFR, Type::Debug);
469             captureCmd(nv, STATUS_TEMPERATURE, Type::Debug);
470             captureCmd(nv, STATUS_FANS_1_2, Type::Debug);
471 
472             using metadata =
473                 org::open_power::Witherspoon::Fault::PowerSupplyFanFault;
474 
475             report<PowerSupplyFanFault>(
476                 metadata::RAW_STATUS(nv.get().c_str()),
477                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
478 
479             faultFound = true;
480         }
481     }
482 }
483 
484 void PowerSupply::checkTemperatureFault(const uint16_t statusWord)
485 {
486     using namespace phosphor::pmbus;
487 
488     // Due to how the PMBus core device driver sends a clear faults command
489     // the bit in STATUS_WORD will likely be cleared when we attempt to examine
490     // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the
491     // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with
492     // logging the over-temperature condition.
493     std::uint8_t statusTemperature = 0;
494     statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug);
495     if (temperatureFault < FAULT_COUNT)
496     {
497         if ((statusWord & status_word::TEMPERATURE_FAULT_WARN) ||
498             (statusTemperature & status_temperature::OT_FAULT))
499         {
500             temperatureFault++;
501         }
502         else
503         {
504             if (temperatureFault > 0)
505             {
506                 temperatureFault = 0;
507             }
508         }
509 
510         if (!faultFound && (temperatureFault >= FAULT_COUNT))
511         {
512             // The power supply has had an over-temperature condition.
513             // This may not result in a shutdown if experienced for a short
514             // duration.
515             // This should not occur under normal conditions.
516             // The power supply may be faulty, or the paired supply may be
517             // putting out less current.
518             // Capture command responses with potentially relevant information,
519             // and call out the power supply reporting the condition.
520             util::NamesValues nv;
521             nv.add("STATUS_WORD", statusWord);
522             captureCmd(nv, STATUS_MFR, Type::Debug);
523             captureCmd(nv, STATUS_IOUT, Type::Debug);
524             nv.add("STATUS_TEMPERATURE", statusTemperature);
525             captureCmd(nv, STATUS_FANS_1_2, Type::Debug);
526 
527             using metadata = org::open_power::Witherspoon::Fault::
528                 PowerSupplyTemperatureFault;
529 
530             report<PowerSupplyTemperatureFault>(
531                 metadata::RAW_STATUS(nv.get().c_str()),
532                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
533 
534             faultFound = true;
535         }
536     }
537 }
538 
539 void PowerSupply::clearFaults()
540 {
541     readFail = 0;
542     readFailLogged = false;
543     inputFault = 0;
544     powerOnFault = 0;
545     outputOCFault = 0;
546     outputOVFault = 0;
547     fanFault = 0;
548     temperatureFault = 0;
549     faultFound = false;
550 
551     return;
552 }
553 
554 void PowerSupply::resolveError(const std::string& callout,
555                                const std::string& message)
556 {
557     using EndpointList = std::vector<std::string>;
558 
559     try
560     {
561         auto path = callout + "/fault";
562         // Get the service name from the mapper for the fault callout
563         auto service = util::getService(path, ASSOCIATION_IFACE, bus);
564 
565         // Use getProperty utility function to get log entries (endpoints)
566         EndpointList logEntries;
567         util::getProperty(ASSOCIATION_IFACE, ENDPOINTS_PROP, path, service, bus,
568                           logEntries);
569 
570         // It is possible that all such entries for this callout have since
571         // been deleted.
572         if (logEntries.empty())
573         {
574             return;
575         }
576 
577         auto logEntryService =
578             util::getService(logEntries[0], LOGGING_IFACE, bus);
579         if (logEntryService.empty())
580         {
581             return;
582         }
583 
584         // go through each log entry that matches this callout path
585         std::string logMessage;
586         for (const auto& logEntry : logEntries)
587         {
588             // Check to see if this logEntry has a message that matches.
589             util::getProperty(LOGGING_IFACE, MESSAGE_PROP, logEntry,
590                               logEntryService, bus, logMessage);
591 
592             if (message == logMessage)
593             {
594                 // Log entry matches call out and message, set Resolved to true
595                 bool resolved = true;
596                 util::setProperty(LOGGING_IFACE, RESOLVED_PROP, logEntry,
597                                   logEntryService, bus, resolved);
598             }
599         }
600     }
601     catch (std::exception& e)
602     {
603         log<level::INFO>("Failed to resolve error",
604                          entry("CALLOUT=%s", callout.c_str()),
605                          entry("ERROR=%s", message.c_str()));
606     }
607 }
608 
609 void PowerSupply::updateInventory()
610 {
611     using namespace phosphor::pmbus;
612     using namespace sdbusplus::message;
613 
614     // Build the object map and send it to the inventory
615     using Properties = std::map<std::string, variant<std::string>>;
616     using Interfaces = std::map<std::string, Properties>;
617     using Object = std::map<object_path, Interfaces>;
618     Properties assetProps;
619     Interfaces interfaces;
620     Object object;
621 
622     // If any of these accesses fail, the fields will just be
623     // blank in the inventory.  Leave logging ReadFailure errors
624     // to analyze() as it runs continuously and will most
625     // likely hit and threshold them first anyway.  The
626     // readString() function will do the tracing of the failing
627     // path so this code doesn't need to.
628     for (const auto& fru : fruJson.at("fruConfigs"))
629     {
630         if (fru.at("interface") == ASSET_IFACE)
631         {
632             try
633             {
634                 assetProps.emplace(
635                     fru.at("propertyName"),
636                     present ? pmbusIntf.readString(fru.at("fileName"),
637                                                    inventoryPMBusAccessType)
638                             : "");
639             }
640             catch (ReadFailure& e)
641             {
642             }
643         }
644     }
645 
646     interfaces.emplace(ASSET_IFACE, std::move(assetProps));
647 
648     // For Notify(), just send the relative path of the inventory
649     // object so remove the INVENTORY_OBJ_PATH prefix
650     auto path = inventoryPath.substr(strlen(INVENTORY_OBJ_PATH));
651 
652     object.emplace(path, std::move(interfaces));
653 
654     try
655     {
656         auto service =
657             util::getService(INVENTORY_OBJ_PATH, INVENTORY_MGR_IFACE, bus);
658 
659         if (service.empty())
660         {
661             log<level::ERR>("Unable to get inventory manager service");
662             return;
663         }
664 
665         auto method = bus.new_method_call(service.c_str(), INVENTORY_OBJ_PATH,
666                                           INVENTORY_MGR_IFACE, "Notify");
667 
668         method.append(std::move(object));
669 
670         auto reply = bus.call(method);
671     }
672     catch (std::exception& e)
673     {
674         log<level::ERR>(e.what(), entry("PATH=%s", inventoryPath.c_str()));
675     }
676 }
677 
678 void PowerSupply::syncHistory()
679 {
680     using namespace phosphor::gpio;
681 
682     if (syncGPIODevPath.empty())
683     {
684         // Sync not implemented
685         return;
686     }
687 
688     GPIO gpio{syncGPIODevPath, static_cast<gpioNum_t>(syncGPIONumber),
689               Direction::output};
690 
691     try
692     {
693         gpio.set(Value::low);
694 
695         std::this_thread::sleep_for(std::chrono::milliseconds{5});
696 
697         gpio.set(Value::high);
698 
699         recordManager->clear();
700     }
701     catch (std::exception& e)
702     {
703         // Do nothing.  There would already be a journal entry.
704     }
705 }
706 
707 void PowerSupply::enableHistory(const std::string& objectPath,
708                                 size_t numRecords,
709                                 const std::string& syncGPIOPath,
710                                 size_t syncGPIONum)
711 {
712     historyObjectPath = objectPath;
713     syncGPIODevPath = syncGPIOPath;
714     syncGPIONumber = syncGPIONum;
715 
716     recordManager = std::make_unique<history::RecordManager>(numRecords);
717 
718     auto avgPath = historyObjectPath + '/' + history::Average::name;
719     auto maxPath = historyObjectPath + '/' + history::Maximum::name;
720 
721     average = std::make_unique<history::Average>(bus, avgPath);
722 
723     maximum = std::make_unique<history::Maximum>(bus, maxPath);
724 }
725 
726 void PowerSupply::updateHistory()
727 {
728     if (!recordManager)
729     {
730         // Not enabled
731         return;
732     }
733 
734     // Read just the most recent average/max record
735     auto data =
736         pmbusIntf.readBinary(INPUT_HISTORY, pmbus::Type::HwmonDeviceDebug,
737                              history::RecordManager::RAW_RECORD_SIZE);
738 
739     // Update D-Bus only if something changed (a new record ID, or cleared out)
740     auto changed = recordManager->add(data);
741     if (changed)
742     {
743         average->values(std::move(recordManager->getAverageRecords()));
744         maximum->values(std::move(recordManager->getMaximumRecords()));
745     }
746 }
747 
748 } // namespace psu
749 } // namespace power
750 } // namespace phosphor
751