1 /**
2  * Copyright © 2017 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "power_supply.hpp"
19 
20 #include "elog-errors.hpp"
21 #include "gpio.hpp"
22 #include "names_values.hpp"
23 #include "pmbus.hpp"
24 #include "types.hpp"
25 #include "utility.hpp"
26 
27 #include <org/open_power/Witherspoon/Fault/error.hpp>
28 #include <phosphor-logging/log.hpp>
29 #include <xyz/openbmc_project/Common/Device/error.hpp>
30 
31 #include <functional>
32 
33 namespace phosphor
34 {
35 namespace power
36 {
37 namespace psu
38 {
39 
40 using namespace phosphor::logging;
41 using namespace sdbusplus::org::open_power::Witherspoon::Fault::Error;
42 using namespace sdbusplus::xyz::openbmc_project::Common::Device::Error;
43 
44 PowerSupply::PowerSupply(const std::string& name, size_t inst,
45                          const std::string& objpath, const std::string& invpath,
46                          sdbusplus::bus_t& bus, const sdeventplus::Event& e,
47                          std::chrono::seconds& t, std::chrono::seconds& p) :
48     Device(name, inst),
49     monitorPath(objpath), pmbusIntf(objpath),
50     inventoryPath(INVENTORY_OBJ_PATH + invpath), bus(bus), presentInterval(p),
51     presentTimer(e, std::bind([this]() {
52                      // The hwmon path may have changed.
53                      pmbusIntf.findHwmonDir();
54                      this->present = true;
55 
56                      // Sync the INPUT_HISTORY data for all PSs
57                      syncHistory();
58 
59                      // Update the inventory for the new device
60                      updateInventory();
61                  })),
62     powerOnInterval(t),
63     powerOnTimer(e, std::bind([this]() { this->powerOn = true; }))
64 {
65     getAccessType();
66 
67     using namespace sdbusplus::bus;
68     using namespace phosphor::pmbus;
69     std::uint16_t statusWord = 0;
70     try
71     {
72         // Read the 2 byte STATUS_WORD value to check for faults.
73         statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug);
74         if (!((statusWord & status_word::INPUT_FAULT_WARN) ||
75               (statusWord & status_word::VIN_UV_FAULT)))
76         {
77             resolveError(inventoryPath,
78                          std::string(PowerSupplyInputFault::errName));
79         }
80     }
81     catch (const ReadFailure& e)
82     {
83         log<level::INFO>("Unable to read the 2 byte STATUS_WORD value to check "
84                          "for power-supply input faults.");
85     }
86     presentMatch = std::make_unique<match_t>(
87         bus, match::rules::propertiesChanged(inventoryPath, INVENTORY_IFACE),
88         [this](auto& msg) { this->inventoryChanged(msg); });
89     // Get initial presence state.
90     updatePresence();
91 
92     // Write the SN, PN, etc to the inventory
93     updateInventory();
94 
95     // Subscribe to power state changes
96     powerOnMatch = std::make_unique<match_t>(
97         bus, match::rules::propertiesChanged(POWER_OBJ_PATH, POWER_IFACE),
98         [this](auto& msg) { this->powerStateChanged(msg); });
99     // Get initial power state.
100     updatePowerState();
101 }
102 
103 void PowerSupply::getAccessType()
104 {
105     using namespace phosphor::power::util;
106     fruJson = loadJSONFromFile(PSU_JSON_PATH);
107     if (fruJson == nullptr)
108     {
109         log<level::ERR>("InternalFailure when parsing the JSON file");
110         return;
111     }
112     inventoryPMBusAccessType = getPMBusAccessType(fruJson);
113 }
114 
115 void PowerSupply::captureCmd(util::NamesValues& nv, const std::string& cmd,
116                              phosphor::pmbus::Type type)
117 {
118     if (pmbusIntf.exists(cmd, type))
119     {
120         try
121         {
122             auto val = pmbusIntf.read(cmd, type);
123             nv.add(cmd, val);
124         }
125         catch (const std::exception& e)
126         {
127             log<level::INFO>("Unable to capture metadata",
128                              entry("CMD=%s", cmd.c_str()));
129         }
130     }
131 }
132 
133 void PowerSupply::analyze()
134 {
135     using namespace phosphor::pmbus;
136 
137     try
138     {
139         if (present)
140         {
141             std::uint16_t statusWord = 0;
142 
143             // Read the 2 byte STATUS_WORD value to check for faults.
144             statusWord = pmbusIntf.read(STATUS_WORD, Type::Debug);
145             readFail = 0;
146 
147             checkInputFault(statusWord);
148 
149             if (powerOn && (inputFault == 0) && !faultFound)
150             {
151                 checkFanFault(statusWord);
152                 checkTemperatureFault(statusWord);
153                 checkOutputOvervoltageFault(statusWord);
154                 checkCurrentOutOverCurrentFault(statusWord);
155                 checkPGOrUnitOffFault(statusWord);
156             }
157 
158             updateHistory();
159         }
160     }
161     catch (const ReadFailure& e)
162     {
163         if (readFail < FAULT_COUNT)
164         {
165             readFail++;
166         }
167 
168         if (!readFailLogged && readFail >= FAULT_COUNT)
169         {
170             commit<ReadFailure>();
171             readFailLogged = true;
172         }
173     }
174 
175     return;
176 }
177 
178 void PowerSupply::inventoryChanged(sdbusplus::message_t& msg)
179 {
180     std::string msgSensor;
181     std::map<std::string, std::variant<uint32_t, bool>> msgData;
182     msg.read(msgSensor, msgData);
183 
184     // Check if it was the Present property that changed.
185     auto valPropMap = msgData.find(PRESENT_PROP);
186     if (valPropMap != msgData.end())
187     {
188         if (std::get<bool>(valPropMap->second))
189         {
190             clearFaults();
191             presentTimer.restartOnce(presentInterval);
192         }
193         else
194         {
195             present = false;
196             presentTimer.setEnabled(false);
197 
198             // Clear out the now outdated inventory properties
199             updateInventory();
200         }
201     }
202 
203     return;
204 }
205 
206 void PowerSupply::updatePresence()
207 {
208     // Use getProperty utility function to get presence status.
209     std::string service = "xyz.openbmc_project.Inventory.Manager";
210     util::getProperty(INVENTORY_IFACE, PRESENT_PROP, inventoryPath, service,
211                       bus, this->present);
212 }
213 
214 void PowerSupply::powerStateChanged(sdbusplus::message_t& msg)
215 {
216     int32_t state = 0;
217     std::string msgSensor;
218     std::map<std::string, std::variant<int32_t>> msgData;
219     msg.read(msgSensor, msgData);
220 
221     // Check if it was the Present property that changed.
222     auto valPropMap = msgData.find("state");
223     if (valPropMap != msgData.end())
224     {
225         state = std::get<int32_t>(valPropMap->second);
226 
227         // Power is on when state=1. Set the fault logged variables to false
228         // and start the power on timer when the state changes to 1.
229         if (state)
230         {
231             clearFaults();
232             powerOnTimer.restartOnce(powerOnInterval);
233         }
234         else
235         {
236             powerOnTimer.setEnabled(false);
237             powerOn = false;
238         }
239     }
240 }
241 
242 void PowerSupply::updatePowerState()
243 {
244     powerOn = util::isPoweredOn(bus);
245 }
246 
247 void PowerSupply::checkInputFault(const uint16_t statusWord)
248 {
249     using namespace phosphor::pmbus;
250 
251     if ((inputFault < FAULT_COUNT) &&
252         ((statusWord & status_word::INPUT_FAULT_WARN) ||
253          (statusWord & status_word::VIN_UV_FAULT)))
254     {
255         if (inputFault == 0)
256         {
257             log<level::INFO>("INPUT or VIN_UV fault",
258                              entry("STATUS_WORD=0x%04X", statusWord));
259         }
260 
261         inputFault++;
262     }
263     else
264     {
265         if ((inputFault > 0) && !(statusWord & status_word::INPUT_FAULT_WARN) &&
266             !(statusWord & status_word::VIN_UV_FAULT))
267         {
268             inputFault = 0;
269             faultFound = false;
270             // When an input fault occurs, the power supply cannot be on.
271             // However, the check for the case where the power supply should be
272             // on will stop when there is a fault found.
273             // Clear the powerOnFault when the inputFault is cleared to reset
274             // the powerOnFault de-glitching.
275             powerOnFault = 0;
276 
277             log<level::INFO>("INPUT_FAULT_WARN cleared",
278                              entry("POWERSUPPLY=%s", inventoryPath.c_str()));
279 
280             resolveError(inventoryPath,
281                          std::string(PowerSupplyInputFault::errName));
282 
283             if (powerOn)
284             {
285                 // The power supply will not be immediately powered on after
286                 // the input power is restored.
287                 powerOn = false;
288                 // Start up the timer that will set the state to indicate we
289                 // are ready for the powered on fault checks.
290                 powerOnTimer.restartOnce(powerOnInterval);
291             }
292         }
293     }
294 
295     if (!faultFound && (inputFault >= FAULT_COUNT))
296     {
297         // If the power is on, report the fault in an error log entry.
298         if (powerOn)
299         {
300             util::NamesValues nv;
301             nv.add("STATUS_WORD", statusWord);
302             captureCmd(nv, STATUS_INPUT, Type::Debug);
303 
304             using metadata =
305                 org::open_power::Witherspoon::Fault::PowerSupplyInputFault;
306 
307             report<PowerSupplyInputFault>(
308                 metadata::RAW_STATUS(nv.get().c_str()),
309                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
310 
311             faultFound = true;
312         }
313     }
314 }
315 
316 void PowerSupply::checkPGOrUnitOffFault(const uint16_t statusWord)
317 {
318     using namespace phosphor::pmbus;
319 
320     if (powerOnFault < FAULT_COUNT)
321     {
322         // Check PG# and UNIT_IS_OFF
323         if ((statusWord & status_word::POWER_GOOD_NEGATED) ||
324             (statusWord & status_word::UNIT_IS_OFF))
325         {
326             log<level::INFO>("PGOOD or UNIT_IS_OFF bit bad",
327                              entry("STATUS_WORD=0x%04X", statusWord));
328             powerOnFault++;
329         }
330         else
331         {
332             if (powerOnFault > 0)
333             {
334                 log<level::INFO>("PGOOD and UNIT_IS_OFF bits good");
335                 powerOnFault = 0;
336             }
337         }
338 
339         if (!faultFound && (powerOnFault >= FAULT_COUNT))
340         {
341             faultFound = true;
342 
343             util::NamesValues nv;
344             nv.add("STATUS_WORD", statusWord);
345             captureCmd(nv, STATUS_INPUT, Type::Debug);
346             auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0);
347             captureCmd(nv, status0Vout, Type::Debug);
348             captureCmd(nv, STATUS_IOUT, Type::Debug);
349             captureCmd(nv, STATUS_MFR, Type::Debug);
350 
351             using metadata =
352                 org::open_power::Witherspoon::Fault::PowerSupplyShouldBeOn;
353 
354             // A power supply is OFF (or pgood low) but should be on.
355             report<PowerSupplyShouldBeOn>(
356                 metadata::RAW_STATUS(nv.get().c_str()),
357                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
358         }
359     }
360 }
361 
362 void PowerSupply::checkCurrentOutOverCurrentFault(const uint16_t statusWord)
363 {
364     using namespace phosphor::pmbus;
365 
366     if (outputOCFault < FAULT_COUNT)
367     {
368         // Check for an output overcurrent fault.
369         if ((statusWord & status_word::IOUT_OC_FAULT))
370         {
371             outputOCFault++;
372         }
373         else
374         {
375             if (outputOCFault > 0)
376             {
377                 outputOCFault = 0;
378             }
379         }
380 
381         if (!faultFound && (outputOCFault >= FAULT_COUNT))
382         {
383             util::NamesValues nv;
384             nv.add("STATUS_WORD", statusWord);
385             captureCmd(nv, STATUS_INPUT, Type::Debug);
386             auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0);
387             captureCmd(nv, status0Vout, Type::Debug);
388             captureCmd(nv, STATUS_IOUT, Type::Debug);
389             captureCmd(nv, STATUS_MFR, Type::Debug);
390 
391             using metadata = org::open_power::Witherspoon::Fault::
392                 PowerSupplyOutputOvercurrent;
393 
394             report<PowerSupplyOutputOvercurrent>(
395                 metadata::RAW_STATUS(nv.get().c_str()),
396                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
397 
398             faultFound = true;
399         }
400     }
401 }
402 
403 void PowerSupply::checkOutputOvervoltageFault(const uint16_t statusWord)
404 {
405     using namespace phosphor::pmbus;
406 
407     if (outputOVFault < FAULT_COUNT)
408     {
409         // Check for an output overvoltage fault.
410         if (statusWord & status_word::VOUT_OV_FAULT)
411         {
412             outputOVFault++;
413         }
414         else
415         {
416             if (outputOVFault > 0)
417             {
418                 outputOVFault = 0;
419             }
420         }
421 
422         if (!faultFound && (outputOVFault >= FAULT_COUNT))
423         {
424             util::NamesValues nv;
425             nv.add("STATUS_WORD", statusWord);
426             captureCmd(nv, STATUS_INPUT, Type::Debug);
427             auto status0Vout = pmbusIntf.insertPageNum(STATUS_VOUT, 0);
428             captureCmd(nv, status0Vout, Type::Debug);
429             captureCmd(nv, STATUS_IOUT, Type::Debug);
430             captureCmd(nv, STATUS_MFR, Type::Debug);
431 
432             using metadata = org::open_power::Witherspoon::Fault::
433                 PowerSupplyOutputOvervoltage;
434 
435             report<PowerSupplyOutputOvervoltage>(
436                 metadata::RAW_STATUS(nv.get().c_str()),
437                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
438 
439             faultFound = true;
440         }
441     }
442 }
443 
444 void PowerSupply::checkFanFault(const uint16_t statusWord)
445 {
446     using namespace phosphor::pmbus;
447 
448     if (fanFault < FAULT_COUNT)
449     {
450         // Check for a fan fault or warning condition
451         if (statusWord & status_word::FAN_FAULT)
452         {
453             fanFault++;
454         }
455         else
456         {
457             if (fanFault > 0)
458             {
459                 fanFault = 0;
460             }
461         }
462 
463         if (!faultFound && (fanFault >= FAULT_COUNT))
464         {
465             util::NamesValues nv;
466             nv.add("STATUS_WORD", statusWord);
467             captureCmd(nv, STATUS_MFR, Type::Debug);
468             captureCmd(nv, STATUS_TEMPERATURE, Type::Debug);
469             captureCmd(nv, STATUS_FANS_1_2, Type::Debug);
470 
471             using metadata =
472                 org::open_power::Witherspoon::Fault::PowerSupplyFanFault;
473 
474             report<PowerSupplyFanFault>(
475                 metadata::RAW_STATUS(nv.get().c_str()),
476                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
477 
478             faultFound = true;
479         }
480     }
481 }
482 
483 void PowerSupply::checkTemperatureFault(const uint16_t statusWord)
484 {
485     using namespace phosphor::pmbus;
486 
487     // Due to how the PMBus core device driver sends a clear faults command
488     // the bit in STATUS_WORD will likely be cleared when we attempt to examine
489     // it for a Thermal Fault or Warning. So, check the STATUS_WORD and the
490     // STATUS_TEMPERATURE bits. If either indicates a fault, proceed with
491     // logging the over-temperature condition.
492     std::uint8_t statusTemperature = 0;
493     statusTemperature = pmbusIntf.read(STATUS_TEMPERATURE, Type::Debug);
494     if (temperatureFault < FAULT_COUNT)
495     {
496         if ((statusWord & status_word::TEMPERATURE_FAULT_WARN) ||
497             (statusTemperature & status_temperature::OT_FAULT))
498         {
499             temperatureFault++;
500         }
501         else
502         {
503             if (temperatureFault > 0)
504             {
505                 temperatureFault = 0;
506             }
507         }
508 
509         if (!faultFound && (temperatureFault >= FAULT_COUNT))
510         {
511             // The power supply has had an over-temperature condition.
512             // This may not result in a shutdown if experienced for a short
513             // duration.
514             // This should not occur under normal conditions.
515             // The power supply may be faulty, or the paired supply may be
516             // putting out less current.
517             // Capture command responses with potentially relevant information,
518             // and call out the power supply reporting the condition.
519             util::NamesValues nv;
520             nv.add("STATUS_WORD", statusWord);
521             captureCmd(nv, STATUS_MFR, Type::Debug);
522             captureCmd(nv, STATUS_IOUT, Type::Debug);
523             nv.add("STATUS_TEMPERATURE", statusTemperature);
524             captureCmd(nv, STATUS_FANS_1_2, Type::Debug);
525 
526             using metadata = org::open_power::Witherspoon::Fault::
527                 PowerSupplyTemperatureFault;
528 
529             report<PowerSupplyTemperatureFault>(
530                 metadata::RAW_STATUS(nv.get().c_str()),
531                 metadata::CALLOUT_INVENTORY_PATH(inventoryPath.c_str()));
532 
533             faultFound = true;
534         }
535     }
536 }
537 
538 void PowerSupply::clearFaults()
539 {
540     readFail = 0;
541     readFailLogged = false;
542     inputFault = 0;
543     powerOnFault = 0;
544     outputOCFault = 0;
545     outputOVFault = 0;
546     fanFault = 0;
547     temperatureFault = 0;
548     faultFound = false;
549 
550     return;
551 }
552 
553 void PowerSupply::resolveError(const std::string& callout,
554                                const std::string& message)
555 {
556     using EndpointList = std::vector<std::string>;
557 
558     try
559     {
560         auto path = callout + "/fault";
561         // Get the service name from the mapper for the fault callout
562         auto service = util::getService(path, ASSOCIATION_IFACE, bus);
563 
564         // Use getProperty utility function to get log entries (endpoints)
565         EndpointList logEntries;
566         util::getProperty(ASSOCIATION_IFACE, ENDPOINTS_PROP, path, service, bus,
567                           logEntries);
568 
569         // It is possible that all such entries for this callout have since
570         // been deleted.
571         if (logEntries.empty())
572         {
573             return;
574         }
575 
576         auto logEntryService = util::getService(logEntries[0], LOGGING_IFACE,
577                                                 bus);
578         if (logEntryService.empty())
579         {
580             return;
581         }
582 
583         // go through each log entry that matches this callout path
584         std::string logMessage;
585         for (const auto& logEntry : logEntries)
586         {
587             // Check to see if this logEntry has a message that matches.
588             util::getProperty(LOGGING_IFACE, MESSAGE_PROP, logEntry,
589                               logEntryService, bus, logMessage);
590 
591             if (message == logMessage)
592             {
593                 // Log entry matches call out and message, set Resolved to true
594                 bool resolved = true;
595                 util::setProperty(LOGGING_IFACE, RESOLVED_PROP, logEntry,
596                                   logEntryService, bus, resolved);
597             }
598         }
599     }
600     catch (const std::exception& e)
601     {
602         log<level::INFO>("Failed to resolve error",
603                          entry("CALLOUT=%s", callout.c_str()),
604                          entry("ERROR=%s", message.c_str()));
605     }
606 }
607 
608 void PowerSupply::updateInventory()
609 {
610     using namespace phosphor::pmbus;
611     using namespace sdbusplus::message;
612 
613     // Build the object map and send it to the inventory
614     using Properties = std::map<std::string, std::variant<std::string, bool>>;
615     using Interfaces = std::map<std::string, Properties>;
616     using Object = std::map<object_path, Interfaces>;
617     Properties assetProps;
618     Properties operProps;
619     Interfaces interfaces;
620     Object object;
621 
622     // If any of these accesses fail, the fields will just be
623     // blank in the inventory.  Leave logging ReadFailure errors
624     // to analyze() as it runs continuously and will most
625     // likely hit and threshold them first anyway.  The
626     // readString() function will do the tracing of the failing
627     // path so this code doesn't need to.
628     for (const auto& fru : fruJson.at("fruConfigs"))
629     {
630         if (fru.at("interface") == ASSET_IFACE)
631         {
632             try
633             {
634                 assetProps.emplace(
635                     fru.at("propertyName"),
636                     present ? pmbusIntf.readString(fru.at("fileName"),
637                                                    inventoryPMBusAccessType)
638                             : "");
639             }
640             catch (const ReadFailure& e)
641             {}
642         }
643     }
644 
645     operProps.emplace(FUNCTIONAL_PROP, present);
646     interfaces.emplace(ASSET_IFACE, std::move(assetProps));
647     interfaces.emplace(OPERATIONAL_STATE_IFACE, std::move(operProps));
648 
649     // For Notify(), just send the relative path of the inventory
650     // object so remove the INVENTORY_OBJ_PATH prefix
651     auto path = inventoryPath.substr(strlen(INVENTORY_OBJ_PATH));
652 
653     object.emplace(path, std::move(interfaces));
654 
655     try
656     {
657         auto service = util::getService(INVENTORY_OBJ_PATH, INVENTORY_MGR_IFACE,
658                                         bus);
659 
660         if (service.empty())
661         {
662             log<level::ERR>("Unable to get inventory manager service");
663             return;
664         }
665 
666         auto method = bus.new_method_call(service.c_str(), INVENTORY_OBJ_PATH,
667                                           INVENTORY_MGR_IFACE, "Notify");
668 
669         method.append(std::move(object));
670 
671         auto reply = bus.call(method);
672     }
673     catch (const std::exception& e)
674     {
675         log<level::ERR>(e.what(), entry("PATH=%s", inventoryPath.c_str()));
676     }
677 }
678 
679 void PowerSupply::syncHistory()
680 {
681     using namespace phosphor::gpio;
682 
683     if (syncGPIODevPath.empty())
684     {
685         // Sync not implemented
686         return;
687     }
688 
689     GPIO gpio{syncGPIODevPath, static_cast<gpioNum_t>(syncGPIONumber),
690               Direction::output};
691 
692     try
693     {
694         gpio.set(Value::low);
695 
696         std::this_thread::sleep_for(std::chrono::milliseconds{5});
697 
698         gpio.set(Value::high);
699 
700         recordManager->clear();
701     }
702     catch (const std::exception& e)
703     {
704         // Do nothing.  There would already be a journal entry.
705     }
706 }
707 
708 void PowerSupply::enableHistory(const std::string& objectPath,
709                                 size_t numRecords,
710                                 const std::string& syncGPIOPath,
711                                 size_t syncGPIONum)
712 {
713     historyObjectPath = objectPath;
714     syncGPIODevPath = syncGPIOPath;
715     syncGPIONumber = syncGPIONum;
716 
717     recordManager = std::make_unique<history::RecordManager>(numRecords);
718 
719     auto avgPath = historyObjectPath + '/' + history::Average::name;
720     auto maxPath = historyObjectPath + '/' + history::Maximum::name;
721 
722     average = std::make_unique<history::Average>(bus, avgPath);
723 
724     maximum = std::make_unique<history::Maximum>(bus, maxPath);
725 }
726 
727 void PowerSupply::updateHistory()
728 {
729     if (!recordManager)
730     {
731         // Not enabled
732         return;
733     }
734 
735     // Read just the most recent average/max record
736     auto data = pmbusIntf.readBinary(INPUT_HISTORY,
737                                      pmbus::Type::HwmonDeviceDebug,
738                                      history::RecordManager::RAW_RECORD_SIZE);
739 
740     // Update D-Bus only if something changed (a new record ID, or cleared out)
741     auto changed = recordManager->add(data);
742     if (changed)
743     {
744         average->values(recordManager->getAverageRecords());
745         maximum->values(recordManager->getMaximumRecords());
746     }
747 }
748 
749 } // namespace psu
750 } // namespace power
751 } // namespace phosphor
752