1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "threshold_alarm_logger.hpp"
17 
18 #include "sdbusplus.hpp"
19 
20 #include <unistd.h>
21 
22 #include <phosphor-logging/log.hpp>
23 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
24 
25 #include <format>
26 
27 namespace sensor::monitor
28 {
29 
30 using namespace sdbusplus::xyz::openbmc_project::Logging::server;
31 using namespace phosphor::logging;
32 using namespace phosphor::fan;
33 using namespace phosphor::fan::util;
34 
// D-Bus interfaces that hold the threshold alarm properties.
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

// Service, object path and interface used when creating an event log.
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

// Prefix that every threshold error name starts with.
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

// Interfaces used to read the sensor value and the association endpoints.
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

// Every threshold interface that is monitored for alarms.
const std::vector<std::string> thresholdIfaceNames{
    warningInterface, criticalInterface, perfLossInterface};
50 
51 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
52 
53 /**
54  * Map of threshold interfaces and alarm properties and values to error data.
55  */
56 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
57     thresholdData{
58 
59         {warningInterface,
60          {{"WarningAlarmHigh",
61            {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
62             {false,
63              ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
64           {"WarningAlarmLow",
65            {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
66             {false,
67              ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
68 
69         {criticalInterface,
70          {{"CriticalAlarmHigh",
71            {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
72             {false,
73              ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
74           {"CriticalAlarmLow",
75            {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
76             {false, ErrorData{"CriticalLow", "Clear",
77                               Entry::Level::Informational}}}}}},
78 
79         {perfLossInterface,
80          {{"PerfLossAlarmHigh",
81            {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
82             {false, ErrorData{"PerformanceLossHigh", "Clear",
83                               Entry::Level::Informational}}}},
84           {"PerfLossAlarmLow",
85            {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
86             {false, ErrorData{"PerformanceLossLow", "Clear",
87                               Entry::Level::Informational}}}}}}};
88 
ThresholdAlarmLogger(sdbusplus::bus_t & bus,sdeventplus::Event & event,std::shared_ptr<PowerState> powerState)89 ThresholdAlarmLogger::ThresholdAlarmLogger(
90     sdbusplus::bus_t& bus, sdeventplus::Event& event,
91     std::shared_ptr<PowerState> powerState) :
92     bus(bus),
93     event(event), _powerState(std::move(powerState)),
94     warningMatch(bus,
95                  "type='signal',member='PropertiesChanged',"
96                  "path_namespace='/xyz/openbmc_project/sensors',"
97                  "arg0='" +
98                      warningInterface + "'",
99                  std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
100                            std::placeholders::_1)),
101     criticalMatch(bus,
102                   "type='signal',member='PropertiesChanged',"
103                   "path_namespace='/xyz/openbmc_project/sensors',"
104                   "arg0='" +
105                       criticalInterface + "'",
106                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
107                             std::placeholders::_1)),
108     perfLossMatch(bus,
109                   "type='signal',member='PropertiesChanged',"
110                   "path_namespace='/xyz/openbmc_project/sensors',"
111                   "arg0='" +
112                       perfLossInterface + "'",
113                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
114                             std::placeholders::_1)),
115     ifacesRemovedMatch(bus,
116                        "type='signal',member='InterfacesRemoved',arg0path="
117                        "'/xyz/openbmc_project/sensors/'",
118                        std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
119                                  std::placeholders::_1)),
120     ifacesAddedMatch(bus,
121                      "type='signal',member='InterfacesAdded',arg0path="
122                      "'/xyz/openbmc_project/sensors/'",
123                      std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
124                                std::placeholders::_1))
125 {
126     _powerState->addCallback("thresholdMon",
127                              std::bind(&ThresholdAlarmLogger::powerStateChanged,
128                                        this, std::placeholders::_1));
129 
130     // check for any currently asserted threshold alarms
131     std::for_each(thresholdData.begin(), thresholdData.end(),
132                   [this](const auto& thresholdInterface) {
133         const auto& interface = thresholdInterface.first;
134         auto objects = SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
135         std::for_each(objects.begin(), objects.end(),
136                       [interface, this](const auto& object) {
137             const auto& path = object.first;
138             const auto& service = object.second.begin()->first;
139             checkThresholds(interface, path, service);
140         });
141     });
142 }
143 
propertiesChanged(sdbusplus::message_t & msg)144 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
145 {
146     std::map<std::string, std::variant<bool>> properties;
147     std::string sensorPath = msg.get_path();
148     std::string interface;
149 
150     msg.read(interface, properties);
151 
152     checkProperties(sensorPath, interface, properties);
153 }
154 
interfacesRemoved(sdbusplus::message_t & msg)155 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
156 {
157     sdbusplus::message::object_path path;
158     std::vector<std::string> interfaces;
159 
160     msg.read(path, interfaces);
161 
162     for (const auto& interface : interfaces)
163     {
164         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
165                       interface) != thresholdIfaceNames.end())
166         {
167             alarms.erase(InterfaceKey{path, interface});
168         }
169     }
170 }
171 
interfacesAdded(sdbusplus::message_t & msg)172 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
173 {
174     sdbusplus::message::object_path path;
175     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
176 
177     msg.read(path, interfaces);
178 
179     for (const auto& [interface, properties] : interfaces)
180     {
181         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
182                       interface) != thresholdIfaceNames.end())
183         {
184             checkProperties(path, interface, properties);
185         }
186     }
187 }
188 
checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)189 void ThresholdAlarmLogger::checkProperties(
190     const std::string& sensorPath, const std::string& interface,
191     const std::map<std::string, std::variant<bool>>& properties)
192 {
193     auto alarmProperties = thresholdData.find(interface);
194     if (alarmProperties == thresholdData.end())
195     {
196         return;
197     }
198 
199     for (const auto& [propertyName, propertyValue] : properties)
200     {
201         if (alarmProperties->second.find(propertyName) !=
202             alarmProperties->second.end())
203         {
204             // If this is the first time we've seen this alarm, then
205             // assume it was off before so it doesn't create an event
206             // log for a value of false.
207 
208             InterfaceKey key{sensorPath, interface};
209             if (alarms.find(key) == alarms.end())
210             {
211                 alarms[key][propertyName] = false;
212             }
213 
214             // Check if the value changed from what was there before.
215             auto alarmValue = std::get<bool>(propertyValue);
216             if (alarmValue != alarms[key][propertyName])
217             {
218                 alarms[key][propertyName] = alarmValue;
219 
220                 if (_powerState->isPowerOn())
221                 {
222                     createEventLog(sensorPath, interface, propertyName,
223                                    alarmValue);
224                 }
225             }
226         }
227     }
228 }
229 
checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)230 void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
231                                            const std::string& sensorPath,
232                                            const std::string& service)
233 {
234     auto properties = thresholdData.find(interface);
235     if (properties == thresholdData.end())
236     {
237         return;
238     }
239 
240     for (const auto& [property, unused] : properties->second)
241     {
242         try
243         {
244             auto alarmValue = SDBusPlus::getProperty<bool>(
245                 bus, service, sensorPath, interface, property);
246             alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
247 
248             // This is just for checking alarms on startup,
249             // so only look for active alarms.
250             if (alarmValue && _powerState->isPowerOn())
251             {
252                 createEventLog(sensorPath, interface, property, alarmValue);
253             }
254         }
255         catch (const sdbusplus::exception_t& e)
256         {
257             // Sensor daemons that get their direction from entity manager
258             // may only be putting either the high alarm or low alarm on
259             // D-Bus, not both.
260             continue;
261         }
262     }
263 }
264 
/**
 * Creates an event log for a threshold alarm that asserted or cleared.
 *
 * The error name is built from the base error name, the capitalized
 * sensor type, and the name and status from the thresholdData table.
 * The sensor path and PID are always put in the additional data; the
 * sensor value, threshold value and inventory callout are added when
 * they can be read.
 *
 * Nothing is logged for sensor types that skipSensorType() filters out,
 * or when the interface/property isn't in the thresholdData table.
 *
 * @param[in] sensorPath - The sensor object path
 * @param[in] interface - The threshold interface
 * @param[in] alarmProperty - The alarm property name
 * @param[in] alarmValue - The alarm property value
 */
void ThresholdAlarmLogger::createEventLog(const std::string& sensorPath,
                                          const std::string& interface,
                                          const std::string& alarmProperty,
                                          bool alarmValue)
{
    auto type = getSensorType(sensorPath);
    if (skipSensorType(type))
    {
        return;
    }

    auto it = thresholdData.find(interface);
    if (it == thresholdData.end())
    {
        return;
    }

    auto properties = it->second.find(alarmProperty);
    if (properties == it->second.end())
    {
        log<level::INFO>(
            std::format("Could not find {} in threshold alarms map",
                        alarmProperty)
                .c_str());
        return;
    }

    // Validate the lookup before the iterator is dereferenced below.
    auto errorData = properties->second.find(alarmValue);
    if (errorData == properties->second.end())
    {
        log<level::INFO>(
            std::format("Could not find error data for {} = {}", alarmProperty,
                        alarmValue)
                .c_str());
        return;
    }

    std::map<std::string, std::string> ad;
    ad.emplace("SENSOR_NAME", sensorPath);
    ad.emplace("_PID", std::to_string(getpid()));

    try
    {
        auto sensorValue = SDBusPlus::getProperty<double>(
            bus, sensorPath, valueInterface, "Value");

        ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));

        log<level::INFO>(
            std::format("Threshold Event {} {} = {} (sensor value {})",
                        sensorPath, alarmProperty, alarmValue, sensorValue)
                .c_str());
    }
    catch (const DBusServiceError& e)
    {
        // If the sensor was just added, the Value interface for it may
        // not be in the mapper yet.  This could only happen if the sensor
        // application was started up after this one and the value exceeded the
        // threshold immediately.
        log<level::INFO>(std::format("Threshold Event {} {} = {}", sensorPath,
                                     alarmProperty, alarmValue)
                             .c_str());
    }

    auto callout = getCallout(sensorPath);
    if (!callout.empty())
    {
        ad.emplace("CALLOUT_INVENTORY_PATH", callout);
    }

    // Add the base error name and the sensor type (like Temperature) to the
    // error name that's in the thresholdData name to get something like
    // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
    const auto& [name, status, severity] = errorData->second;

    try
    {
        // The error name (like WarningHigh) is also used as the name of
        // the property to read the threshold value from.
        auto thresholdValue = SDBusPlus::getProperty<double>(bus, sensorPath,
                                                             interface, name);

        ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));

        log<level::INFO>(
            std::format("Threshold Event {} {} = {} (threshold value {})",
                        sensorPath, alarmProperty, alarmValue, thresholdValue)
                .c_str());
    }
    catch (const DBusServiceError& e)
    {
        log<level::INFO>(std::format("Threshold Event {} {} = {}", sensorPath,
                                     alarmProperty, alarmValue)
                             .c_str());
    }

    // The type can be empty for a path that only has a single segment,
    // and front() on an empty string is undefined behavior.  toupper()
    // also needs a value representable as an unsigned char.
    if (!type.empty())
    {
        type.front() = static_cast<char>(
            toupper(static_cast<unsigned char>(type.front())));
    }
    std::string errorName = errorNameBase + type + name + status;

    SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
                          "Create", errorName, convertForMessage(severity), ad);
}
358 
/**
 * Returns the sensor type, which is the path segment just before the
 * final one.  For example, 'temperature' is returned for
 * /xyz/openbmc_project/sensors/temperature/ambient.
 *
 * @param[in] sensorPath - The sensor object path
 *
 * @return The sensor type segment.  It is empty when the path has just
 *         a single segment, like "/name".
 *
 * @throws std::runtime_error if the path is empty, has no '/' in it,
 *         or ends in a '/'.
 */
std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
{
    const auto pos = sensorPath.find_last_of('/');

    // The npos check has to be first.  It is also true for an empty path,
    // which keeps back() from being called on an empty string.
    if ((pos == std::string::npos) || (sensorPath.back() == '/'))
    {
        log<level::ERR>(
            std::format("Cannot get sensor type from sensor path {}",
                        sensorPath)
                .c_str());
        throw std::runtime_error("Invalid sensor path");
    }

    // Remove the final segment so the type is the last one remaining.
    sensorPath = sensorPath.substr(0, pos);

    // If there is no '/' left then npos + 1 wraps around to zero and
    // everything that remains is returned.
    return sensorPath.substr(sensorPath.find_last_of('/') + 1);
}
374 
skipSensorType(const std::string & type)375 bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
376 {
377     return (type == "utilization");
378 }
379 
getCallout(const std::string & sensorPath)380 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
381 {
382     const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
383 
384     // Different implementations handle the association to the FRU
385     // differently:
386     //  * phosphor-inventory-manager uses the 'inventory' association
387     //    to point to the FRU.
388     //  * dbus-sensors/entity-manager uses the 'chassis' association'.
389     //  * For virtual sensors, no association.
390 
391     for (const auto& assocType : assocTypes)
392     {
393         auto assocPath = sensorPath + "/" + assocType;
394 
395         try
396         {
397             auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
398                 bus, assocPath, assocInterface, "endpoints");
399 
400             if (!endpoints.empty())
401             {
402                 return endpoints[0];
403             }
404         }
405         catch (const DBusServiceError& e)
406         {
407             // The association doesn't exist
408             continue;
409         }
410     }
411 
412     return std::string{};
413 }
414 
powerStateChanged(bool powerStateOn)415 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
416 {
417     if (powerStateOn)
418     {
419         checkThresholds();
420     }
421 }
422 
checkThresholds()423 void ThresholdAlarmLogger::checkThresholds()
424 {
425     std::vector<InterfaceKey> toErase;
426 
427     for (const auto& [interfaceKey, alarmMap] : alarms)
428     {
429         for (const auto& [propertyName, alarmValue] : alarmMap)
430         {
431             if (alarmValue)
432             {
433                 const auto& sensorPath = std::get<0>(interfaceKey);
434                 const auto& interface = std::get<1>(interfaceKey);
435                 std::string service;
436 
437                 try
438                 {
439                     // Check that the service that provides the alarm is still
440                     // running, because if it died when the alarm was active
441                     // there would be no indication of it unless we listened
442                     // for NameOwnerChanged and tracked services, and this is
443                     // easier.
444                     service = SDBusPlus::getService(bus, sensorPath, interface);
445                 }
446                 catch (const DBusServiceError& e)
447                 {
448                     // No longer on D-Bus delete the alarm entry
449                     toErase.emplace_back(sensorPath, interface);
450                 }
451 
452                 if (!service.empty())
453                 {
454                     createEventLog(sensorPath, interface, propertyName,
455                                    alarmValue);
456                 }
457             }
458         }
459     }
460 
461     for (const auto& e : toErase)
462     {
463         alarms.erase(e);
464     }
465 }
466 
467 } // namespace sensor::monitor
468