xref: /openbmc/phosphor-fan-presence/sensor-monitor/threshold_alarm_logger.cpp (revision 32c4feff9b1cd1fd4d75bb530761f70f0e5922e0)
1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "threshold_alarm_logger.hpp"
19 
20 #include "sdbusplus.hpp"
21 
22 #include <unistd.h>
23 
24 #include <phosphor-logging/lg2.hpp>
25 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
26 
27 namespace sensor::monitor
28 {
29 
30 using namespace sdbusplus::xyz::openbmc_project::Logging::server;
31 using namespace phosphor::fan;
32 using namespace phosphor::fan::util;
33 
// D-Bus interfaces that hold the threshold alarm properties.
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

// Service, object path and interface used for the logging "Create" call.
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

// Prefix that the sensor type and threshold name are appended to when
// building an error name.
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

// Interface the sensor "Value" property is read from.
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";

// Interface the association "endpoints" property is read from.
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

// All threshold interfaces; used to filter InterfacesAdded/Removed signals.
const std::vector<std::string> thresholdIfaceNames = {
    warningInterface,
    criticalInterface,
    perfLossInterface,
};
49 
50 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
51 
52 /**
53  * Map of threshold interfaces and alarm properties and values to error data.
54  */
55 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
56     thresholdData{
57 
58         {warningInterface,
59          {{"WarningAlarmHigh",
60            {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
61             {false,
62              ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
63           {"WarningAlarmLow",
64            {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
65             {false,
66              ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
67 
68         {criticalInterface,
69          {{"CriticalAlarmHigh",
70            {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
71             {false,
72              ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
73           {"CriticalAlarmLow",
74            {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
75             {false, ErrorData{"CriticalLow", "Clear",
76                               Entry::Level::Informational}}}}}},
77 
78         {perfLossInterface,
79          {{"PerfLossAlarmHigh",
80            {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
81             {false, ErrorData{"PerformanceLossHigh", "Clear",
82                               Entry::Level::Informational}}}},
83           {"PerfLossAlarmLow",
84            {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
85             {false, ErrorData{"PerformanceLossLow", "Clear",
86                               Entry::Level::Informational}}}}}}};
87 
ThresholdAlarmLogger(sdbusplus::bus_t & bus,sdeventplus::Event & event,std::shared_ptr<PowerState> powerState)88 ThresholdAlarmLogger::ThresholdAlarmLogger(
89     sdbusplus::bus_t& bus, sdeventplus::Event& event,
90     std::shared_ptr<PowerState> powerState) :
91     bus(bus), event(event), _powerState(std::move(powerState)),
92     warningMatch(bus,
93                  "type='signal',member='PropertiesChanged',"
94                  "path_namespace='/xyz/openbmc_project/sensors',"
95                  "arg0='" +
96                      warningInterface + "'",
97                  std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
98                            std::placeholders::_1)),
99     criticalMatch(bus,
100                   "type='signal',member='PropertiesChanged',"
101                   "path_namespace='/xyz/openbmc_project/sensors',"
102                   "arg0='" +
103                       criticalInterface + "'",
104                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
105                             std::placeholders::_1)),
106     perfLossMatch(bus,
107                   "type='signal',member='PropertiesChanged',"
108                   "path_namespace='/xyz/openbmc_project/sensors',"
109                   "arg0='" +
110                       perfLossInterface + "'",
111                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
112                             std::placeholders::_1)),
113     ifacesRemovedMatch(bus,
114                        "type='signal',member='InterfacesRemoved',arg0path="
115                        "'/xyz/openbmc_project/sensors/'",
116                        std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
117                                  std::placeholders::_1)),
118     ifacesAddedMatch(bus,
119                      "type='signal',member='InterfacesAdded',arg0path="
120                      "'/xyz/openbmc_project/sensors/'",
121                      std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
122                                std::placeholders::_1))
123 {
124     _powerState->addCallback("thresholdMon",
125                              std::bind(&ThresholdAlarmLogger::powerStateChanged,
126                                        this, std::placeholders::_1));
127 
128     // check for any currently asserted threshold alarms
129     std::for_each(
130         thresholdData.begin(), thresholdData.end(),
131         [this](const auto& thresholdInterface) {
132             const auto& interface = thresholdInterface.first;
133             auto objects =
134                 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
135             std::for_each(objects.begin(), objects.end(),
136                           [interface, this](const auto& object) {
137                               const auto& path = object.first;
138                               const auto& service =
139                                   object.second.begin()->first;
140                               checkThresholds(interface, path, service);
141                           });
142         });
143 }
144 
propertiesChanged(sdbusplus::message_t & msg)145 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
146 {
147     std::map<std::string, std::variant<bool>> properties;
148     std::string sensorPath = msg.get_path();
149     std::string interface;
150 
151     msg.read(interface, properties);
152 
153     checkProperties(sensorPath, interface, properties);
154 }
155 
interfacesRemoved(sdbusplus::message_t & msg)156 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
157 {
158     sdbusplus::message::object_path path;
159     std::vector<std::string> interfaces;
160 
161     msg.read(path, interfaces);
162 
163     for (const auto& interface : interfaces)
164     {
165         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
166                       interface) != thresholdIfaceNames.end())
167         {
168             alarms.erase(InterfaceKey{path, interface});
169         }
170     }
171 }
172 
interfacesAdded(sdbusplus::message_t & msg)173 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
174 {
175     sdbusplus::message::object_path path;
176     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
177 
178     msg.read(path, interfaces);
179 
180     for (const auto& [interface, properties] : interfaces)
181     {
182         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
183                       interface) != thresholdIfaceNames.end())
184         {
185             checkProperties(path, interface, properties);
186         }
187     }
188 }
189 
checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)190 void ThresholdAlarmLogger::checkProperties(
191     const std::string& sensorPath, const std::string& interface,
192     const std::map<std::string, std::variant<bool>>& properties)
193 {
194     auto alarmProperties = thresholdData.find(interface);
195     if (alarmProperties == thresholdData.end())
196     {
197         return;
198     }
199 
200     for (const auto& [propertyName, propertyValue] : properties)
201     {
202         if (alarmProperties->second.find(propertyName) !=
203             alarmProperties->second.end())
204         {
205             // If this is the first time we've seen this alarm, then
206             // assume it was off before so it doesn't create an event
207             // log for a value of false.
208 
209             InterfaceKey key{sensorPath, interface};
210             if (alarms.find(key) == alarms.end())
211             {
212                 alarms[key][propertyName] = false;
213             }
214 
215             // Check if the value changed from what was there before.
216             auto alarmValue = std::get<bool>(propertyValue);
217             if (alarmValue != alarms[key][propertyName])
218             {
219                 alarms[key][propertyName] = alarmValue;
220 #ifndef SKIP_POWER_CHECKING
221                 if (_powerState->isPowerOn())
222 #endif
223                 {
224                     createEventLog(sensorPath, interface, propertyName,
225                                    alarmValue);
226                 }
227             }
228         }
229     }
230 }
231 
checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)232 void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
233                                            const std::string& sensorPath,
234                                            const std::string& service)
235 {
236     auto properties = thresholdData.find(interface);
237     if (properties == thresholdData.end())
238     {
239         return;
240     }
241 
242     for (const auto& [property, unused] : properties->second)
243     {
244         try
245         {
246             auto alarmValue = SDBusPlus::getProperty<bool>(
247                 bus, service, sensorPath, interface, property);
248             alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
249 
250             // This is just for checking alarms on startup,
251             // so only look for active alarms.
252 #ifdef SKIP_POWER_CHECKING
253             if (alarmValue)
254 #else
255             if (alarmValue && _powerState->isPowerOn())
256 #endif
257             {
258                 createEventLog(sensorPath, interface, property, alarmValue);
259             }
260         }
261         catch (const sdbusplus::exception_t& e)
262         {
263             // Sensor daemons that get their direction from entity manager
264             // may only be putting either the high alarm or low alarm on
265             // D-Bus, not both.
266             continue;
267         }
268     }
269 }
270 
createEventLog(const std::string & sensorPath,const std::string & interface,const std::string & alarmProperty,bool alarmValue)271 void ThresholdAlarmLogger::createEventLog(
272     const std::string& sensorPath, const std::string& interface,
273     const std::string& alarmProperty, bool alarmValue)
274 {
275     std::map<std::string, std::string> ad;
276 
277     auto type = getSensorType(sensorPath);
278     if (skipSensorType(type))
279     {
280         return;
281     }
282 
283     auto it = thresholdData.find(interface);
284     if (it == thresholdData.end())
285     {
286         return;
287     }
288 
289     auto properties = it->second.find(alarmProperty);
290     if (properties == it->second.end())
291     {
292         lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map",
293                   "ALARM_PROPERTY", alarmProperty);
294         return;
295     }
296 
297     ad.emplace("SENSOR_NAME", sensorPath);
298     ad.emplace("_PID", std::to_string(getpid()));
299 
300     try
301     {
302         auto sensorValue = SDBusPlus::getProperty<double>(
303             bus, sensorPath, valueInterface, "Value");
304 
305         ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));
306 
307         lg2::info(
308             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})",
309             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
310             "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue);
311     }
312     catch (const DBusServiceError& e)
313     {
314         // If the sensor was just added, the Value interface for it may
315         // not be in the mapper yet.  This could only happen if the sensor
316         // application was started up after this one and the value exceeded the
317         // threshold immediately.
318         lg2::info(
319             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
320             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
321             "ALARM_VALUE", alarmValue);
322     }
323 
324     auto callout = getCallout(sensorPath);
325     if (!callout.empty())
326     {
327         ad.emplace("CALLOUT_INVENTORY_PATH", callout);
328     }
329 
330     auto errorData = properties->second.find(alarmValue);
331 
332     // Add the base error name and the sensor type (like Temperature) to the
333     // error name that's in the thresholdData name to get something like
334     // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
335     const auto& [name, status, severity] = errorData->second;
336 
337     try
338     {
339         auto thresholdValue =
340             SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);
341 
342         ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));
343 
344         lg2::info(
345             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})",
346             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
347             "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue);
348     }
349     catch (const DBusServiceError& e)
350     {
351         lg2::info(
352             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
353             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
354             "ALARM_VALUE", alarmValue);
355     }
356 
357     type.front() = toupper(type.front());
358     std::string errorName = errorNameBase + type + name + status;
359     if (LOG_SENSOR_NAME_ON_ERROR != 0)
360     {
361         errorName += " on sensor " + getSensorName(sensorPath);
362     }
363 
364     SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
365                           "Create", errorName, convertForMessage(severity), ad);
366 }
367 
getSensorName(const std::string & sensorPath)368 std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath)
369 {
370     auto pos = sensorPath.find_last_of('/');
371     if ((sensorPath.back() == '/') || (pos == std::string::npos))
372     {
373         lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}",
374                    "SENSOR_PATH", sensorPath);
375         return "unknown_sensor";
376     }
377 
378     return sensorPath.substr(pos + 1);
379 }
380 
getSensorType(std::string sensorPath)381 std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
382 {
383     auto pos = sensorPath.find_last_of('/');
384     if ((sensorPath.back() == '/') || (pos == std::string::npos))
385     {
386         lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}",
387                    "SENSOR_PATH", sensorPath);
388         throw std::runtime_error("Invalid sensor path");
389     }
390 
391     sensorPath = sensorPath.substr(0, pos);
392     return sensorPath.substr(sensorPath.find_last_of('/') + 1);
393 }
394 
skipSensorType(const std::string & type)395 bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
396 {
397     return (type == "utilization");
398 }
399 
getCallout(const std::string & sensorPath)400 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
401 {
402     const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
403 
404     // Different implementations handle the association to the FRU
405     // differently:
406     //  * phosphor-inventory-manager uses the 'inventory' association
407     //    to point to the FRU.
408     //  * dbus-sensors/entity-manager uses the 'chassis' association'.
409     //  * For virtual sensors, no association.
410 
411     for (const auto& assocType : assocTypes)
412     {
413         auto assocPath = sensorPath + "/" + assocType;
414 
415         try
416         {
417             auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
418                 bus, assocPath, assocInterface, "endpoints");
419 
420             if (!endpoints.empty())
421             {
422                 return endpoints[0];
423             }
424         }
425         catch (const DBusServiceError& e)
426         {
427             // The association doesn't exist
428             continue;
429         }
430     }
431 
432     return std::string{};
433 }
434 
powerStateChanged(bool powerStateOn)435 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
436 {
437     if (powerStateOn)
438     {
439         checkThresholds();
440     }
441 }
442 
checkThresholds()443 void ThresholdAlarmLogger::checkThresholds()
444 {
445     std::vector<InterfaceKey> toErase;
446 
447     for (const auto& [interfaceKey, alarmMap] : alarms)
448     {
449         for (const auto& [propertyName, alarmValue] : alarmMap)
450         {
451             if (alarmValue)
452             {
453                 const auto& sensorPath = std::get<0>(interfaceKey);
454                 const auto& interface = std::get<1>(interfaceKey);
455                 std::string service;
456 
457                 try
458                 {
459                     // Check that the service that provides the alarm is still
460                     // running, because if it died when the alarm was active
461                     // there would be no indication of it unless we listened
462                     // for NameOwnerChanged and tracked services, and this is
463                     // easier.
464                     service = SDBusPlus::getService(bus, sensorPath, interface);
465                 }
466                 catch (const DBusServiceError& e)
467                 {
468                     // No longer on D-Bus delete the alarm entry
469                     toErase.emplace_back(sensorPath, interface);
470                 }
471 
472                 if (!service.empty())
473                 {
474                     createEventLog(sensorPath, interface, propertyName,
475                                    alarmValue);
476                 }
477             }
478         }
479     }
480 
481     for (const auto& e : toErase)
482     {
483         alarms.erase(e);
484     }
485 }
486 
487 } // namespace sensor::monitor
488