xref: /openbmc/phosphor-fan-presence/sensor-monitor/threshold_alarm_logger.cpp (revision cd4d31b309c6716e3e8fa2cad08c93880265c8ef)
1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "threshold_alarm_logger.hpp"
19 
20 #include "sdbusplus.hpp"
21 
22 #include <unistd.h>
23 
24 #include <phosphor-logging/lg2.hpp>
25 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
26 
27 namespace sensor::monitor
28 {
29 
30 using namespace sdbusplus::xyz::openbmc_project::Logging::server;
31 using namespace phosphor::fan;
32 using namespace phosphor::fan::util;
33 
// D-Bus interfaces that hold the threshold alarm properties.
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

// Service, object path and interface used when creating an event log.
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

// Prefix of every threshold error name; the sensor type and the threshold
// name get appended to it.
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

// Interfaces read when collecting additional event log data.
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

// All of the threshold interfaces, used to filter the interfaces carried by
// InterfacesAdded/InterfacesRemoved signals.
const std::vector<std::string> thresholdIfaceNames = {
    warningInterface,
    criticalInterface,
    perfLossInterface,
};
49 
50 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
51 
52 /**
53  * Map of threshold interfaces and alarm properties and values to error data.
54  */
55 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
56     thresholdData{
57 
58         {warningInterface,
59          {{"WarningAlarmHigh",
60            {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
61             {false,
62              ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
63           {"WarningAlarmLow",
64            {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
65             {false,
66              ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
67 
68         {criticalInterface,
69          {{"CriticalAlarmHigh",
70            {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
71             {false,
72              ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
73           {"CriticalAlarmLow",
74            {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
75             {false, ErrorData{"CriticalLow", "Clear",
76                               Entry::Level::Informational}}}}}},
77 
78         {perfLossInterface,
79          {{"PerfLossAlarmHigh",
80            {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
81             {false, ErrorData{"PerformanceLossHigh", "Clear",
82                               Entry::Level::Informational}}}},
83           {"PerfLossAlarmLow",
84            {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
85             {false, ErrorData{"PerformanceLossLow", "Clear",
86                               Entry::Level::Informational}}}}}}};
87 
ThresholdAlarmLogger(sdbusplus::bus_t & bus,std::shared_ptr<PowerState> powerState)88 ThresholdAlarmLogger::ThresholdAlarmLogger(
89     sdbusplus::bus_t& bus, std::shared_ptr<PowerState> powerState) :
90     bus(bus), _powerState(std::move(powerState)),
91     warningMatch(bus,
92                  "type='signal',member='PropertiesChanged',"
93                  "path_namespace='/xyz/openbmc_project/sensors',"
94                  "arg0='" +
95                      warningInterface + "'",
96                  std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
97                            std::placeholders::_1)),
98     criticalMatch(bus,
99                   "type='signal',member='PropertiesChanged',"
100                   "path_namespace='/xyz/openbmc_project/sensors',"
101                   "arg0='" +
102                       criticalInterface + "'",
103                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
104                             std::placeholders::_1)),
105     perfLossMatch(bus,
106                   "type='signal',member='PropertiesChanged',"
107                   "path_namespace='/xyz/openbmc_project/sensors',"
108                   "arg0='" +
109                       perfLossInterface + "'",
110                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
111                             std::placeholders::_1)),
112     ifacesRemovedMatch(bus,
113                        "type='signal',member='InterfacesRemoved',arg0path="
114                        "'/xyz/openbmc_project/sensors/'",
115                        std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
116                                  std::placeholders::_1)),
117     ifacesAddedMatch(bus,
118                      "type='signal',member='InterfacesAdded',arg0path="
119                      "'/xyz/openbmc_project/sensors/'",
120                      std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
121                                std::placeholders::_1))
122 {
123     _powerState->addCallback("thresholdMon",
124                              std::bind(&ThresholdAlarmLogger::powerStateChanged,
125                                        this, std::placeholders::_1));
126 
127     // check for any currently asserted threshold alarms
128     std::for_each(
129         thresholdData.begin(), thresholdData.end(),
130         [this](const auto& thresholdInterface) {
131             const auto& interface = thresholdInterface.first;
132             auto objects =
133                 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
134             std::for_each(objects.begin(), objects.end(),
135                           [interface, this](const auto& object) {
136                               const auto& path = object.first;
137                               const auto& service =
138                                   object.second.begin()->first;
139                               this->checkThresholds(interface, path, service);
140                           });
141         });
142 }
143 
propertiesChanged(sdbusplus::message_t & msg)144 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
145 {
146     std::map<std::string, std::variant<bool>> properties;
147     std::string sensorPath = msg.get_path();
148     std::string interface;
149 
150     msg.read(interface, properties);
151 
152     checkProperties(sensorPath, interface, properties);
153 }
154 
interfacesRemoved(sdbusplus::message_t & msg)155 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
156 {
157     sdbusplus::message::object_path path;
158     std::vector<std::string> interfaces;
159 
160     msg.read(path, interfaces);
161 
162     for (const auto& interface : interfaces)
163     {
164         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
165                       interface) != thresholdIfaceNames.end())
166         {
167             alarms.erase(InterfaceKey{path, interface});
168         }
169     }
170 }
171 
interfacesAdded(sdbusplus::message_t & msg)172 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
173 {
174     sdbusplus::message::object_path path;
175     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
176 
177     msg.read(path, interfaces);
178 
179     for (const auto& [interface, properties] : interfaces)
180     {
181         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
182                       interface) != thresholdIfaceNames.end())
183         {
184             checkProperties(path, interface, properties);
185         }
186     }
187 }
188 
checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)189 void ThresholdAlarmLogger::checkProperties(
190     const std::string& sensorPath, const std::string& interface,
191     const std::map<std::string, std::variant<bool>>& properties)
192 {
193     auto alarmProperties = thresholdData.find(interface);
194     if (alarmProperties == thresholdData.end())
195     {
196         return;
197     }
198 
199     for (const auto& [propertyName, propertyValue] : properties)
200     {
201         if (alarmProperties->second.find(propertyName) !=
202             alarmProperties->second.end())
203         {
204             // If this is the first time we've seen this alarm, then
205             // assume it was off before so it doesn't create an event
206             // log for a value of false.
207 
208             InterfaceKey key{sensorPath, interface};
209             if (alarms.find(key) == alarms.end())
210             {
211                 alarms[key][propertyName] = false;
212             }
213 
214             // Check if the value changed from what was there before.
215             auto alarmValue = std::get<bool>(propertyValue);
216             if (alarmValue != alarms[key][propertyName])
217             {
218                 alarms[key][propertyName] = alarmValue;
219 #ifndef SKIP_POWER_CHECKING
220                 if (_powerState->isPowerOn())
221 #endif
222                 {
223                     createEventLog(sensorPath, interface, propertyName,
224                                    alarmValue);
225                 }
226             }
227         }
228     }
229 }
230 
checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)231 void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
232                                            const std::string& sensorPath,
233                                            const std::string& service)
234 {
235     auto properties = thresholdData.find(interface);
236     if (properties == thresholdData.end())
237     {
238         return;
239     }
240 
241     for (const auto& [property, unused] : properties->second)
242     {
243         try
244         {
245             auto alarmValue = SDBusPlus::getProperty<bool>(
246                 bus, service, sensorPath, interface, property);
247             alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
248 
249             // This is just for checking alarms on startup,
250             // so only look for active alarms.
251 #ifdef SKIP_POWER_CHECKING
252             if (alarmValue)
253 #else
254             if (alarmValue && _powerState->isPowerOn())
255 #endif
256             {
257                 createEventLog(sensorPath, interface, property, alarmValue);
258             }
259         }
260         catch (const sdbusplus::exception_t& e)
261         {
262             // Sensor daemons that get their direction from entity manager
263             // may only be putting either the high alarm or low alarm on
264             // D-Bus, not both.
265             continue;
266         }
267     }
268 }
269 
createEventLog(const std::string & sensorPath,const std::string & interface,const std::string & alarmProperty,bool alarmValue)270 void ThresholdAlarmLogger::createEventLog(
271     const std::string& sensorPath, const std::string& interface,
272     const std::string& alarmProperty, bool alarmValue)
273 {
274     std::map<std::string, std::string> ad;
275 
276     auto type = getSensorType(sensorPath);
277     if (skipSensorType(type))
278     {
279         return;
280     }
281 
282     auto it = thresholdData.find(interface);
283     if (it == thresholdData.end())
284     {
285         return;
286     }
287 
288     auto properties = it->second.find(alarmProperty);
289     if (properties == it->second.end())
290     {
291         lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map",
292                   "ALARM_PROPERTY", alarmProperty);
293         return;
294     }
295 
296     ad.emplace("SENSOR_NAME", sensorPath);
297     ad.emplace("_PID", std::to_string(getpid()));
298 
299     try
300     {
301         auto sensorValue = SDBusPlus::getProperty<double>(
302             bus, sensorPath, valueInterface, "Value");
303 
304         ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));
305 
306         lg2::info(
307             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})",
308             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
309             "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue);
310     }
311     catch (const DBusServiceError& e)
312     {
313         // If the sensor was just added, the Value interface for it may
314         // not be in the mapper yet.  This could only happen if the sensor
315         // application was started up after this one and the value exceeded the
316         // threshold immediately.
317         lg2::info(
318             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
319             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
320             "ALARM_VALUE", alarmValue);
321     }
322 
323     auto callout = getCallout(sensorPath);
324     if (!callout.empty())
325     {
326         ad.emplace("CALLOUT_INVENTORY_PATH", callout);
327     }
328 
329     auto errorData = properties->second.find(alarmValue);
330 
331     // Add the base error name and the sensor type (like Temperature) to the
332     // error name that's in the thresholdData name to get something like
333     // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
334     const auto& [name, status, severity] = errorData->second;
335 
336     try
337     {
338         auto thresholdValue =
339             SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);
340 
341         ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));
342 
343         lg2::info(
344             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})",
345             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
346             "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue);
347     }
348     catch (const DBusServiceError& e)
349     {
350         lg2::info(
351             "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
352             "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
353             "ALARM_VALUE", alarmValue);
354     }
355 
356     type.front() = toupper(type.front());
357     std::string errorName = errorNameBase + type + name + status;
358     if (LOG_SENSOR_NAME_ON_ERROR != 0)
359     {
360         errorName += " on sensor " + getSensorName(sensorPath);
361     }
362 
363     SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
364                           "Create", errorName, convertForMessage(severity), ad);
365 }
366 
getSensorName(const std::string & sensorPath)367 std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath)
368 {
369     auto pos = sensorPath.find_last_of('/');
370     if ((sensorPath.back() == '/') || (pos == std::string::npos))
371     {
372         lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}",
373                    "SENSOR_PATH", sensorPath);
374         return "unknown_sensor";
375     }
376 
377     return sensorPath.substr(pos + 1);
378 }
379 
getSensorType(std::string sensorPath)380 std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
381 {
382     auto pos = sensorPath.find_last_of('/');
383     if ((sensorPath.back() == '/') || (pos == std::string::npos))
384     {
385         lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}",
386                    "SENSOR_PATH", sensorPath);
387         throw std::runtime_error("Invalid sensor path");
388     }
389 
390     sensorPath = sensorPath.substr(0, pos);
391     return sensorPath.substr(sensorPath.find_last_of('/') + 1);
392 }
393 
skipSensorType(const std::string & type)394 bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
395 {
396     return (type == "utilization");
397 }
398 
getCallout(const std::string & sensorPath)399 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
400 {
401     const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
402 
403     // Different implementations handle the association to the FRU
404     // differently:
405     //  * phosphor-inventory-manager uses the 'inventory' association
406     //    to point to the FRU.
407     //  * dbus-sensors/entity-manager uses the 'chassis' association'.
408     //  * For virtual sensors, no association.
409 
410     for (const auto& assocType : assocTypes)
411     {
412         auto assocPath = sensorPath + "/" + assocType;
413 
414         try
415         {
416             auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
417                 bus, assocPath, assocInterface, "endpoints");
418 
419             if (!endpoints.empty())
420             {
421                 return endpoints[0];
422             }
423         }
424         catch (const DBusServiceError& e)
425         {
426             // The association doesn't exist
427             continue;
428         }
429     }
430 
431     return std::string{};
432 }
433 
powerStateChanged(bool powerStateOn)434 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
435 {
436     if (powerStateOn)
437     {
438         checkThresholds();
439     }
440 }
441 
checkThresholds()442 void ThresholdAlarmLogger::checkThresholds()
443 {
444     std::vector<InterfaceKey> toErase;
445 
446     for (const auto& [interfaceKey, alarmMap] : alarms)
447     {
448         for (const auto& [propertyName, alarmValue] : alarmMap)
449         {
450             if (alarmValue)
451             {
452                 const auto& sensorPath = std::get<0>(interfaceKey);
453                 const auto& interface = std::get<1>(interfaceKey);
454                 std::string service;
455 
456                 try
457                 {
458                     // Check that the service that provides the alarm is still
459                     // running, because if it died when the alarm was active
460                     // there would be no indication of it unless we listened
461                     // for NameOwnerChanged and tracked services, and this is
462                     // easier.
463                     service = SDBusPlus::getService(bus, sensorPath, interface);
464                 }
465                 catch (const DBusServiceError& e)
466                 {
467                     // No longer on D-Bus delete the alarm entry
468                     toErase.emplace_back(sensorPath, interface);
469                 }
470 
471                 if (!service.empty())
472                 {
473                     createEventLog(sensorPath, interface, propertyName,
474                                    alarmValue);
475                 }
476             }
477         }
478     }
479 
480     for (const auto& e : toErase)
481     {
482         alarms.erase(e);
483     }
484 }
485 
486 } // namespace sensor::monitor
487