xref: /openbmc/phosphor-fan-presence/sensor-monitor/threshold_alarm_logger.cpp (revision 32c4feff9b1cd1fd4d75bb530761f70f0e5922e0)
1  /**
2   * Copyright © 2021 IBM Corporation
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   *     http://www.apache.org/licenses/LICENSE-2.0
9   *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  #include "config.h"
17  
18  #include "threshold_alarm_logger.hpp"
19  
20  #include "sdbusplus.hpp"
21  
22  #include <unistd.h>
23  
24  #include <phosphor-logging/lg2.hpp>
25  #include <xyz/openbmc_project/Logging/Entry/server.hpp>
26  
27  namespace sensor::monitor
28  {
29  
30  using namespace sdbusplus::xyz::openbmc_project::Logging::server;
31  using namespace phosphor::fan;
32  using namespace phosphor::fan::util;
33  
// D-Bus interfaces that hold the threshold alarm properties.
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

// Service, object path and interface used to create event logs.
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

// Prefix of every threshold error name; the sensor type and the
// threshold specific name are appended to it.
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

// Other D-Bus interfaces read from when building an event log.
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

// All of the threshold interfaces, for membership checks.
const std::vector<std::string> thresholdIfaceNames{
    warningInterface, criticalInterface, perfLossInterface};
49  
50  using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
51  
52  /**
53   * Map of threshold interfaces and alarm properties and values to error data.
54   */
55  const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
56      thresholdData{
57  
58          {warningInterface,
59           {{"WarningAlarmHigh",
60             {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
61              {false,
62               ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
63            {"WarningAlarmLow",
64             {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
65              {false,
66               ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
67  
68          {criticalInterface,
69           {{"CriticalAlarmHigh",
70             {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
71              {false,
72               ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
73            {"CriticalAlarmLow",
74             {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
75              {false, ErrorData{"CriticalLow", "Clear",
76                                Entry::Level::Informational}}}}}},
77  
78          {perfLossInterface,
79           {{"PerfLossAlarmHigh",
80             {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
81              {false, ErrorData{"PerformanceLossHigh", "Clear",
82                                Entry::Level::Informational}}}},
83            {"PerfLossAlarmLow",
84             {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
85              {false, ErrorData{"PerformanceLossLow", "Clear",
86                                Entry::Level::Informational}}}}}}};
87  
ThresholdAlarmLogger(sdbusplus::bus_t & bus,sdeventplus::Event & event,std::shared_ptr<PowerState> powerState)88  ThresholdAlarmLogger::ThresholdAlarmLogger(
89      sdbusplus::bus_t& bus, sdeventplus::Event& event,
90      std::shared_ptr<PowerState> powerState) :
91      bus(bus), event(event), _powerState(std::move(powerState)),
92      warningMatch(bus,
93                   "type='signal',member='PropertiesChanged',"
94                   "path_namespace='/xyz/openbmc_project/sensors',"
95                   "arg0='" +
96                       warningInterface + "'",
97                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
98                             std::placeholders::_1)),
99      criticalMatch(bus,
100                    "type='signal',member='PropertiesChanged',"
101                    "path_namespace='/xyz/openbmc_project/sensors',"
102                    "arg0='" +
103                        criticalInterface + "'",
104                    std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
105                              std::placeholders::_1)),
106      perfLossMatch(bus,
107                    "type='signal',member='PropertiesChanged',"
108                    "path_namespace='/xyz/openbmc_project/sensors',"
109                    "arg0='" +
110                        perfLossInterface + "'",
111                    std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
112                              std::placeholders::_1)),
113      ifacesRemovedMatch(bus,
114                         "type='signal',member='InterfacesRemoved',arg0path="
115                         "'/xyz/openbmc_project/sensors/'",
116                         std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
117                                   std::placeholders::_1)),
118      ifacesAddedMatch(bus,
119                       "type='signal',member='InterfacesAdded',arg0path="
120                       "'/xyz/openbmc_project/sensors/'",
121                       std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
122                                 std::placeholders::_1))
123  {
124      _powerState->addCallback("thresholdMon",
125                               std::bind(&ThresholdAlarmLogger::powerStateChanged,
126                                         this, std::placeholders::_1));
127  
128      // check for any currently asserted threshold alarms
129      std::for_each(
130          thresholdData.begin(), thresholdData.end(),
131          [this](const auto& thresholdInterface) {
132              const auto& interface = thresholdInterface.first;
133              auto objects =
134                  SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
135              std::for_each(objects.begin(), objects.end(),
136                            [interface, this](const auto& object) {
137                                const auto& path = object.first;
138                                const auto& service =
139                                    object.second.begin()->first;
140                                checkThresholds(interface, path, service);
141                            });
142          });
143  }
144  
propertiesChanged(sdbusplus::message_t & msg)145  void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
146  {
147      std::map<std::string, std::variant<bool>> properties;
148      std::string sensorPath = msg.get_path();
149      std::string interface;
150  
151      msg.read(interface, properties);
152  
153      checkProperties(sensorPath, interface, properties);
154  }
155  
interfacesRemoved(sdbusplus::message_t & msg)156  void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
157  {
158      sdbusplus::message::object_path path;
159      std::vector<std::string> interfaces;
160  
161      msg.read(path, interfaces);
162  
163      for (const auto& interface : interfaces)
164      {
165          if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
166                        interface) != thresholdIfaceNames.end())
167          {
168              alarms.erase(InterfaceKey{path, interface});
169          }
170      }
171  }
172  
interfacesAdded(sdbusplus::message_t & msg)173  void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
174  {
175      sdbusplus::message::object_path path;
176      std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
177  
178      msg.read(path, interfaces);
179  
180      for (const auto& [interface, properties] : interfaces)
181      {
182          if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
183                        interface) != thresholdIfaceNames.end())
184          {
185              checkProperties(path, interface, properties);
186          }
187      }
188  }
189  
checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)190  void ThresholdAlarmLogger::checkProperties(
191      const std::string& sensorPath, const std::string& interface,
192      const std::map<std::string, std::variant<bool>>& properties)
193  {
194      auto alarmProperties = thresholdData.find(interface);
195      if (alarmProperties == thresholdData.end())
196      {
197          return;
198      }
199  
200      for (const auto& [propertyName, propertyValue] : properties)
201      {
202          if (alarmProperties->second.find(propertyName) !=
203              alarmProperties->second.end())
204          {
205              // If this is the first time we've seen this alarm, then
206              // assume it was off before so it doesn't create an event
207              // log for a value of false.
208  
209              InterfaceKey key{sensorPath, interface};
210              if (alarms.find(key) == alarms.end())
211              {
212                  alarms[key][propertyName] = false;
213              }
214  
215              // Check if the value changed from what was there before.
216              auto alarmValue = std::get<bool>(propertyValue);
217              if (alarmValue != alarms[key][propertyName])
218              {
219                  alarms[key][propertyName] = alarmValue;
220  #ifndef SKIP_POWER_CHECKING
221                  if (_powerState->isPowerOn())
222  #endif
223                  {
224                      createEventLog(sensorPath, interface, propertyName,
225                                     alarmValue);
226                  }
227              }
228          }
229      }
230  }
231  
checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)232  void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
233                                             const std::string& sensorPath,
234                                             const std::string& service)
235  {
236      auto properties = thresholdData.find(interface);
237      if (properties == thresholdData.end())
238      {
239          return;
240      }
241  
242      for (const auto& [property, unused] : properties->second)
243      {
244          try
245          {
246              auto alarmValue = SDBusPlus::getProperty<bool>(
247                  bus, service, sensorPath, interface, property);
248              alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
249  
250              // This is just for checking alarms on startup,
251              // so only look for active alarms.
252  #ifdef SKIP_POWER_CHECKING
253              if (alarmValue)
254  #else
255              if (alarmValue && _powerState->isPowerOn())
256  #endif
257              {
258                  createEventLog(sensorPath, interface, property, alarmValue);
259              }
260          }
261          catch (const sdbusplus::exception_t& e)
262          {
263              // Sensor daemons that get their direction from entity manager
264              // may only be putting either the high alarm or low alarm on
265              // D-Bus, not both.
266              continue;
267          }
268      }
269  }
270  
createEventLog(const std::string & sensorPath,const std::string & interface,const std::string & alarmProperty,bool alarmValue)271  void ThresholdAlarmLogger::createEventLog(
272      const std::string& sensorPath, const std::string& interface,
273      const std::string& alarmProperty, bool alarmValue)
274  {
275      std::map<std::string, std::string> ad;
276  
277      auto type = getSensorType(sensorPath);
278      if (skipSensorType(type))
279      {
280          return;
281      }
282  
283      auto it = thresholdData.find(interface);
284      if (it == thresholdData.end())
285      {
286          return;
287      }
288  
289      auto properties = it->second.find(alarmProperty);
290      if (properties == it->second.end())
291      {
292          lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map",
293                    "ALARM_PROPERTY", alarmProperty);
294          return;
295      }
296  
297      ad.emplace("SENSOR_NAME", sensorPath);
298      ad.emplace("_PID", std::to_string(getpid()));
299  
300      try
301      {
302          auto sensorValue = SDBusPlus::getProperty<double>(
303              bus, sensorPath, valueInterface, "Value");
304  
305          ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));
306  
307          lg2::info(
308              "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})",
309              "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
310              "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue);
311      }
312      catch (const DBusServiceError& e)
313      {
314          // If the sensor was just added, the Value interface for it may
315          // not be in the mapper yet.  This could only happen if the sensor
316          // application was started up after this one and the value exceeded the
317          // threshold immediately.
318          lg2::info(
319              "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
320              "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
321              "ALARM_VALUE", alarmValue);
322      }
323  
324      auto callout = getCallout(sensorPath);
325      if (!callout.empty())
326      {
327          ad.emplace("CALLOUT_INVENTORY_PATH", callout);
328      }
329  
330      auto errorData = properties->second.find(alarmValue);
331  
332      // Add the base error name and the sensor type (like Temperature) to the
333      // error name that's in the thresholdData name to get something like
334      // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
335      const auto& [name, status, severity] = errorData->second;
336  
337      try
338      {
339          auto thresholdValue =
340              SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);
341  
342          ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));
343  
344          lg2::info(
345              "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})",
346              "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
347              "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue);
348      }
349      catch (const DBusServiceError& e)
350      {
351          lg2::info(
352              "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
353              "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
354              "ALARM_VALUE", alarmValue);
355      }
356  
357      type.front() = toupper(type.front());
358      std::string errorName = errorNameBase + type + name + status;
359      if (LOG_SENSOR_NAME_ON_ERROR != 0)
360      {
361          errorName += " on sensor " + getSensorName(sensorPath);
362      }
363  
364      SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
365                            "Create", errorName, convertForMessage(severity), ad);
366  }
367  
getSensorName(const std::string & sensorPath)368  std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath)
369  {
370      auto pos = sensorPath.find_last_of('/');
371      if ((sensorPath.back() == '/') || (pos == std::string::npos))
372      {
373          lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}",
374                     "SENSOR_PATH", sensorPath);
375          return "unknown_sensor";
376      }
377  
378      return sensorPath.substr(pos + 1);
379  }
380  
getSensorType(std::string sensorPath)381  std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
382  {
383      auto pos = sensorPath.find_last_of('/');
384      if ((sensorPath.back() == '/') || (pos == std::string::npos))
385      {
386          lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}",
387                     "SENSOR_PATH", sensorPath);
388          throw std::runtime_error("Invalid sensor path");
389      }
390  
391      sensorPath = sensorPath.substr(0, pos);
392      return sensorPath.substr(sensorPath.find_last_of('/') + 1);
393  }
394  
skipSensorType(const std::string & type)395  bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
396  {
397      return (type == "utilization");
398  }
399  
getCallout(const std::string & sensorPath)400  std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
401  {
402      const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
403  
404      // Different implementations handle the association to the FRU
405      // differently:
406      //  * phosphor-inventory-manager uses the 'inventory' association
407      //    to point to the FRU.
408      //  * dbus-sensors/entity-manager uses the 'chassis' association'.
409      //  * For virtual sensors, no association.
410  
411      for (const auto& assocType : assocTypes)
412      {
413          auto assocPath = sensorPath + "/" + assocType;
414  
415          try
416          {
417              auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
418                  bus, assocPath, assocInterface, "endpoints");
419  
420              if (!endpoints.empty())
421              {
422                  return endpoints[0];
423              }
424          }
425          catch (const DBusServiceError& e)
426          {
427              // The association doesn't exist
428              continue;
429          }
430      }
431  
432      return std::string{};
433  }
434  
powerStateChanged(bool powerStateOn)435  void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
436  {
437      if (powerStateOn)
438      {
439          checkThresholds();
440      }
441  }
442  
checkThresholds()443  void ThresholdAlarmLogger::checkThresholds()
444  {
445      std::vector<InterfaceKey> toErase;
446  
447      for (const auto& [interfaceKey, alarmMap] : alarms)
448      {
449          for (const auto& [propertyName, alarmValue] : alarmMap)
450          {
451              if (alarmValue)
452              {
453                  const auto& sensorPath = std::get<0>(interfaceKey);
454                  const auto& interface = std::get<1>(interfaceKey);
455                  std::string service;
456  
457                  try
458                  {
459                      // Check that the service that provides the alarm is still
460                      // running, because if it died when the alarm was active
461                      // there would be no indication of it unless we listened
462                      // for NameOwnerChanged and tracked services, and this is
463                      // easier.
464                      service = SDBusPlus::getService(bus, sensorPath, interface);
465                  }
466                  catch (const DBusServiceError& e)
467                  {
468                      // No longer on D-Bus delete the alarm entry
469                      toErase.emplace_back(sensorPath, interface);
470                  }
471  
472                  if (!service.empty())
473                  {
474                      createEventLog(sensorPath, interface, propertyName,
475                                     alarmValue);
476                  }
477              }
478          }
479      }
480  
481      for (const auto& e : toErase)
482      {
483          alarms.erase(e);
484      }
485  }
486  
487  } // namespace sensor::monitor
488