1 /**
2  * Copyright © 2021 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "config.h"
17 
18 #include "threshold_alarm_logger.hpp"
19 
20 #include "sdbusplus.hpp"
21 
22 #include <unistd.h>
23 
24 #include <phosphor-logging/log.hpp>
25 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
26 
27 #include <format>
28 
29 namespace sensor::monitor
30 {
31 
32 using namespace sdbusplus::xyz::openbmc_project::Logging::server;
33 using namespace phosphor::logging;
34 using namespace phosphor::fan;
35 using namespace phosphor::fan::util;
36 
// D-Bus interfaces that carry the threshold alarm properties.
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

// Service, object path and interface used when creating an event log.
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

// Common prefix of the error names built in createEventLog().
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

// Interfaces read for the sensor value and the callout association.
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

// The threshold interfaces the InterfacesAdded/Removed handlers care about.
const std::vector<std::string> thresholdIfaceNames{
    warningInterface, criticalInterface, perfLossInterface};
52 
53 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
54 
55 /**
56  * Map of threshold interfaces and alarm properties and values to error data.
57  */
58 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
59     thresholdData{
60 
61         {warningInterface,
62          {{"WarningAlarmHigh",
63            {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
64             {false,
65              ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
66           {"WarningAlarmLow",
67            {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
68             {false,
69              ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
70 
71         {criticalInterface,
72          {{"CriticalAlarmHigh",
73            {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
74             {false,
75              ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
76           {"CriticalAlarmLow",
77            {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
78             {false, ErrorData{"CriticalLow", "Clear",
79                               Entry::Level::Informational}}}}}},
80 
81         {perfLossInterface,
82          {{"PerfLossAlarmHigh",
83            {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
84             {false, ErrorData{"PerformanceLossHigh", "Clear",
85                               Entry::Level::Informational}}}},
86           {"PerfLossAlarmLow",
87            {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
88             {false, ErrorData{"PerformanceLossLow", "Clear",
89                               Entry::Level::Informational}}}}}}};
90 
ThresholdAlarmLogger(sdbusplus::bus_t & bus,sdeventplus::Event & event,std::shared_ptr<PowerState> powerState)91 ThresholdAlarmLogger::ThresholdAlarmLogger(
92     sdbusplus::bus_t& bus, sdeventplus::Event& event,
93     std::shared_ptr<PowerState> powerState) :
94     bus(bus), event(event), _powerState(std::move(powerState)),
95     warningMatch(bus,
96                  "type='signal',member='PropertiesChanged',"
97                  "path_namespace='/xyz/openbmc_project/sensors',"
98                  "arg0='" +
99                      warningInterface + "'",
100                  std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
101                            std::placeholders::_1)),
102     criticalMatch(bus,
103                   "type='signal',member='PropertiesChanged',"
104                   "path_namespace='/xyz/openbmc_project/sensors',"
105                   "arg0='" +
106                       criticalInterface + "'",
107                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
108                             std::placeholders::_1)),
109     perfLossMatch(bus,
110                   "type='signal',member='PropertiesChanged',"
111                   "path_namespace='/xyz/openbmc_project/sensors',"
112                   "arg0='" +
113                       perfLossInterface + "'",
114                   std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
115                             std::placeholders::_1)),
116     ifacesRemovedMatch(bus,
117                        "type='signal',member='InterfacesRemoved',arg0path="
118                        "'/xyz/openbmc_project/sensors/'",
119                        std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
120                                  std::placeholders::_1)),
121     ifacesAddedMatch(bus,
122                      "type='signal',member='InterfacesAdded',arg0path="
123                      "'/xyz/openbmc_project/sensors/'",
124                      std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
125                                std::placeholders::_1))
126 {
127     _powerState->addCallback("thresholdMon",
128                              std::bind(&ThresholdAlarmLogger::powerStateChanged,
129                                        this, std::placeholders::_1));
130 
131     // check for any currently asserted threshold alarms
132     std::for_each(
133         thresholdData.begin(), thresholdData.end(),
134         [this](const auto& thresholdInterface) {
135             const auto& interface = thresholdInterface.first;
136             auto objects =
137                 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
138             std::for_each(objects.begin(), objects.end(),
139                           [interface, this](const auto& object) {
140                               const auto& path = object.first;
141                               const auto& service =
142                                   object.second.begin()->first;
143                               checkThresholds(interface, path, service);
144                           });
145         });
146 }
147 
propertiesChanged(sdbusplus::message_t & msg)148 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
149 {
150     std::map<std::string, std::variant<bool>> properties;
151     std::string sensorPath = msg.get_path();
152     std::string interface;
153 
154     msg.read(interface, properties);
155 
156     checkProperties(sensorPath, interface, properties);
157 }
158 
interfacesRemoved(sdbusplus::message_t & msg)159 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
160 {
161     sdbusplus::message::object_path path;
162     std::vector<std::string> interfaces;
163 
164     msg.read(path, interfaces);
165 
166     for (const auto& interface : interfaces)
167     {
168         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
169                       interface) != thresholdIfaceNames.end())
170         {
171             alarms.erase(InterfaceKey{path, interface});
172         }
173     }
174 }
175 
interfacesAdded(sdbusplus::message_t & msg)176 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
177 {
178     sdbusplus::message::object_path path;
179     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
180 
181     msg.read(path, interfaces);
182 
183     for (const auto& [interface, properties] : interfaces)
184     {
185         if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
186                       interface) != thresholdIfaceNames.end())
187         {
188             checkProperties(path, interface, properties);
189         }
190     }
191 }
192 
checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)193 void ThresholdAlarmLogger::checkProperties(
194     const std::string& sensorPath, const std::string& interface,
195     const std::map<std::string, std::variant<bool>>& properties)
196 {
197     auto alarmProperties = thresholdData.find(interface);
198     if (alarmProperties == thresholdData.end())
199     {
200         return;
201     }
202 
203     for (const auto& [propertyName, propertyValue] : properties)
204     {
205         if (alarmProperties->second.find(propertyName) !=
206             alarmProperties->second.end())
207         {
208             // If this is the first time we've seen this alarm, then
209             // assume it was off before so it doesn't create an event
210             // log for a value of false.
211 
212             InterfaceKey key{sensorPath, interface};
213             if (alarms.find(key) == alarms.end())
214             {
215                 alarms[key][propertyName] = false;
216             }
217 
218             // Check if the value changed from what was there before.
219             auto alarmValue = std::get<bool>(propertyValue);
220             if (alarmValue != alarms[key][propertyName])
221             {
222                 alarms[key][propertyName] = alarmValue;
223 #ifndef SKIP_POWER_CHECKING
224                 if (_powerState->isPowerOn())
225 #endif
226                 {
227                     createEventLog(sensorPath, interface, propertyName,
228                                    alarmValue);
229                 }
230             }
231         }
232     }
233 }
234 
checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)235 void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
236                                            const std::string& sensorPath,
237                                            const std::string& service)
238 {
239     auto properties = thresholdData.find(interface);
240     if (properties == thresholdData.end())
241     {
242         return;
243     }
244 
245     for (const auto& [property, unused] : properties->second)
246     {
247         try
248         {
249             auto alarmValue = SDBusPlus::getProperty<bool>(
250                 bus, service, sensorPath, interface, property);
251             alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
252 
253             // This is just for checking alarms on startup,
254             // so only look for active alarms.
255 #ifdef SKIP_POWER_CHECKING
256             if (alarmValue)
257 #else
258             if (alarmValue && _powerState->isPowerOn())
259 #endif
260             {
261                 createEventLog(sensorPath, interface, property, alarmValue);
262             }
263         }
264         catch (const sdbusplus::exception_t& e)
265         {
266             // Sensor daemons that get their direction from entity manager
267             // may only be putting either the high alarm or low alarm on
268             // D-Bus, not both.
269             continue;
270         }
271     }
272 }
273 
/**
 * @brief Creates an event log for a threshold alarm value.
 *
 * Nothing is logged when the sensor type is rejected by skipSensorType()
 * or when the interface, alarm property or alarm value isn't present in
 * thresholdData.  Otherwise the additional data holds the sensor path and
 * this process's PID plus, when they can be read, the sensor value, the
 * threshold value and the callout inventory path, and the Create method
 * is called on the logging service.
 *
 * getSensorType() throws std::runtime_error for a malformed sensor path;
 * that exception is not handled here.
 *
 * @param[in] sensorPath - The sensor object path
 * @param[in] interface - The threshold interface the alarm is on
 * @param[in] alarmProperty - The name of the alarm property
 * @param[in] alarmValue - The value of the alarm property
 */
void ThresholdAlarmLogger::createEventLog(
    const std::string& sensorPath, const std::string& interface,
    const std::string& alarmProperty, bool alarmValue)
{
    auto type = getSensorType(sensorPath);
    if (skipSensorType(type))
    {
        return;
    }

    auto it = thresholdData.find(interface);
    if (it == thresholdData.end())
    {
        return;
    }

    auto properties = it->second.find(alarmProperty);
    if (properties == it->second.end())
    {
        log<level::INFO>(
            std::format("Could not find {} in threshold alarms map",
                        alarmProperty)
                .c_str());
        return;
    }

    // Make sure there is error data for this alarm value before the
    // iterator is dereferenced further down.
    auto errorData = properties->second.find(alarmValue);
    if (errorData == properties->second.end())
    {
        log<level::INFO>(
            std::format("Could not find value {} of {} in threshold alarms map",
                        alarmValue, alarmProperty)
                .c_str());
        return;
    }

    std::map<std::string, std::string> ad;

    ad.emplace("SENSOR_NAME", sensorPath);
    ad.emplace("_PID", std::to_string(getpid()));

    try
    {
        auto sensorValue = SDBusPlus::getProperty<double>(
            bus, sensorPath, valueInterface, "Value");

        ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));

        log<level::INFO>(
            std::format("Threshold Event {} {} = {} (sensor value {})",
                        sensorPath, alarmProperty, alarmValue, sensorValue)
                .c_str());
    }
    catch (const DBusServiceError& e)
    {
        // If the sensor was just added, the Value interface for it may
        // not be in the mapper yet.  This could only happen if the sensor
        // application was started up after this one and the value exceeded the
        // threshold immediately.
        log<level::INFO>(std::format("Threshold Event {} {} = {}", sensorPath,
                                     alarmProperty, alarmValue)
                             .c_str());
    }

    auto callout = getCallout(sensorPath);
    if (!callout.empty())
    {
        ad.emplace("CALLOUT_INVENTORY_PATH", callout);
    }

    // Add the base error name and the sensor type (like Temperature) to the
    // error name that's in the thresholdData name to get something like
    // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
    const auto& [name, status, severity] = errorData->second;

    try
    {
        // NOTE(review): the error name fragment (e.g. "WarningHigh") is also
        // used as the threshold property name here - confirm that this
        // matches the property name on every threshold interface.
        auto thresholdValue =
            SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);

        ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));

        log<level::INFO>(
            std::format("Threshold Event {} {} = {} (threshold value {})",
                        sensorPath, alarmProperty, alarmValue, thresholdValue)
                .c_str());
    }
    catch (const DBusServiceError& e)
    {
        log<level::INFO>(std::format("Threshold Event {} {} = {}", sensorPath,
                                     alarmProperty, alarmValue)
                             .c_str());
    }

    // getSensorType() returns an empty string for a path with only one
    // component, and front() must not be called on an empty string.  The
    // cast to unsigned char keeps the toupper() argument in its valid range.
    if (!type.empty())
    {
        type.front() = static_cast<char>(
            toupper(static_cast<unsigned char>(type.front())));
    }
    std::string errorName = errorNameBase + type + name + status;

    SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
                          "Create", errorName, convertForMessage(severity), ad);
}
366 
/**
 * @brief Returns the sensor type segment of a sensor object path.
 *
 * The type is the path segment just before the final one, so
 * /xyz/openbmc_project/sensors/temperature/ambient gives "temperature".
 * A path with only one segment, like "/ambient", gives an empty string.
 *
 * @param[in] sensorPath - The sensor object path
 *
 * @return The sensor type
 *
 * @throws std::runtime_error if the path is empty, contains no '/', or
 *         ends in '/'.
 */
std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
{
    auto pos = sensorPath.find_last_of('/');

    // Test for npos first.  An empty path has no '/', so it is rejected
    // here before back() is reached, which would be undefined on it.
    if ((pos == std::string::npos) || (sensorPath.back() == '/'))
    {
        log<level::ERR>(
            std::format("Cannot get sensor type from sensor path {}",
                        sensorPath)
                .c_str());
        throw std::runtime_error("Invalid sensor path");
    }

    // Drop the sensor name; the type is then the last remaining segment.
    sensorPath = sensorPath.substr(0, pos);
    return sensorPath.substr(sensorPath.find_last_of('/') + 1);
}
382 
skipSensorType(const std::string & type)383 bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
384 {
385     return (type == "utilization");
386 }
387 
getCallout(const std::string & sensorPath)388 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
389 {
390     const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
391 
392     // Different implementations handle the association to the FRU
393     // differently:
394     //  * phosphor-inventory-manager uses the 'inventory' association
395     //    to point to the FRU.
396     //  * dbus-sensors/entity-manager uses the 'chassis' association'.
397     //  * For virtual sensors, no association.
398 
399     for (const auto& assocType : assocTypes)
400     {
401         auto assocPath = sensorPath + "/" + assocType;
402 
403         try
404         {
405             auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
406                 bus, assocPath, assocInterface, "endpoints");
407 
408             if (!endpoints.empty())
409             {
410                 return endpoints[0];
411             }
412         }
413         catch (const DBusServiceError& e)
414         {
415             // The association doesn't exist
416             continue;
417         }
418     }
419 
420     return std::string{};
421 }
422 
powerStateChanged(bool powerStateOn)423 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
424 {
425     if (powerStateOn)
426     {
427         checkThresholds();
428     }
429 }
430 
checkThresholds()431 void ThresholdAlarmLogger::checkThresholds()
432 {
433     std::vector<InterfaceKey> toErase;
434 
435     for (const auto& [interfaceKey, alarmMap] : alarms)
436     {
437         for (const auto& [propertyName, alarmValue] : alarmMap)
438         {
439             if (alarmValue)
440             {
441                 const auto& sensorPath = std::get<0>(interfaceKey);
442                 const auto& interface = std::get<1>(interfaceKey);
443                 std::string service;
444 
445                 try
446                 {
447                     // Check that the service that provides the alarm is still
448                     // running, because if it died when the alarm was active
449                     // there would be no indication of it unless we listened
450                     // for NameOwnerChanged and tracked services, and this is
451                     // easier.
452                     service = SDBusPlus::getService(bus, sensorPath, interface);
453                 }
454                 catch (const DBusServiceError& e)
455                 {
456                     // No longer on D-Bus delete the alarm entry
457                     toErase.emplace_back(sensorPath, interface);
458                 }
459 
460                 if (!service.empty())
461                 {
462                     createEventLog(sensorPath, interface, propertyName,
463                                    alarmValue);
464                 }
465             }
466         }
467     }
468 
469     for (const auto& e : toErase)
470     {
471         alarms.erase(e);
472     }
473 }
474 
475 } // namespace sensor::monitor
476