1 /**
2 * Copyright © 2021 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "config.h"
17
18 #include "threshold_alarm_logger.hpp"
19
20 #include "sdbusplus.hpp"
21
22 #include <unistd.h>
23
24 #include <phosphor-logging/lg2.hpp>
25 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
26
27 namespace sensor::monitor
28 {
29
30 using namespace sdbusplus::xyz::openbmc_project::Logging::server;
31 using namespace phosphor::fan;
32 using namespace phosphor::fan::util;
33
// D-Bus interfaces that carry the threshold alarm properties.
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

// Service, object path and interface used to create event logs.
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

// Prefix of every error name created by this file.
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

// Interfaces used to read a sensor's value and its associations.
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

// All of the threshold interfaces, for membership checks.
const std::vector<std::string> thresholdIfaceNames{
    warningInterface, criticalInterface, perfLossInterface};
49
50 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
51
52 /**
53 * Map of threshold interfaces and alarm properties and values to error data.
54 */
55 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
56 thresholdData{
57
58 {warningInterface,
59 {{"WarningAlarmHigh",
60 {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
61 {false,
62 ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
63 {"WarningAlarmLow",
64 {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
65 {false,
66 ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
67
68 {criticalInterface,
69 {{"CriticalAlarmHigh",
70 {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
71 {false,
72 ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
73 {"CriticalAlarmLow",
74 {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
75 {false, ErrorData{"CriticalLow", "Clear",
76 Entry::Level::Informational}}}}}},
77
78 {perfLossInterface,
79 {{"PerfLossAlarmHigh",
80 {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
81 {false, ErrorData{"PerformanceLossHigh", "Clear",
82 Entry::Level::Informational}}}},
83 {"PerfLossAlarmLow",
84 {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
85 {false, ErrorData{"PerformanceLossLow", "Clear",
86 Entry::Level::Informational}}}}}}};
87
ThresholdAlarmLogger(sdbusplus::bus_t & bus,std::shared_ptr<PowerState> powerState)88 ThresholdAlarmLogger::ThresholdAlarmLogger(
89 sdbusplus::bus_t& bus, std::shared_ptr<PowerState> powerState) :
90 bus(bus), _powerState(std::move(powerState)),
91 warningMatch(bus,
92 "type='signal',member='PropertiesChanged',"
93 "path_namespace='/xyz/openbmc_project/sensors',"
94 "arg0='" +
95 warningInterface + "'",
96 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
97 std::placeholders::_1)),
98 criticalMatch(bus,
99 "type='signal',member='PropertiesChanged',"
100 "path_namespace='/xyz/openbmc_project/sensors',"
101 "arg0='" +
102 criticalInterface + "'",
103 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
104 std::placeholders::_1)),
105 perfLossMatch(bus,
106 "type='signal',member='PropertiesChanged',"
107 "path_namespace='/xyz/openbmc_project/sensors',"
108 "arg0='" +
109 perfLossInterface + "'",
110 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
111 std::placeholders::_1)),
112 ifacesRemovedMatch(bus,
113 "type='signal',member='InterfacesRemoved',arg0path="
114 "'/xyz/openbmc_project/sensors/'",
115 std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
116 std::placeholders::_1)),
117 ifacesAddedMatch(bus,
118 "type='signal',member='InterfacesAdded',arg0path="
119 "'/xyz/openbmc_project/sensors/'",
120 std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
121 std::placeholders::_1))
122 {
123 _powerState->addCallback("thresholdMon",
124 std::bind(&ThresholdAlarmLogger::powerStateChanged,
125 this, std::placeholders::_1));
126
127 // check for any currently asserted threshold alarms
128 std::for_each(
129 thresholdData.begin(), thresholdData.end(),
130 [this](const auto& thresholdInterface) {
131 const auto& interface = thresholdInterface.first;
132 auto objects =
133 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
134 std::for_each(objects.begin(), objects.end(),
135 [interface, this](const auto& object) {
136 const auto& path = object.first;
137 const auto& service =
138 object.second.begin()->first;
139 this->checkThresholds(interface, path, service);
140 });
141 });
142 }
143
propertiesChanged(sdbusplus::message_t & msg)144 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
145 {
146 std::map<std::string, std::variant<bool>> properties;
147 std::string sensorPath = msg.get_path();
148 std::string interface;
149
150 msg.read(interface, properties);
151
152 checkProperties(sensorPath, interface, properties);
153 }
154
interfacesRemoved(sdbusplus::message_t & msg)155 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
156 {
157 sdbusplus::message::object_path path;
158 std::vector<std::string> interfaces;
159
160 msg.read(path, interfaces);
161
162 for (const auto& interface : interfaces)
163 {
164 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
165 interface) != thresholdIfaceNames.end())
166 {
167 alarms.erase(InterfaceKey{path, interface});
168 }
169 }
170 }
171
interfacesAdded(sdbusplus::message_t & msg)172 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
173 {
174 sdbusplus::message::object_path path;
175 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
176
177 msg.read(path, interfaces);
178
179 for (const auto& [interface, properties] : interfaces)
180 {
181 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
182 interface) != thresholdIfaceNames.end())
183 {
184 checkProperties(path, interface, properties);
185 }
186 }
187 }
188
checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)189 void ThresholdAlarmLogger::checkProperties(
190 const std::string& sensorPath, const std::string& interface,
191 const std::map<std::string, std::variant<bool>>& properties)
192 {
193 auto alarmProperties = thresholdData.find(interface);
194 if (alarmProperties == thresholdData.end())
195 {
196 return;
197 }
198
199 for (const auto& [propertyName, propertyValue] : properties)
200 {
201 if (alarmProperties->second.find(propertyName) !=
202 alarmProperties->second.end())
203 {
204 // If this is the first time we've seen this alarm, then
205 // assume it was off before so it doesn't create an event
206 // log for a value of false.
207
208 InterfaceKey key{sensorPath, interface};
209 if (alarms.find(key) == alarms.end())
210 {
211 alarms[key][propertyName] = false;
212 }
213
214 // Check if the value changed from what was there before.
215 auto alarmValue = std::get<bool>(propertyValue);
216 if (alarmValue != alarms[key][propertyName])
217 {
218 alarms[key][propertyName] = alarmValue;
219 #ifndef SKIP_POWER_CHECKING
220 if (_powerState->isPowerOn())
221 #endif
222 {
223 createEventLog(sensorPath, interface, propertyName,
224 alarmValue);
225 }
226 }
227 }
228 }
229 }
230
checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)231 void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
232 const std::string& sensorPath,
233 const std::string& service)
234 {
235 auto properties = thresholdData.find(interface);
236 if (properties == thresholdData.end())
237 {
238 return;
239 }
240
241 for (const auto& [property, unused] : properties->second)
242 {
243 try
244 {
245 auto alarmValue = SDBusPlus::getProperty<bool>(
246 bus, service, sensorPath, interface, property);
247 alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
248
249 // This is just for checking alarms on startup,
250 // so only look for active alarms.
251 #ifdef SKIP_POWER_CHECKING
252 if (alarmValue)
253 #else
254 if (alarmValue && _powerState->isPowerOn())
255 #endif
256 {
257 createEventLog(sensorPath, interface, property, alarmValue);
258 }
259 }
260 catch (const sdbusplus::exception_t& e)
261 {
262 // Sensor daemons that get their direction from entity manager
263 // may only be putting either the high alarm or low alarm on
264 // D-Bus, not both.
265 continue;
266 }
267 }
268 }
269
createEventLog(const std::string & sensorPath,const std::string & interface,const std::string & alarmProperty,bool alarmValue)270 void ThresholdAlarmLogger::createEventLog(
271 const std::string& sensorPath, const std::string& interface,
272 const std::string& alarmProperty, bool alarmValue)
273 {
274 std::map<std::string, std::string> ad;
275
276 auto type = getSensorType(sensorPath);
277 if (skipSensorType(type))
278 {
279 return;
280 }
281
282 auto it = thresholdData.find(interface);
283 if (it == thresholdData.end())
284 {
285 return;
286 }
287
288 auto properties = it->second.find(alarmProperty);
289 if (properties == it->second.end())
290 {
291 lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map",
292 "ALARM_PROPERTY", alarmProperty);
293 return;
294 }
295
296 ad.emplace("SENSOR_NAME", sensorPath);
297 ad.emplace("_PID", std::to_string(getpid()));
298
299 try
300 {
301 auto sensorValue = SDBusPlus::getProperty<double>(
302 bus, sensorPath, valueInterface, "Value");
303
304 ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));
305
306 lg2::info(
307 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})",
308 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
309 "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue);
310 }
311 catch (const DBusServiceError& e)
312 {
313 // If the sensor was just added, the Value interface for it may
314 // not be in the mapper yet. This could only happen if the sensor
315 // application was started up after this one and the value exceeded the
316 // threshold immediately.
317 lg2::info(
318 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
319 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
320 "ALARM_VALUE", alarmValue);
321 }
322
323 auto callout = getCallout(sensorPath);
324 if (!callout.empty())
325 {
326 ad.emplace("CALLOUT_INVENTORY_PATH", callout);
327 }
328
329 auto errorData = properties->second.find(alarmValue);
330
331 // Add the base error name and the sensor type (like Temperature) to the
332 // error name that's in the thresholdData name to get something like
333 // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
334 const auto& [name, status, severity] = errorData->second;
335
336 try
337 {
338 auto thresholdValue =
339 SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);
340
341 ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));
342
343 lg2::info(
344 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})",
345 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
346 "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue);
347 }
348 catch (const DBusServiceError& e)
349 {
350 lg2::info(
351 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
352 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
353 "ALARM_VALUE", alarmValue);
354 }
355
356 type.front() = toupper(type.front());
357 std::string errorName = errorNameBase + type + name + status;
358 if (LOG_SENSOR_NAME_ON_ERROR != 0)
359 {
360 errorName += " on sensor " + getSensorName(sensorPath);
361 }
362
363 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
364 "Create", errorName, convertForMessage(severity), ad);
365 }
366
getSensorName(const std::string & sensorPath)367 std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath)
368 {
369 auto pos = sensorPath.find_last_of('/');
370 if ((sensorPath.back() == '/') || (pos == std::string::npos))
371 {
372 lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}",
373 "SENSOR_PATH", sensorPath);
374 return "unknown_sensor";
375 }
376
377 return sensorPath.substr(pos + 1);
378 }
379
getSensorType(std::string sensorPath)380 std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
381 {
382 auto pos = sensorPath.find_last_of('/');
383 if ((sensorPath.back() == '/') || (pos == std::string::npos))
384 {
385 lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}",
386 "SENSOR_PATH", sensorPath);
387 throw std::runtime_error("Invalid sensor path");
388 }
389
390 sensorPath = sensorPath.substr(0, pos);
391 return sensorPath.substr(sensorPath.find_last_of('/') + 1);
392 }
393
skipSensorType(const std::string & type)394 bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
395 {
396 return (type == "utilization");
397 }
398
getCallout(const std::string & sensorPath)399 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
400 {
401 const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
402
403 // Different implementations handle the association to the FRU
404 // differently:
405 // * phosphor-inventory-manager uses the 'inventory' association
406 // to point to the FRU.
407 // * dbus-sensors/entity-manager uses the 'chassis' association'.
408 // * For virtual sensors, no association.
409
410 for (const auto& assocType : assocTypes)
411 {
412 auto assocPath = sensorPath + "/" + assocType;
413
414 try
415 {
416 auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
417 bus, assocPath, assocInterface, "endpoints");
418
419 if (!endpoints.empty())
420 {
421 return endpoints[0];
422 }
423 }
424 catch (const DBusServiceError& e)
425 {
426 // The association doesn't exist
427 continue;
428 }
429 }
430
431 return std::string{};
432 }
433
powerStateChanged(bool powerStateOn)434 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
435 {
436 if (powerStateOn)
437 {
438 checkThresholds();
439 }
440 }
441
checkThresholds()442 void ThresholdAlarmLogger::checkThresholds()
443 {
444 std::vector<InterfaceKey> toErase;
445
446 for (const auto& [interfaceKey, alarmMap] : alarms)
447 {
448 for (const auto& [propertyName, alarmValue] : alarmMap)
449 {
450 if (alarmValue)
451 {
452 const auto& sensorPath = std::get<0>(interfaceKey);
453 const auto& interface = std::get<1>(interfaceKey);
454 std::string service;
455
456 try
457 {
458 // Check that the service that provides the alarm is still
459 // running, because if it died when the alarm was active
460 // there would be no indication of it unless we listened
461 // for NameOwnerChanged and tracked services, and this is
462 // easier.
463 service = SDBusPlus::getService(bus, sensorPath, interface);
464 }
465 catch (const DBusServiceError& e)
466 {
467 // No longer on D-Bus delete the alarm entry
468 toErase.emplace_back(sensorPath, interface);
469 }
470
471 if (!service.empty())
472 {
473 createEventLog(sensorPath, interface, propertyName,
474 alarmValue);
475 }
476 }
477 }
478 }
479
480 for (const auto& e : toErase)
481 {
482 alarms.erase(e);
483 }
484 }
485
486 } // namespace sensor::monitor
487