1 /**
2 * Copyright © 2021 IBM Corporation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "config.h"
17
18 #include "threshold_alarm_logger.hpp"
19
20 #include "sdbusplus.hpp"
21
22 #include <unistd.h>
23
24 #include <phosphor-logging/lg2.hpp>
25 #include <xyz/openbmc_project/Logging/Entry/server.hpp>
26
27 namespace sensor::monitor
28 {
29
30 using namespace sdbusplus::xyz::openbmc_project::Logging::server;
31 using namespace phosphor::fan;
32 using namespace phosphor::fan::util;
33
// D-Bus interfaces that host the threshold alarm properties.
const std::string warningInterface{
    "xyz.openbmc_project.Sensor.Threshold.Warning"};
const std::string criticalInterface{
    "xyz.openbmc_project.Sensor.Threshold.Critical"};
const std::string perfLossInterface{
    "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"};

// Service, object path and interface used to create event logs.
constexpr const char* loggingService = "xyz.openbmc_project.Logging";
constexpr const char* loggingPath = "/xyz/openbmc_project/logging";
constexpr const char* loggingCreateIface = "xyz.openbmc_project.Logging.Create";

// Prefix that every threshold error name starts with.
constexpr const char* errorNameBase =
    "xyz.openbmc_project.Sensor.Threshold.Error.";

// Interfaces used to read the sensor value and the association endpoints.
constexpr const char* valueInterface = "xyz.openbmc_project.Sensor.Value";
constexpr const char* assocInterface = "xyz.openbmc_project.Association";

// All of the threshold interfaces, for membership checks.
const std::vector<std::string> thresholdIfaceNames{
    warningInterface,
    criticalInterface,
    perfLossInterface,
};
49
50 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>;
51
52 /**
53 * Map of threshold interfaces and alarm properties and values to error data.
54 */
55 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>>
56 thresholdData{
57
58 {warningInterface,
59 {{"WarningAlarmHigh",
60 {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}},
61 {false,
62 ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}},
63 {"WarningAlarmLow",
64 {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}},
65 {false,
66 ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}},
67
68 {criticalInterface,
69 {{"CriticalAlarmHigh",
70 {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}},
71 {false,
72 ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}},
73 {"CriticalAlarmLow",
74 {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}},
75 {false, ErrorData{"CriticalLow", "Clear",
76 Entry::Level::Informational}}}}}},
77
78 {perfLossInterface,
79 {{"PerfLossAlarmHigh",
80 {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}},
81 {false, ErrorData{"PerformanceLossHigh", "Clear",
82 Entry::Level::Informational}}}},
83 {"PerfLossAlarmLow",
84 {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}},
85 {false, ErrorData{"PerformanceLossLow", "Clear",
86 Entry::Level::Informational}}}}}}};
87
ThresholdAlarmLogger(sdbusplus::bus_t & bus,sdeventplus::Event & event,std::shared_ptr<PowerState> powerState)88 ThresholdAlarmLogger::ThresholdAlarmLogger(
89 sdbusplus::bus_t& bus, sdeventplus::Event& event,
90 std::shared_ptr<PowerState> powerState) :
91 bus(bus), event(event), _powerState(std::move(powerState)),
92 warningMatch(bus,
93 "type='signal',member='PropertiesChanged',"
94 "path_namespace='/xyz/openbmc_project/sensors',"
95 "arg0='" +
96 warningInterface + "'",
97 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
98 std::placeholders::_1)),
99 criticalMatch(bus,
100 "type='signal',member='PropertiesChanged',"
101 "path_namespace='/xyz/openbmc_project/sensors',"
102 "arg0='" +
103 criticalInterface + "'",
104 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
105 std::placeholders::_1)),
106 perfLossMatch(bus,
107 "type='signal',member='PropertiesChanged',"
108 "path_namespace='/xyz/openbmc_project/sensors',"
109 "arg0='" +
110 perfLossInterface + "'",
111 std::bind(&ThresholdAlarmLogger::propertiesChanged, this,
112 std::placeholders::_1)),
113 ifacesRemovedMatch(bus,
114 "type='signal',member='InterfacesRemoved',arg0path="
115 "'/xyz/openbmc_project/sensors/'",
116 std::bind(&ThresholdAlarmLogger::interfacesRemoved, this,
117 std::placeholders::_1)),
118 ifacesAddedMatch(bus,
119 "type='signal',member='InterfacesAdded',arg0path="
120 "'/xyz/openbmc_project/sensors/'",
121 std::bind(&ThresholdAlarmLogger::interfacesAdded, this,
122 std::placeholders::_1))
123 {
124 _powerState->addCallback("thresholdMon",
125 std::bind(&ThresholdAlarmLogger::powerStateChanged,
126 this, std::placeholders::_1));
127
128 // check for any currently asserted threshold alarms
129 std::for_each(
130 thresholdData.begin(), thresholdData.end(),
131 [this](const auto& thresholdInterface) {
132 const auto& interface = thresholdInterface.first;
133 auto objects =
134 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0);
135 std::for_each(objects.begin(), objects.end(),
136 [interface, this](const auto& object) {
137 const auto& path = object.first;
138 const auto& service =
139 object.second.begin()->first;
140 checkThresholds(interface, path, service);
141 });
142 });
143 }
144
propertiesChanged(sdbusplus::message_t & msg)145 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg)
146 {
147 std::map<std::string, std::variant<bool>> properties;
148 std::string sensorPath = msg.get_path();
149 std::string interface;
150
151 msg.read(interface, properties);
152
153 checkProperties(sensorPath, interface, properties);
154 }
155
interfacesRemoved(sdbusplus::message_t & msg)156 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg)
157 {
158 sdbusplus::message::object_path path;
159 std::vector<std::string> interfaces;
160
161 msg.read(path, interfaces);
162
163 for (const auto& interface : interfaces)
164 {
165 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
166 interface) != thresholdIfaceNames.end())
167 {
168 alarms.erase(InterfaceKey{path, interface});
169 }
170 }
171 }
172
interfacesAdded(sdbusplus::message_t & msg)173 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg)
174 {
175 sdbusplus::message::object_path path;
176 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
177
178 msg.read(path, interfaces);
179
180 for (const auto& [interface, properties] : interfaces)
181 {
182 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(),
183 interface) != thresholdIfaceNames.end())
184 {
185 checkProperties(path, interface, properties);
186 }
187 }
188 }
189
checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)190 void ThresholdAlarmLogger::checkProperties(
191 const std::string& sensorPath, const std::string& interface,
192 const std::map<std::string, std::variant<bool>>& properties)
193 {
194 auto alarmProperties = thresholdData.find(interface);
195 if (alarmProperties == thresholdData.end())
196 {
197 return;
198 }
199
200 for (const auto& [propertyName, propertyValue] : properties)
201 {
202 if (alarmProperties->second.find(propertyName) !=
203 alarmProperties->second.end())
204 {
205 // If this is the first time we've seen this alarm, then
206 // assume it was off before so it doesn't create an event
207 // log for a value of false.
208
209 InterfaceKey key{sensorPath, interface};
210 if (alarms.find(key) == alarms.end())
211 {
212 alarms[key][propertyName] = false;
213 }
214
215 // Check if the value changed from what was there before.
216 auto alarmValue = std::get<bool>(propertyValue);
217 if (alarmValue != alarms[key][propertyName])
218 {
219 alarms[key][propertyName] = alarmValue;
220 #ifndef SKIP_POWER_CHECKING
221 if (_powerState->isPowerOn())
222 #endif
223 {
224 createEventLog(sensorPath, interface, propertyName,
225 alarmValue);
226 }
227 }
228 }
229 }
230 }
231
checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)232 void ThresholdAlarmLogger::checkThresholds(const std::string& interface,
233 const std::string& sensorPath,
234 const std::string& service)
235 {
236 auto properties = thresholdData.find(interface);
237 if (properties == thresholdData.end())
238 {
239 return;
240 }
241
242 for (const auto& [property, unused] : properties->second)
243 {
244 try
245 {
246 auto alarmValue = SDBusPlus::getProperty<bool>(
247 bus, service, sensorPath, interface, property);
248 alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue;
249
250 // This is just for checking alarms on startup,
251 // so only look for active alarms.
252 #ifdef SKIP_POWER_CHECKING
253 if (alarmValue)
254 #else
255 if (alarmValue && _powerState->isPowerOn())
256 #endif
257 {
258 createEventLog(sensorPath, interface, property, alarmValue);
259 }
260 }
261 catch (const sdbusplus::exception_t& e)
262 {
263 // Sensor daemons that get their direction from entity manager
264 // may only be putting either the high alarm or low alarm on
265 // D-Bus, not both.
266 continue;
267 }
268 }
269 }
270
createEventLog(const std::string & sensorPath,const std::string & interface,const std::string & alarmProperty,bool alarmValue)271 void ThresholdAlarmLogger::createEventLog(
272 const std::string& sensorPath, const std::string& interface,
273 const std::string& alarmProperty, bool alarmValue)
274 {
275 std::map<std::string, std::string> ad;
276
277 auto type = getSensorType(sensorPath);
278 if (skipSensorType(type))
279 {
280 return;
281 }
282
283 auto it = thresholdData.find(interface);
284 if (it == thresholdData.end())
285 {
286 return;
287 }
288
289 auto properties = it->second.find(alarmProperty);
290 if (properties == it->second.end())
291 {
292 lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map",
293 "ALARM_PROPERTY", alarmProperty);
294 return;
295 }
296
297 ad.emplace("SENSOR_NAME", sensorPath);
298 ad.emplace("_PID", std::to_string(getpid()));
299
300 try
301 {
302 auto sensorValue = SDBusPlus::getProperty<double>(
303 bus, sensorPath, valueInterface, "Value");
304
305 ad.emplace("SENSOR_VALUE", std::to_string(sensorValue));
306
307 lg2::info(
308 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})",
309 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
310 "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue);
311 }
312 catch (const DBusServiceError& e)
313 {
314 // If the sensor was just added, the Value interface for it may
315 // not be in the mapper yet. This could only happen if the sensor
316 // application was started up after this one and the value exceeded the
317 // threshold immediately.
318 lg2::info(
319 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
320 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
321 "ALARM_VALUE", alarmValue);
322 }
323
324 auto callout = getCallout(sensorPath);
325 if (!callout.empty())
326 {
327 ad.emplace("CALLOUT_INVENTORY_PATH", callout);
328 }
329
330 auto errorData = properties->second.find(alarmValue);
331
332 // Add the base error name and the sensor type (like Temperature) to the
333 // error name that's in the thresholdData name to get something like
334 // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh
335 const auto& [name, status, severity] = errorData->second;
336
337 try
338 {
339 auto thresholdValue =
340 SDBusPlus::getProperty<double>(bus, sensorPath, interface, name);
341
342 ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue));
343
344 lg2::info(
345 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})",
346 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
347 "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue);
348 }
349 catch (const DBusServiceError& e)
350 {
351 lg2::info(
352 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}",
353 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty,
354 "ALARM_VALUE", alarmValue);
355 }
356
357 type.front() = toupper(type.front());
358 std::string errorName = errorNameBase + type + name + status;
359 if (LOG_SENSOR_NAME_ON_ERROR != 0)
360 {
361 errorName += " on sensor " + getSensorName(sensorPath);
362 }
363
364 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface,
365 "Create", errorName, convertForMessage(severity), ad);
366 }
367
getSensorName(const std::string & sensorPath)368 std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath)
369 {
370 auto pos = sensorPath.find_last_of('/');
371 if ((sensorPath.back() == '/') || (pos == std::string::npos))
372 {
373 lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}",
374 "SENSOR_PATH", sensorPath);
375 return "unknown_sensor";
376 }
377
378 return sensorPath.substr(pos + 1);
379 }
380
getSensorType(std::string sensorPath)381 std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath)
382 {
383 auto pos = sensorPath.find_last_of('/');
384 if ((sensorPath.back() == '/') || (pos == std::string::npos))
385 {
386 lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}",
387 "SENSOR_PATH", sensorPath);
388 throw std::runtime_error("Invalid sensor path");
389 }
390
391 sensorPath = sensorPath.substr(0, pos);
392 return sensorPath.substr(sensorPath.find_last_of('/') + 1);
393 }
394
skipSensorType(const std::string & type)395 bool ThresholdAlarmLogger::skipSensorType(const std::string& type)
396 {
397 return (type == "utilization");
398 }
399
getCallout(const std::string & sensorPath)400 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath)
401 {
402 const std::array<std::string, 2> assocTypes{"inventory", "chassis"};
403
404 // Different implementations handle the association to the FRU
405 // differently:
406 // * phosphor-inventory-manager uses the 'inventory' association
407 // to point to the FRU.
408 // * dbus-sensors/entity-manager uses the 'chassis' association'.
409 // * For virtual sensors, no association.
410
411 for (const auto& assocType : assocTypes)
412 {
413 auto assocPath = sensorPath + "/" + assocType;
414
415 try
416 {
417 auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>(
418 bus, assocPath, assocInterface, "endpoints");
419
420 if (!endpoints.empty())
421 {
422 return endpoints[0];
423 }
424 }
425 catch (const DBusServiceError& e)
426 {
427 // The association doesn't exist
428 continue;
429 }
430 }
431
432 return std::string{};
433 }
434
powerStateChanged(bool powerStateOn)435 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn)
436 {
437 if (powerStateOn)
438 {
439 checkThresholds();
440 }
441 }
442
checkThresholds()443 void ThresholdAlarmLogger::checkThresholds()
444 {
445 std::vector<InterfaceKey> toErase;
446
447 for (const auto& [interfaceKey, alarmMap] : alarms)
448 {
449 for (const auto& [propertyName, alarmValue] : alarmMap)
450 {
451 if (alarmValue)
452 {
453 const auto& sensorPath = std::get<0>(interfaceKey);
454 const auto& interface = std::get<1>(interfaceKey);
455 std::string service;
456
457 try
458 {
459 // Check that the service that provides the alarm is still
460 // running, because if it died when the alarm was active
461 // there would be no indication of it unless we listened
462 // for NameOwnerChanged and tracked services, and this is
463 // easier.
464 service = SDBusPlus::getService(bus, sensorPath, interface);
465 }
466 catch (const DBusServiceError& e)
467 {
468 // No longer on D-Bus delete the alarm entry
469 toErase.emplace_back(sensorPath, interface);
470 }
471
472 if (!service.empty())
473 {
474 createEventLog(sensorPath, interface, propertyName,
475 alarmValue);
476 }
477 }
478 }
479 }
480
481 for (const auto& e : toErase)
482 {
483 alarms.erase(e);
484 }
485 }
486
487 } // namespace sensor::monitor
488