xref: /openbmc/dbus-sensors/src/Thresholds.cpp (revision 6198435dc2bc6e07b46e32465876815d4ea86437)
1 #include "Thresholds.hpp"
2 
3 #include "Utils.hpp"
4 #include "VariantVisitors.hpp"
5 #include "sensor.hpp"
6 
7 #include <boost/algorithm/string/replace.hpp>
8 #include <boost/asio/error.hpp>
9 #include <boost/asio/steady_timer.hpp>
10 #include <boost/container/flat_map.hpp>
11 #include <phosphor-logging/lg2.hpp>
12 #include <sdbusplus/asio/connection.hpp>
13 #include <sdbusplus/asio/object_server.hpp>
14 #include <sdbusplus/exception.hpp>
15 #include <sdbusplus/message.hpp>
16 
17 #include <array>
18 #include <chrono>
19 #include <cstddef>
20 #include <cstdint>
21 #include <limits>
22 #include <memory>
23 #include <string>
24 #include <tuple>
25 #include <utility>
26 #include <variant>
27 #include <vector>
28 
29 static constexpr bool debug = false;
30 namespace thresholds
31 {
findThresholdLevel(uint8_t sev)32 Level findThresholdLevel(uint8_t sev)
33 {
34     for (const ThresholdDefinition& prop : thresProp)
35     {
36         if (prop.sevOrder == sev)
37         {
38             return prop.level;
39         }
40     }
41     return Level::ERROR;
42 }
43 
findThresholdDirection(const std::string & direct)44 Direction findThresholdDirection(const std::string& direct)
45 {
46     if (direct == "greater than")
47     {
48         return Direction::HIGH;
49     }
50     if (direct == "less than")
51     {
52         return Direction::LOW;
53     }
54     return Direction::ERROR;
55 }
56 
parseThresholdsFromConfig(const SensorData & sensorData,std::vector<thresholds::Threshold> & thresholdVector,const std::string * matchLabel,const int * sensorIndex)57 bool parseThresholdsFromConfig(
58     const SensorData& sensorData,
59     std::vector<thresholds::Threshold>& thresholdVector,
60     const std::string* matchLabel, const int* sensorIndex)
61 {
62     for (const auto& [intf, cfg] : sensorData)
63     {
64         if (intf.find("Thresholds") == std::string::npos)
65         {
66             continue;
67         }
68         if (matchLabel != nullptr)
69         {
70             auto labelFind = cfg.find("Label");
71             if (labelFind == cfg.end())
72             {
73                 continue;
74             }
75             if (std::visit(VariantToStringVisitor(), labelFind->second) !=
76                 *matchLabel)
77             {
78                 continue;
79             }
80         }
81 
82         if (sensorIndex != nullptr)
83         {
84             auto indexFind = cfg.find("Index");
85 
86             // If we're checking for index 1, a missing Index is OK.
87             if ((indexFind == cfg.end()) && (*sensorIndex != 1))
88             {
89                 continue;
90             }
91 
92             if ((indexFind != cfg.end()) &&
93                 (std::visit(VariantToIntVisitor(), indexFind->second) !=
94                  *sensorIndex))
95             {
96                 continue;
97             }
98         }
99 
100         double hysteresis = std::numeric_limits<double>::quiet_NaN();
101         auto hysteresisFind = cfg.find("Hysteresis");
102         if (hysteresisFind != cfg.end())
103         {
104             hysteresis =
105                 std::visit(VariantToDoubleVisitor(), hysteresisFind->second);
106         }
107 
108         auto directionFind = cfg.find("Direction");
109         auto severityFind = cfg.find("Severity");
110         auto valueFind = cfg.find("Value");
111         if (valueFind == cfg.end() || severityFind == cfg.end() ||
112             directionFind == cfg.end())
113         {
114             lg2::error(
115                 "Malformed threshold on configuration interface: '{INTERFACE}'",
116                 "INTERFACE", intf);
117             return false;
118         }
119         unsigned int severity =
120             std::visit(VariantToUnsignedIntVisitor(), severityFind->second);
121 
122         std::string directions =
123             std::visit(VariantToStringVisitor(), directionFind->second);
124 
125         Level level = findThresholdLevel(severity);
126         Direction direction = findThresholdDirection(directions);
127 
128         if ((level == Level::ERROR) || (direction == Direction::ERROR))
129         {
130             continue;
131         }
132         double val = std::visit(VariantToDoubleVisitor(), valueFind->second);
133 
134         thresholdVector.emplace_back(level, direction, val, hysteresis);
135     }
136     return true;
137 }
138 
persistThreshold(const std::string & path,const std::string & baseInterface,const thresholds::Threshold & threshold,std::shared_ptr<sdbusplus::asio::connection> & conn,size_t thresholdCount,const std::string & labelMatch)139 void persistThreshold(const std::string& path, const std::string& baseInterface,
140                       const thresholds::Threshold& threshold,
141                       std::shared_ptr<sdbusplus::asio::connection>& conn,
142                       size_t thresholdCount, const std::string& labelMatch)
143 {
144     for (size_t ii = 0; ii < thresholdCount; ii++)
145     {
146         std::string thresholdInterface =
147             baseInterface + ".Thresholds" + std::to_string(ii);
148         conn->async_method_call(
149             [&, path, threshold, thresholdInterface,
150              labelMatch](const boost::system::error_code& ec,
151                          const SensorBaseConfigMap& result) {
152                 if (ec)
153                 {
154                     return; // threshold not supported
155                 }
156 
157                 if (!labelMatch.empty())
158                 {
159                     auto labelFind = result.find("Label");
160                     if (labelFind == result.end())
161                     {
162                         lg2::error("No label in threshold configuration");
163                         return;
164                     }
165                     std::string label =
166                         std::visit(VariantToStringVisitor(), labelFind->second);
167                     if (label != labelMatch)
168                     {
169                         return;
170                     }
171                 }
172 
173                 auto directionFind = result.find("Direction");
174                 auto severityFind = result.find("Severity");
175                 auto valueFind = result.find("Value");
176                 if (valueFind == result.end() || severityFind == result.end() ||
177                     directionFind == result.end())
178                 {
179                     lg2::error("Malformed threshold in configuration");
180                     return;
181                 }
182                 unsigned int severity = std::visit(
183                     VariantToUnsignedIntVisitor(), severityFind->second);
184 
185                 std::string dir =
186                     std::visit(VariantToStringVisitor(), directionFind->second);
187                 if ((findThresholdLevel(severity) != threshold.level) ||
188                     (findThresholdDirection(dir) != threshold.direction))
189                 {
190                     return; // not the droid we're looking for
191                 }
192 
193                 std::variant<double> value(threshold.value);
194                 conn->async_method_call(
195                     [](const boost::system::error_code& ec) {
196                         if (ec)
197                         {
198                             lg2::error(
199                                 "Error setting threshold: '{ERROR_MESSAGE}'",
200                                 "ERROR_MESSAGE", ec.message());
201                         }
202                     },
203                     entityManagerName, path, "org.freedesktop.DBus.Properties",
204                     "Set", thresholdInterface, "Value", value);
205             },
206             entityManagerName, path, "org.freedesktop.DBus.Properties",
207             "GetAll", thresholdInterface);
208     }
209 }
210 
updateThresholds(Sensor * sensor)211 void updateThresholds(Sensor* sensor)
212 {
213     for (const auto& threshold : sensor->thresholds)
214     {
215         std::shared_ptr<sdbusplus::asio::dbus_interface> interface =
216             sensor->getThresholdInterface(threshold.level);
217 
218         if (!interface)
219         {
220             continue;
221         }
222 
223         std::string property =
224             Sensor::propertyLevel(threshold.level, threshold.direction);
225         if (property.empty())
226         {
227             continue;
228         }
229         interface->set_property(property, threshold.value);
230     }
231 }
232 
// Debugging counters, shared by every sensor evaluated in this translation
// unit. They tally how often a high/low threshold check ended up asserted
// (True), de-asserted (False) or inside the hysteresis band (Midstate).
static int cHiTrue{0};
static int cHiFalse{0};
static int cHiMidstate{0};
static int cLoTrue{0};
static int cLoFalse{0};
static int cLoMidstate{0};
// Number of threshold evaluations since the debug summary was last due.
static int cDebugThrottle{0};
// Bound used to stop logging individual assertions once enough were seen.
static constexpr int assertLogCount{10};
242 
243 struct ChangeParam
244 {
ChangeParamthresholds::ChangeParam245     ChangeParam(Threshold whichThreshold, bool status, double value) :
246         threshold(whichThreshold), asserted(status), assertValue(value)
247     {}
248 
249     Threshold threshold;
250     bool asserted;
251     double assertValue;
252 };
253 
checkThresholds(Sensor * sensor,double value)254 static std::vector<ChangeParam> checkThresholds(Sensor* sensor, double value)
255 {
256     std::vector<ChangeParam> thresholdChanges;
257     if (sensor->thresholds.empty())
258     {
259         return thresholdChanges;
260     }
261 
262     for (auto& threshold : sensor->thresholds)
263     {
264         // Use "Schmitt trigger" logic to avoid threshold trigger spam,
265         // if value is noisy while hovering very close to a threshold.
266         // When a threshold is crossed, indicate true immediately,
267         // but require more distance to be crossed the other direction,
268         // before resetting the indicator back to false.
269         if (threshold.direction == thresholds::Direction::HIGH)
270         {
271             if (value >= threshold.value)
272             {
273                 thresholdChanges.emplace_back(threshold, true, value);
274                 if (++cHiTrue < assertLogCount)
275                 {
276                     lg2::info(
277                         "Sensor name: {NAME}, high threshold: {THRESHOLD}, "
278                         "assert value: {VALUE}, raw data: {RAW_DATA}",
279                         "NAME", sensor->name, "THRESHOLD", threshold.value,
280                         "VALUE", value, "RAW_DATA", sensor->rawValue);
281                 }
282             }
283             else if (value < (threshold.value - threshold.hysteresis))
284             {
285                 thresholdChanges.emplace_back(threshold, false, value);
286                 ++cHiFalse;
287             }
288             else
289             {
290                 ++cHiMidstate;
291             }
292         }
293         else if (threshold.direction == thresholds::Direction::LOW)
294         {
295             if (value <= threshold.value)
296             {
297                 thresholdChanges.emplace_back(threshold, true, value);
298                 if (++cLoTrue < assertLogCount)
299                 {
300                     lg2::info(
301                         "Sensor name: {NAME}, low threshold: {THRESHOLD}, "
302                         "assert value: {VALUE}, raw data: {RAW_DATA}",
303                         "NAME", sensor->name, "THRESHOLD", threshold.value,
304                         "VALUE", value, "RAW_DATA", sensor->rawValue);
305                 }
306             }
307             else if (value > (threshold.value + threshold.hysteresis))
308             {
309                 thresholdChanges.emplace_back(threshold, false, value);
310                 ++cLoFalse;
311             }
312             else
313             {
314                 ++cLoMidstate;
315             }
316         }
317         else
318         {
319             lg2::error("Error determining threshold direction");
320         }
321     }
322 
323     // Throttle debug output, so that it does not continuously spam
324     ++cDebugThrottle;
325     if (cDebugThrottle >= 1000)
326     {
327         cDebugThrottle = 0;
328         if constexpr (debug)
329         {
330             lg2::error("checkThresholds: High T= {HIGH_TRUE}, F= {HIGH_FALSE},"
331                        " M= {HIGH_MIDSTATE}, Low T= {LOW_TRUE}, F= {LOW_FALSE},"
332                        " M= {LOW_MIDSTATE}",
333                        "HIGH_TRUE", cHiTrue, "HIGH_FALSE", cHiFalse,
334                        "HIGH_MIDSTATE", cHiMidstate, "LOW_TRUE", cLoTrue,
335                        "LOW_FALSE", cLoFalse, "LOW_MIDSTATE", cLoMidstate);
336         }
337     }
338 
339     return thresholdChanges;
340 }
341 
startTimer(const std::weak_ptr<Sensor> & weakSensor,const Threshold & threshold,bool assert,double assertValue)342 void ThresholdTimer::startTimer(const std::weak_ptr<Sensor>& weakSensor,
343                                 const Threshold& threshold, bool assert,
344                                 double assertValue)
345 {
346     struct TimerUsed timerUsed = {};
347     constexpr const size_t waitTime = 5;
348     TimerPair* pair = nullptr;
349 
350     for (TimerPair& timer : timers)
351     {
352         if (!timer.first.used)
353         {
354             pair = &timer;
355             break;
356         }
357     }
358     if (pair == nullptr)
359     {
360         pair = &timers.emplace_back(timerUsed, boost::asio::steady_timer(io));
361     }
362 
363     pair->first.used = true;
364     pair->first.level = threshold.level;
365     pair->first.direction = threshold.direction;
366     pair->first.assert = assert;
367     pair->second.expires_after(std::chrono::seconds(waitTime));
368     pair->second.async_wait([weakSensor, pair, threshold, assert,
369                              assertValue](boost::system::error_code ec) {
370         auto sensorPtr = weakSensor.lock();
371         if (!sensorPtr)
372         {
373             return; // owner sensor has been destructed
374         }
375         // pair is valid as long as sensor is valid
376         pair->first.used = false;
377 
378         if (ec == boost::asio::error::operation_aborted)
379         {
380             return; // we're being canceled
381         }
382         if (ec)
383         {
384             lg2::error("timer error: '{ERROR_MESSAGE}'", "ERROR_MESSAGE",
385                        ec.message());
386             return;
387         }
388         if (sensorPtr->readingStateGood())
389         {
390             assertThresholds(sensorPtr.get(), assertValue, threshold.level,
391                              threshold.direction, assert);
392         }
393     });
394 }
395 
checkThresholds(Sensor * sensor)396 bool checkThresholds(Sensor* sensor)
397 {
398     bool status = true;
399     std::vector<ChangeParam> changes = checkThresholds(sensor, sensor->value);
400     for (const auto& change : changes)
401     {
402         assertThresholds(sensor, change.assertValue, change.threshold.level,
403                          change.threshold.direction, change.asserted);
404         if (change.threshold.level == thresholds::Level::CRITICAL &&
405             change.asserted)
406         {
407             status = false;
408         }
409     }
410 
411     return status;
412 }
413 
checkThresholdsPowerDelay(const std::weak_ptr<Sensor> & weakSensor,ThresholdTimer & thresholdTimer)414 void checkThresholdsPowerDelay(const std::weak_ptr<Sensor>& weakSensor,
415                                ThresholdTimer& thresholdTimer)
416 {
417     auto sensorPtr = weakSensor.lock();
418     if (!sensorPtr)
419     {
420         return; // sensor is destructed, should never be here
421     }
422 
423     Sensor* sensor = sensorPtr.get();
424     std::vector<ChangeParam> changes = checkThresholds(sensor, sensor->value);
425     for (const auto& change : changes)
426     {
427         // When CPU is powered off, some volatges are expected to
428         // go below low thresholds. Filter these events with thresholdTimer.
429         // 1. always delay the assertion of low events to see if they are
430         //   caused by power off event.
431         // 2. conditional delay the de-assertion of low events if there is
432         //   an existing timer for assertion.
433         // 3. no delays for de-assert of low events if there is an existing
434         //   de-assert for low event. This means 2nd de-assert would happen
435         //   first and when timer expires for the previous one, no additional
436         //   signal will be logged.
437         // 4. no delays for all high events.
438         if (change.threshold.direction == thresholds::Direction::LOW)
439         {
440             if (change.asserted || thresholdTimer.hasActiveTimer(
441                                        change.threshold, !change.asserted))
442             {
443                 thresholdTimer.startTimer(weakSensor, change.threshold,
444                                           change.asserted, change.assertValue);
445                 continue;
446             }
447         }
448         assertThresholds(sensor, change.assertValue, change.threshold.level,
449                          change.threshold.direction, change.asserted);
450     }
451 }
452 
assertThresholds(Sensor * sensor,double assertValue,thresholds::Level level,thresholds::Direction direction,bool assert)453 void assertThresholds(Sensor* sensor, double assertValue,
454                       thresholds::Level level, thresholds::Direction direction,
455                       bool assert)
456 {
457     std::shared_ptr<sdbusplus::asio::dbus_interface> interface =
458         sensor->getThresholdInterface(level);
459 
460     if (!interface)
461     {
462         lg2::info("trying to set uninitialized interface");
463         return;
464     }
465 
466     std::string property = Sensor::propertyAlarm(level, direction);
467     if (property.empty())
468     {
469         lg2::info("Alarm property is empty");
470         return;
471     }
472     if (interface->set_property<bool, true>(property, assert))
473     {
474         try
475         {
476             // msg.get_path() is interface->get_object_path()
477             sdbusplus::message_t msg =
478                 interface->new_signal("ThresholdAsserted");
479 
480             msg.append(sensor->name, interface->get_interface_name(), property,
481                        assert, assertValue);
482             msg.signal_send();
483         }
484         catch (const sdbusplus::exception_t& e)
485         {
486             lg2::error(
487                 "Failed to send thresholdAsserted signal with assertValue");
488         }
489     }
490 }
491 
parseThresholdsFromAttr(std::vector<thresholds::Threshold> & thresholdVector,const std::string & inputPath,const double & scaleFactor,const double & offset,const double & hysteresis)492 bool parseThresholdsFromAttr(
493     std::vector<thresholds::Threshold>& thresholdVector,
494     const std::string& inputPath, const double& scaleFactor,
495     const double& offset, const double& hysteresis)
496 {
497     const boost::container::flat_map<
498         std::string, std::vector<std::tuple<const char*, thresholds::Level,
499                                             thresholds::Direction, double>>>
500         map = {
501             {"average",
502              {
503                  std::make_tuple("average_min", Level::WARNING, Direction::LOW,
504                                  0.0),
505                  std::make_tuple("average_max", Level::WARNING, Direction::HIGH,
506                                  0.0),
507              }},
508             {"input",
509              {
510                  std::make_tuple("min", Level::WARNING, Direction::LOW, 0.0),
511                  std::make_tuple("max", Level::WARNING, Direction::HIGH, 0.0),
512                  std::make_tuple("lcrit", Level::CRITICAL, Direction::LOW, 0.0),
513                  std::make_tuple("crit", Level::CRITICAL, Direction::HIGH,
514                                  offset),
515              }},
516         };
517 
518     if (auto fileParts = splitFileName(inputPath))
519     {
520         auto& [type, nr, item] = *fileParts;
521         if (map.count(item) != 0)
522         {
523             for (const auto& t : map.at(item))
524             {
525                 const auto& [suffix, level, direction, offset] = t;
526                 auto attrPath =
527                     boost::replace_all_copy(inputPath, item, suffix);
528                 if (auto val = readFile(attrPath, scaleFactor))
529                 {
530                     *val += offset;
531                     if (debug)
532                     {
533                         lg2::info("Threshold: '{PATH}': '{VALUE}'", "PATH",
534                                   attrPath, "VALUE", *val);
535                     }
536                     thresholdVector.emplace_back(level, direction, *val,
537                                                  hysteresis);
538                 }
539             }
540         }
541     }
542     return true;
543 }
544 
getInterface(const Level thresholdLevel)545 std::string getInterface(const Level thresholdLevel)
546 {
547     for (const ThresholdDefinition& thresh : thresProp)
548     {
549         if (thresh.level == thresholdLevel)
550         {
551             return std::string("xyz.openbmc_project.Sensor.Threshold.") +
552                    thresh.levelName;
553         }
554     }
555     return "";
556 }
557 } // namespace thresholds
558