xref: /openbmc/phosphor-health-monitor/health_metric.cpp (revision 2d4cbeb09fa719a4caa3d497f1a67bb572546313)
1  #include "health_metric.hpp"
2  
3  #include <phosphor-logging/lg2.hpp>
4  
5  #include <cmath>
6  #include <numeric>
7  #include <unordered_map>
8  
9  PHOSPHOR_LOG2_USING;
10  
11  namespace phosphor::health::metric
12  {
13  
// One association entry: (forward association name, reverse association name,
// path of the associated object), in the order used when filling the list in
// HealthMetric::create().
using association_t = std::tuple<std::string, std::string, std::string>;
15  
getPath(MType type,std::string name,SubType subType)16  auto HealthMetric::getPath(MType type, std::string name, SubType subType)
17      -> std::string
18  {
19      std::string path;
20      switch (subType)
21      {
22          case SubType::cpuTotal:
23          {
24              return std::string(BmcPath) + "/" + PathIntf::total_cpu;
25          }
26          case SubType::cpuKernel:
27          {
28              return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
29          }
30          case SubType::cpuUser:
31          {
32              return std::string(BmcPath) + "/" + PathIntf::user_cpu;
33          }
34          case SubType::memoryAvailable:
35          {
36              return std::string(BmcPath) + "/" + PathIntf::available_memory;
37          }
38          case SubType::memoryBufferedAndCached:
39          {
40              return std::string(BmcPath) + "/" +
41                     PathIntf::buffered_and_cached_memory;
42          }
43          case SubType::memoryFree:
44          {
45              return std::string(BmcPath) + "/" + PathIntf::free_memory;
46          }
47          case SubType::memoryShared:
48          {
49              return std::string(BmcPath) + "/" + PathIntf::shared_memory;
50          }
51          case SubType::memoryTotal:
52          {
53              return std::string(BmcPath) + "/" + PathIntf::total_memory;
54          }
55          case SubType::NA:
56          {
57              if (type == MType::storage)
58              {
59                  static constexpr auto nameDelimiter = "_";
60                  auto storageType = name.substr(
61                      name.find_last_of(nameDelimiter) + 1, name.length());
62                  std::ranges::for_each(storageType, [](auto& c) {
63                      c = std::tolower(c);
64                  });
65                  return std::string(BmcPath) + "/" + PathIntf::storage + "/" +
66                         storageType;
67              }
68              else
69              {
70                  error("Invalid metric {SUBTYPE} for metric {TYPE}", "SUBTYPE",
71                        subType, "TYPE", type);
72                  return "";
73              }
74          }
75          default:
76          {
77              error("Invalid metric {SUBTYPE}", "SUBTYPE", subType);
78              return "";
79          }
80      }
81  }
82  
initProperties()83  void HealthMetric::initProperties()
84  {
85      switch (type)
86      {
87          case MType::cpu:
88          {
89              ValueIntf::unit(ValueIntf::Unit::Percent, true);
90              ValueIntf::minValue(0.0, true);
91              ValueIntf::maxValue(100.0, true);
92              break;
93          }
94          case MType::memory:
95          case MType::storage:
96          {
97              ValueIntf::unit(ValueIntf::Unit::Bytes, true);
98              ValueIntf::minValue(0.0, true);
99              break;
100          }
101          case MType::inode:
102          case MType::unknown:
103          default:
104          {
105              throw std::invalid_argument("Invalid metric type");
106          }
107      }
108      ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
109  
110      using bound_map_t = std::map<Bound, double>;
111      std::map<Type, bound_map_t> thresholds;
112      for (const auto& [key, value] : config.thresholds)
113      {
114          auto type = std::get<Type>(key);
115          auto bound = std::get<Bound>(key);
116          auto threshold = thresholds.find(type);
117          if (threshold == thresholds.end())
118          {
119              bound_map_t bounds;
120              bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN());
121              thresholds.emplace(type, bounds);
122          }
123          else
124          {
125              threshold->second.emplace(bound, value.value);
126          }
127      }
128      ThresholdIntf::value(thresholds, true);
129  }
130  
didThresholdViolate(ThresholdIntf::Bound bound,double thresholdValue,double value)131  bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
132                           double value)
133  {
134      switch (bound)
135      {
136          case ThresholdIntf::Bound::Lower:
137          {
138              return (value < thresholdValue);
139          }
140          case ThresholdIntf::Bound::Upper:
141          {
142              return (value > thresholdValue);
143          }
144          default:
145          {
146              error("Invalid threshold bound {BOUND}", "BOUND", bound);
147              return false;
148          }
149      }
150  }
151  
checkThreshold(Type type,Bound bound,MValue value)152  void HealthMetric::checkThreshold(Type type, Bound bound, MValue value)
153  {
154      auto threshold = std::make_tuple(type, bound);
155      auto thresholds = ThresholdIntf::value();
156  
157      if (thresholds.contains(type) && thresholds[type].contains(bound))
158      {
159          auto tConfig = config.thresholds.at(threshold);
160          auto thresholdValue = tConfig.value / 100 * value.total;
161          thresholds[type][bound] = thresholdValue;
162          ThresholdIntf::value(thresholds);
163          auto assertions = ThresholdIntf::asserted();
164          if (didThresholdViolate(bound, thresholdValue, value.current))
165          {
166              if (!assertions.contains(threshold))
167              {
168                  assertions.insert(threshold);
169                  ThresholdIntf::asserted(assertions);
170                  ThresholdIntf::assertionChanged(type, bound, true,
171                                                  value.current);
172                  if (tConfig.log)
173                  {
174                      error(
175                          "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
176                          "METRIC", config.name, "TYPE", type);
177                      startUnit(bus, tConfig.target);
178                  }
179              }
180              return;
181          }
182          else if (assertions.contains(threshold))
183          {
184              assertions.erase(threshold);
185              ThresholdIntf::asserted(assertions);
186              ThresholdIntf::assertionChanged(type, bound, false, value.current);
187              if (config.thresholds.find(threshold)->second.log)
188              {
189                  info(
190                      "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
191                      "METRIC", config.name, "TYPE", type);
192              }
193          }
194      }
195  }
196  
checkThresholds(MValue value)197  void HealthMetric::checkThresholds(MValue value)
198  {
199      if (!ThresholdIntf::value().empty())
200      {
201          for (auto type : {Type::HardShutdown, Type::SoftShutdown,
202                            Type::PerformanceLoss, Type::Critical, Type::Warning})
203          {
204              checkThreshold(type, Bound::Lower, value);
205              checkThreshold(type, Bound::Upper, value);
206          }
207      }
208  }
209  
shouldNotify(MValue value)210  auto HealthMetric::shouldNotify(MValue value) -> bool
211  {
212      if (std::isnan(value.current))
213      {
214          return true;
215      }
216      auto changed = std::abs(
217          (value.current - lastNotifiedValue) / lastNotifiedValue * 100.0);
218      if (changed >= config.hysteresis)
219      {
220          lastNotifiedValue = value.current;
221          return true;
222      }
223      return false;
224  }
225  
update(MValue value)226  void HealthMetric::update(MValue value)
227  {
228      ValueIntf::value(value.current, !shouldNotify(value));
229  
230      // Maintain window size for threshold calculation
231      if (history.size() >= config.windowSize)
232      {
233          history.pop_front();
234      }
235      history.push_back(value.current);
236  
237      if (history.size() < config.windowSize)
238      {
239          // Wait for the metric to have enough samples to calculate average
240          return;
241      }
242  
243      double average =
244          (std::accumulate(history.begin(), history.end(), 0.0)) / history.size();
245      value.current = average;
246      checkThresholds(value);
247  }
248  
create(const paths_t & bmcPaths)249  void HealthMetric::create(const paths_t& bmcPaths)
250  {
251      info("Create Health Metric: {METRIC}", "METRIC", config.name);
252      initProperties();
253  
254      std::vector<association_t> associations;
255      static constexpr auto forwardAssociation = "measuring";
256      static constexpr auto reverseAssociation = "measured_by";
257      for (const auto& bmcPath : bmcPaths)
258      {
259          /*
260           * This metric is "measuring" the health for the BMC at bmcPath
261           * The BMC at bmcPath is "measured_by" this metric.
262           */
263          associations.push_back(
264              {forwardAssociation, reverseAssociation, bmcPath});
265      }
266      AssociationIntf::associations(associations);
267  }
268  
269  } // namespace phosphor::health::metric
270