xref: /openbmc/phosphor-health-monitor/health_metric.cpp (revision 2d4cbeb09fa719a4caa3d497f1a67bb572546313)
1 #include "health_metric.hpp"
2 
3 #include <phosphor-logging/lg2.hpp>
4 
5 #include <cmath>
6 #include <numeric>
7 #include <unordered_map>
8 
9 PHOSPHOR_LOG2_USING;
10 
11 namespace phosphor::health::metric
12 {
13 
14 using association_t = std::tuple<std::string, std::string, std::string>;
15 
getPath(MType type,std::string name,SubType subType)16 auto HealthMetric::getPath(MType type, std::string name, SubType subType)
17     -> std::string
18 {
19     std::string path;
20     switch (subType)
21     {
22         case SubType::cpuTotal:
23         {
24             return std::string(BmcPath) + "/" + PathIntf::total_cpu;
25         }
26         case SubType::cpuKernel:
27         {
28             return std::string(BmcPath) + "/" + PathIntf::kernel_cpu;
29         }
30         case SubType::cpuUser:
31         {
32             return std::string(BmcPath) + "/" + PathIntf::user_cpu;
33         }
34         case SubType::memoryAvailable:
35         {
36             return std::string(BmcPath) + "/" + PathIntf::available_memory;
37         }
38         case SubType::memoryBufferedAndCached:
39         {
40             return std::string(BmcPath) + "/" +
41                    PathIntf::buffered_and_cached_memory;
42         }
43         case SubType::memoryFree:
44         {
45             return std::string(BmcPath) + "/" + PathIntf::free_memory;
46         }
47         case SubType::memoryShared:
48         {
49             return std::string(BmcPath) + "/" + PathIntf::shared_memory;
50         }
51         case SubType::memoryTotal:
52         {
53             return std::string(BmcPath) + "/" + PathIntf::total_memory;
54         }
55         case SubType::NA:
56         {
57             if (type == MType::storage)
58             {
59                 static constexpr auto nameDelimiter = "_";
60                 auto storageType = name.substr(
61                     name.find_last_of(nameDelimiter) + 1, name.length());
62                 std::ranges::for_each(storageType, [](auto& c) {
63                     c = std::tolower(c);
64                 });
65                 return std::string(BmcPath) + "/" + PathIntf::storage + "/" +
66                        storageType;
67             }
68             else
69             {
70                 error("Invalid metric {SUBTYPE} for metric {TYPE}", "SUBTYPE",
71                       subType, "TYPE", type);
72                 return "";
73             }
74         }
75         default:
76         {
77             error("Invalid metric {SUBTYPE}", "SUBTYPE", subType);
78             return "";
79         }
80     }
81 }
82 
initProperties()83 void HealthMetric::initProperties()
84 {
85     switch (type)
86     {
87         case MType::cpu:
88         {
89             ValueIntf::unit(ValueIntf::Unit::Percent, true);
90             ValueIntf::minValue(0.0, true);
91             ValueIntf::maxValue(100.0, true);
92             break;
93         }
94         case MType::memory:
95         case MType::storage:
96         {
97             ValueIntf::unit(ValueIntf::Unit::Bytes, true);
98             ValueIntf::minValue(0.0, true);
99             break;
100         }
101         case MType::inode:
102         case MType::unknown:
103         default:
104         {
105             throw std::invalid_argument("Invalid metric type");
106         }
107     }
108     ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true);
109 
110     using bound_map_t = std::map<Bound, double>;
111     std::map<Type, bound_map_t> thresholds;
112     for (const auto& [key, value] : config.thresholds)
113     {
114         auto type = std::get<Type>(key);
115         auto bound = std::get<Bound>(key);
116         auto threshold = thresholds.find(type);
117         if (threshold == thresholds.end())
118         {
119             bound_map_t bounds;
120             bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN());
121             thresholds.emplace(type, bounds);
122         }
123         else
124         {
125             threshold->second.emplace(bound, value.value);
126         }
127     }
128     ThresholdIntf::value(thresholds, true);
129 }
130 
didThresholdViolate(ThresholdIntf::Bound bound,double thresholdValue,double value)131 bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue,
132                          double value)
133 {
134     switch (bound)
135     {
136         case ThresholdIntf::Bound::Lower:
137         {
138             return (value < thresholdValue);
139         }
140         case ThresholdIntf::Bound::Upper:
141         {
142             return (value > thresholdValue);
143         }
144         default:
145         {
146             error("Invalid threshold bound {BOUND}", "BOUND", bound);
147             return false;
148         }
149     }
150 }
151 
checkThreshold(Type type,Bound bound,MValue value)152 void HealthMetric::checkThreshold(Type type, Bound bound, MValue value)
153 {
154     auto threshold = std::make_tuple(type, bound);
155     auto thresholds = ThresholdIntf::value();
156 
157     if (thresholds.contains(type) && thresholds[type].contains(bound))
158     {
159         auto tConfig = config.thresholds.at(threshold);
160         auto thresholdValue = tConfig.value / 100 * value.total;
161         thresholds[type][bound] = thresholdValue;
162         ThresholdIntf::value(thresholds);
163         auto assertions = ThresholdIntf::asserted();
164         if (didThresholdViolate(bound, thresholdValue, value.current))
165         {
166             if (!assertions.contains(threshold))
167             {
168                 assertions.insert(threshold);
169                 ThresholdIntf::asserted(assertions);
170                 ThresholdIntf::assertionChanged(type, bound, true,
171                                                 value.current);
172                 if (tConfig.log)
173                 {
174                     error(
175                         "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold",
176                         "METRIC", config.name, "TYPE", type);
177                     startUnit(bus, tConfig.target);
178                 }
179             }
180             return;
181         }
182         else if (assertions.contains(threshold))
183         {
184             assertions.erase(threshold);
185             ThresholdIntf::asserted(assertions);
186             ThresholdIntf::assertionChanged(type, bound, false, value.current);
187             if (config.thresholds.find(threshold)->second.log)
188             {
189                 info(
190                     "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold",
191                     "METRIC", config.name, "TYPE", type);
192             }
193         }
194     }
195 }
196 
checkThresholds(MValue value)197 void HealthMetric::checkThresholds(MValue value)
198 {
199     if (!ThresholdIntf::value().empty())
200     {
201         for (auto type : {Type::HardShutdown, Type::SoftShutdown,
202                           Type::PerformanceLoss, Type::Critical, Type::Warning})
203         {
204             checkThreshold(type, Bound::Lower, value);
205             checkThreshold(type, Bound::Upper, value);
206         }
207     }
208 }
209 
shouldNotify(MValue value)210 auto HealthMetric::shouldNotify(MValue value) -> bool
211 {
212     if (std::isnan(value.current))
213     {
214         return true;
215     }
216     auto changed = std::abs(
217         (value.current - lastNotifiedValue) / lastNotifiedValue * 100.0);
218     if (changed >= config.hysteresis)
219     {
220         lastNotifiedValue = value.current;
221         return true;
222     }
223     return false;
224 }
225 
update(MValue value)226 void HealthMetric::update(MValue value)
227 {
228     ValueIntf::value(value.current, !shouldNotify(value));
229 
230     // Maintain window size for threshold calculation
231     if (history.size() >= config.windowSize)
232     {
233         history.pop_front();
234     }
235     history.push_back(value.current);
236 
237     if (history.size() < config.windowSize)
238     {
239         // Wait for the metric to have enough samples to calculate average
240         return;
241     }
242 
243     double average =
244         (std::accumulate(history.begin(), history.end(), 0.0)) / history.size();
245     value.current = average;
246     checkThresholds(value);
247 }
248 
create(const paths_t & bmcPaths)249 void HealthMetric::create(const paths_t& bmcPaths)
250 {
251     info("Create Health Metric: {METRIC}", "METRIC", config.name);
252     initProperties();
253 
254     std::vector<association_t> associations;
255     static constexpr auto forwardAssociation = "measuring";
256     static constexpr auto reverseAssociation = "measured_by";
257     for (const auto& bmcPath : bmcPaths)
258     {
259         /*
260          * This metric is "measuring" the health for the BMC at bmcPath
261          * The BMC at bmcPath is "measured_by" this metric.
262          */
263         associations.push_back(
264             {forwardAssociation, reverseAssociation, bmcPath});
265     }
266     AssociationIntf::associations(associations);
267 }
268 
269 } // namespace phosphor::health::metric
270