1 #include "health_metric.hpp" 2 3 #include <phosphor-logging/lg2.hpp> 4 5 #include <cmath> 6 #include <numeric> 7 #include <unordered_map> 8 9 PHOSPHOR_LOG2_USING; 10 11 namespace phosphor::health::metric 12 { 13 14 using association_t = std::tuple<std::string, std::string, std::string>; 15 16 static constexpr double hysteresis = 1.0; 17 18 auto HealthMetric::getPath(MType type, std::string name, SubType subType) 19 -> std::string 20 { 21 std::string path; 22 switch (subType) 23 { 24 case SubType::cpuTotal: 25 { 26 return std::string(BmcPath) + "/" + PathIntf::total_cpu; 27 } 28 case SubType::cpuKernel: 29 { 30 return std::string(BmcPath) + "/" + PathIntf::kernel_cpu; 31 } 32 case SubType::cpuUser: 33 { 34 return std::string(BmcPath) + "/" + PathIntf::user_cpu; 35 } 36 case SubType::memoryAvailable: 37 { 38 return std::string(BmcPath) + "/" + PathIntf::available_memory; 39 } 40 case SubType::memoryBufferedAndCached: 41 { 42 return std::string(BmcPath) + "/" + 43 PathIntf::buffered_and_cached_memory; 44 } 45 case SubType::memoryFree: 46 { 47 return std::string(BmcPath) + "/" + PathIntf::free_memory; 48 } 49 case SubType::memoryShared: 50 { 51 return std::string(BmcPath) + "/" + PathIntf::shared_memory; 52 } 53 case SubType::memoryTotal: 54 { 55 return std::string(BmcPath) + "/" + PathIntf::total_memory; 56 } 57 case SubType::NA: 58 { 59 if (type == MType::storage) 60 { 61 static constexpr auto nameDelimiter = "_"; 62 auto storageType = name.substr( 63 name.find_last_of(nameDelimiter) + 1, name.length()); 64 std::ranges::for_each(storageType, 65 [](auto& c) { c = std::tolower(c); }); 66 return std::string(BmcPath) + "/" + PathIntf::storage + "/" + 67 storageType; 68 } 69 else 70 { 71 error("Invalid metric {SUBTYPE} for metric {TYPE}", "SUBTYPE", 72 subType, "TYPE", type); 73 return ""; 74 } 75 } 76 default: 77 { 78 error("Invalid metric {SUBTYPE}", "SUBTYPE", subType); 79 return ""; 80 } 81 } 82 } 83 84 void HealthMetric::initProperties() 85 { 86 switch (type) 87 { 88 case MType::cpu: 89 { 90 ValueIntf::unit(ValueIntf::Unit::Percent, true); 91 ValueIntf::minValue(0.0, true); 92 ValueIntf::maxValue(100.0, true); 93 break; 94 } 95 case MType::memory: 96 case MType::storage: 97 { 98 ValueIntf::unit(ValueIntf::Unit::Bytes, true); 99 ValueIntf::minValue(0.0, true); 100 break; 101 } 102 case MType::inode: 103 case MType::unknown: 104 default: 105 { 106 throw std::invalid_argument("Invalid metric type"); 107 } 108 } 109 ValueIntf::value(std::numeric_limits<double>::quiet_NaN(), true); 110 111 using bound_map_t = std::map<Bound, double>; 112 std::map<Type, bound_map_t> thresholds; 113 for (const auto& [key, value] : config.thresholds) 114 { 115 auto type = std::get<Type>(key); 116 auto bound = std::get<Bound>(key); 117 auto threshold = thresholds.find(type); 118 if (threshold == thresholds.end()) 119 { 120 bound_map_t bounds; 121 bounds.emplace(bound, std::numeric_limits<double>::quiet_NaN()); 122 thresholds.emplace(type, bounds); 123 } 124 else 125 { 126 threshold->second.emplace(bound, value.value); 127 } 128 } 129 ThresholdIntf::value(thresholds, true); 130 } 131 132 bool didThresholdViolate(ThresholdIntf::Bound bound, double thresholdValue, 133 double value) 134 { 135 switch (bound) 136 { 137 case ThresholdIntf::Bound::Lower: 138 { 139 return (value < thresholdValue); 140 } 141 case ThresholdIntf::Bound::Upper: 142 { 143 return (value > thresholdValue); 144 } 145 default: 146 { 147 error("Invalid threshold bound {BOUND}", "BOUND", bound); 148 return false; 149 } 150 } 151 } 152 153 void HealthMetric::checkThreshold(Type type, Bound bound, MValue value) 154 { 155 auto threshold = std::make_tuple(type, bound); 156 auto thresholds = ThresholdIntf::value(); 157 158 if (thresholds.contains(type) && thresholds[type].contains(bound)) 159 { 160 auto tConfig = config.thresholds.at(threshold); 161 auto thresholdValue = tConfig.value / 100 * value.total; 162 thresholds[type][bound] = thresholdValue; 163 ThresholdIntf::value(thresholds); 164 auto assertions = ThresholdIntf::asserted(); 165 if (didThresholdViolate(bound, thresholdValue, value.current)) 166 { 167 if (!assertions.contains(threshold)) 168 { 169 assertions.insert(threshold); 170 ThresholdIntf::asserted(assertions); 171 ThresholdIntf::assertionChanged(type, bound, true, 172 value.current); 173 if (tConfig.log) 174 { 175 error( 176 "ASSERT: Health Metric {METRIC} crossed {TYPE} upper threshold", 177 "METRIC", config.name, "TYPE", type); 178 startUnit(bus, tConfig.target); 179 } 180 } 181 return; 182 } 183 else if (assertions.contains(threshold)) 184 { 185 assertions.erase(threshold); 186 ThresholdIntf::asserted(assertions); 187 ThresholdIntf::assertionChanged(type, bound, false, value.current); 188 if (config.thresholds.find(threshold)->second.log) 189 { 190 info( 191 "DEASSERT: Health Metric {METRIC} is below {TYPE} upper threshold", 192 "METRIC", config.name, "TYPE", type); 193 } 194 } 195 } 196 } 197 198 void HealthMetric::checkThresholds(MValue value) 199 { 200 if (!ThresholdIntf::value().empty()) 201 { 202 for (auto type : {Type::HardShutdown, Type::SoftShutdown, 203 Type::PerformanceLoss, Type::Critical, Type::Warning}) 204 { 205 checkThreshold(type, Bound::Lower, value); 206 checkThreshold(type, Bound::Upper, value); 207 } 208 } 209 } 210 211 auto HealthMetric::shouldNotify(MValue value) -> bool 212 { 213 if (std::isnan(value.current)) 214 { 215 return true; 216 } 217 auto changed = std::abs((value.current - lastNotifiedValue) / 218 lastNotifiedValue * 100.0); 219 if (changed >= hysteresis) 220 { 221 lastNotifiedValue = value.current; 222 return true; 223 } 224 return false; 225 } 226 227 void HealthMetric::update(MValue value) 228 { 229 ValueIntf::value(value.current, !shouldNotify(value)); 230 231 // Maintain window size for threshold calculation 232 if (history.size() >= config.windowSize) 233 { 234 history.pop_front(); 235 } 236 history.push_back(value.current); 237 238 if (history.size() < config.windowSize) 239 { 240 // Wait for the metric to have enough samples to calculate average 241 debug("Not enough samples to calculate average"); 242 return; 243 } 244 245 double average = (std::accumulate(history.begin(), history.end(), 0.0)) / 246 history.size(); 247 value.current = average; 248 checkThresholds(value); 249 } 250 251 void HealthMetric::create(const paths_t& bmcPaths) 252 { 253 info("Create Health Metric: {METRIC}", "METRIC", config.name); 254 initProperties(); 255 256 std::vector<association_t> associations; 257 static constexpr auto forwardAssociation = "measuring"; 258 static constexpr auto reverseAssociation = "measured_by"; 259 for (const auto& bmcPath : bmcPaths) 260 { 261 /* 262 * This metric is "measuring" the health for the BMC at bmcPath 263 * The BMC at bmcPath is "measured_by" this metric. 264 */ 265 associations.push_back( 266 {forwardAssociation, reverseAssociation, bmcPath}); 267 } 268 AssociationIntf::associations(associations); 269 } 270 271 } // namespace phosphor::health::metric 272