1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "threshold_alarm_logger.hpp" 17 18 #include "sdbusplus.hpp" 19 20 #include <unistd.h> 21 22 #include <phosphor-logging/log.hpp> 23 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 24 25 #include <format> 26 27 namespace sensor::monitor 28 { 29 30 using namespace sdbusplus::xyz::openbmc_project::Logging::server; 31 using namespace phosphor::logging; 32 using namespace phosphor::fan; 33 using namespace phosphor::fan::util; 34 35 const std::string warningInterface = 36 "xyz.openbmc_project.Sensor.Threshold.Warning"; 37 const std::string criticalInterface = 38 "xyz.openbmc_project.Sensor.Threshold.Critical"; 39 const std::string perfLossInterface = 40 "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"; 41 constexpr auto loggingService = "xyz.openbmc_project.Logging"; 42 constexpr auto loggingPath = "/xyz/openbmc_project/logging"; 43 constexpr auto loggingCreateIface = "xyz.openbmc_project.Logging.Create"; 44 constexpr auto errorNameBase = "xyz.openbmc_project.Sensor.Threshold.Error."; 45 constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value"; 46 constexpr auto assocInterface = "xyz.openbmc_project.Association"; 47 48 const std::vector<std::string> thresholdIfaceNames{ 49 warningInterface, criticalInterface, perfLossInterface}; 50 51 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>; 52 53 /** 54 * Map of threshold interfaces and alarm properties and values to error data. 55 */ 56 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>> 57 thresholdData{ 58 59 {warningInterface, 60 {{"WarningAlarmHigh", 61 {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}}, 62 {false, 63 ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}}, 64 {"WarningAlarmLow", 65 {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}}, 66 {false, 67 ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}}, 68 69 {criticalInterface, 70 {{"CriticalAlarmHigh", 71 {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}}, 72 {false, 73 ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}}, 74 {"CriticalAlarmLow", 75 {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}}, 76 {false, ErrorData{"CriticalLow", "Clear", 77 Entry::Level::Informational}}}}}}, 78 79 {perfLossInterface, 80 {{"PerfLossAlarmHigh", 81 {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}}, 82 {false, ErrorData{"PerformanceLossHigh", "Clear", 83 Entry::Level::Informational}}}}, 84 {"PerfLossAlarmLow", 85 {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}}, 86 {false, ErrorData{"PerformanceLossLow", "Clear", 87 Entry::Level::Informational}}}}}}}; 88 89 ThresholdAlarmLogger::ThresholdAlarmLogger( 90 sdbusplus::bus_t& bus, sdeventplus::Event& event, 91 std::shared_ptr<PowerState> powerState) : 92 bus(bus), event(event), _powerState(std::move(powerState)), 93 warningMatch(bus, 94 "type='signal',member='PropertiesChanged'," 95 "path_namespace='/xyz/openbmc_project/sensors'," 96 "arg0='" + 97 warningInterface + "'", 98 std::bind(&ThresholdAlarmLogger::propertiesChanged, this, 99 std::placeholders::_1)), 100 criticalMatch(bus, 101 "type='signal',member='PropertiesChanged'," 102 "path_namespace='/xyz/openbmc_project/sensors'," 103 "arg0='" + 104 criticalInterface + "'", 105 std::bind(&ThresholdAlarmLogger::propertiesChanged, this, 106 std::placeholders::_1)), 107 perfLossMatch(bus, 108 "type='signal',member='PropertiesChanged'," 109 "path_namespace='/xyz/openbmc_project/sensors'," 110 "arg0='" + 111 perfLossInterface + "'", 112 std::bind(&ThresholdAlarmLogger::propertiesChanged, this, 113 std::placeholders::_1)), 114 ifacesRemovedMatch(bus, 115 "type='signal',member='InterfacesRemoved',arg0path=" 116 "'/xyz/openbmc_project/sensors/'", 117 std::bind(&ThresholdAlarmLogger::interfacesRemoved, this, 118 std::placeholders::_1)), 119 ifacesAddedMatch(bus, 120 "type='signal',member='InterfacesAdded',arg0path=" 121 "'/xyz/openbmc_project/sensors/'", 122 std::bind(&ThresholdAlarmLogger::interfacesAdded, this, 123 std::placeholders::_1)) 124 { 125 _powerState->addCallback("thresholdMon", 126 std::bind(&ThresholdAlarmLogger::powerStateChanged, 127 this, std::placeholders::_1)); 128 129 // check for any currently asserted threshold alarms 130 std::for_each( 131 thresholdData.begin(), thresholdData.end(), 132 [this](const auto& thresholdInterface) { 133 const auto& interface = thresholdInterface.first; 134 auto objects = 135 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0); 136 std::for_each(objects.begin(), objects.end(), 137 [interface, this](const auto& object) { 138 const auto& path = object.first; 139 const auto& service = 140 object.second.begin()->first; 141 checkThresholds(interface, path, service); 142 }); 143 }); 144 } 145 146 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg) 147 { 148 std::map<std::string, std::variant<bool>> properties; 149 std::string sensorPath = msg.get_path(); 150 std::string interface; 151 152 msg.read(interface, properties); 153 154 checkProperties(sensorPath, interface, properties); 155 } 156 157 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg) 158 { 159 sdbusplus::message::object_path path; 160 std::vector<std::string> interfaces; 161 162 msg.read(path, interfaces); 163 164 for (const auto& interface : interfaces) 165 { 166 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(), 167 interface) != thresholdIfaceNames.end()) 168 { 169 alarms.erase(InterfaceKey{path, interface}); 170 } 171 } 172 } 173 174 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg) 175 { 176 sdbusplus::message::object_path path; 177 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 178 179 msg.read(path, interfaces); 180 181 for (const auto& [interface, properties] : interfaces) 182 { 183 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(), 184 interface) != thresholdIfaceNames.end()) 185 { 186 checkProperties(path, interface, properties); 187 } 188 } 189 } 190 191 void ThresholdAlarmLogger::checkProperties( 192 const std::string& sensorPath, const std::string& interface, 193 const std::map<std::string, std::variant<bool>>& properties) 194 { 195 auto alarmProperties = thresholdData.find(interface); 196 if (alarmProperties == thresholdData.end()) 197 { 198 return; 199 } 200 201 for (const auto& [propertyName, propertyValue] : properties) 202 { 203 if (alarmProperties->second.find(propertyName) != 204 alarmProperties->second.end()) 205 { 206 // If this is the first time we've seen this alarm, then 207 // assume it was off before so it doesn't create an event 208 // log for a value of false. 209 210 InterfaceKey key{sensorPath, interface}; 211 if (alarms.find(key) == alarms.end()) 212 { 213 alarms[key][propertyName] = false; 214 } 215 216 // Check if the value changed from what was there before. 217 auto alarmValue = std::get<bool>(propertyValue); 218 if (alarmValue != alarms[key][propertyName]) 219 { 220 alarms[key][propertyName] = alarmValue; 221 222 if (_powerState->isPowerOn()) 223 { 224 createEventLog(sensorPath, interface, propertyName, 225 alarmValue); 226 } 227 } 228 } 229 } 230 } 231 232 void ThresholdAlarmLogger::checkThresholds(const std::string& interface, 233 const std::string& sensorPath, 234 const std::string& service) 235 { 236 auto properties = thresholdData.find(interface); 237 if (properties == thresholdData.end()) 238 { 239 return; 240 } 241 242 for (const auto& [property, unused] : properties->second) 243 { 244 try 245 { 246 auto alarmValue = SDBusPlus::getProperty<bool>( 247 bus, service, sensorPath, interface, property); 248 alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue; 249 250 // This is just for checking alarms on startup, 251 // so only look for active alarms. 252 if (alarmValue && _powerState->isPowerOn()) 253 { 254 createEventLog(sensorPath, interface, property, alarmValue); 255 } 256 } 257 catch (const sdbusplus::exception_t& e) 258 { 259 // Sensor daemons that get their direction from entity manager 260 // may only be putting either the high alarm or low alarm on 261 // D-Bus, not both. 262 continue; 263 } 264 } 265 } 266 267 void ThresholdAlarmLogger::createEventLog( 268 const std::string& sensorPath, const std::string& interface, 269 const std::string& alarmProperty, bool alarmValue) 270 { 271 std::map<std::string, std::string> ad; 272 273 auto type = getSensorType(sensorPath); 274 if (skipSensorType(type)) 275 { 276 return; 277 } 278 279 auto it = thresholdData.find(interface); 280 if (it == thresholdData.end()) 281 { 282 return; 283 } 284 285 auto properties = it->second.find(alarmProperty); 286 if (properties == it->second.end()) 287 { 288 log<level::INFO>( 289 std::format("Could not find {} in threshold alarms map", 290 alarmProperty) 291 .c_str()); 292 return; 293 } 294 295 ad.emplace("SENSOR_NAME", sensorPath); 296 ad.emplace("_PID", std::to_string(getpid())); 297 298 try 299 { 300 auto sensorValue = SDBusPlus::getProperty<double>( 301 bus, sensorPath, valueInterface, "Value"); 302 303 ad.emplace("SENSOR_VALUE", std::to_string(sensorValue)); 304 305 log<level::INFO>( 306 std::format("Threshold Event {} {} = {} (sensor value {})", 307 sensorPath, alarmProperty, alarmValue, sensorValue) 308 .c_str()); 309 } 310 catch (const DBusServiceError& e) 311 { 312 // If the sensor was just added, the Value interface for it may 313 // not be in the mapper yet. This could only happen if the sensor 314 // application was started up after this one and the value exceeded the 315 // threshold immediately. 316 log<level::INFO>(std::format("Threshold Event {} {} = {}", sensorPath, 317 alarmProperty, alarmValue) 318 .c_str()); 319 } 320 321 auto callout = getCallout(sensorPath); 322 if (!callout.empty()) 323 { 324 ad.emplace("CALLOUT_INVENTORY_PATH", callout); 325 } 326 327 auto errorData = properties->second.find(alarmValue); 328 329 // Add the base error name and the sensor type (like Temperature) to the 330 // error name that's in the thresholdData name to get something like 331 // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh 332 const auto& [name, status, severity] = errorData->second; 333 334 try 335 { 336 auto thresholdValue = 337 SDBusPlus::getProperty<double>(bus, sensorPath, interface, name); 338 339 ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue)); 340 341 log<level::INFO>( 342 std::format("Threshold Event {} {} = {} (threshold value {})", 343 sensorPath, alarmProperty, alarmValue, thresholdValue) 344 .c_str()); 345 } 346 catch (const DBusServiceError& e) 347 { 348 log<level::INFO>(std::format("Threshold Event {} {} = {}", sensorPath, 349 alarmProperty, alarmValue) 350 .c_str()); 351 } 352 353 type.front() = toupper(type.front()); 354 std::string errorName = errorNameBase + type + name + status; 355 356 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface, 357 "Create", errorName, convertForMessage(severity), ad); 358 } 359 360 std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath) 361 { 362 auto pos = sensorPath.find_last_of('/'); 363 if ((sensorPath.back() == '/') || (pos == std::string::npos)) 364 { 365 log<level::ERR>( 366 std::format("Cannot get sensor type from sensor path {}", 367 sensorPath) 368 .c_str()); 369 throw std::runtime_error("Invalid sensor path"); 370 } 371 372 sensorPath = sensorPath.substr(0, pos); 373 return sensorPath.substr(sensorPath.find_last_of('/') + 1); 374 } 375 376 bool ThresholdAlarmLogger::skipSensorType(const std::string& type) 377 { 378 return (type == "utilization"); 379 } 380 381 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath) 382 { 383 const std::array<std::string, 2> assocTypes{"inventory", "chassis"}; 384 385 // Different implementations handle the association to the FRU 386 // differently: 387 // * phosphor-inventory-manager uses the 'inventory' association 388 // to point to the FRU. 389 // * dbus-sensors/entity-manager uses the 'chassis' association'. 390 // * For virtual sensors, no association. 391 392 for (const auto& assocType : assocTypes) 393 { 394 auto assocPath = sensorPath + "/" + assocType; 395 396 try 397 { 398 auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>( 399 bus, assocPath, assocInterface, "endpoints"); 400 401 if (!endpoints.empty()) 402 { 403 return endpoints[0]; 404 } 405 } 406 catch (const DBusServiceError& e) 407 { 408 // The association doesn't exist 409 continue; 410 } 411 } 412 413 return std::string{}; 414 } 415 416 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn) 417 { 418 if (powerStateOn) 419 { 420 checkThresholds(); 421 } 422 } 423 424 void ThresholdAlarmLogger::checkThresholds() 425 { 426 std::vector<InterfaceKey> toErase; 427 428 for (const auto& [interfaceKey, alarmMap] : alarms) 429 { 430 for (const auto& [propertyName, alarmValue] : alarmMap) 431 { 432 if (alarmValue) 433 { 434 const auto& sensorPath = std::get<0>(interfaceKey); 435 const auto& interface = std::get<1>(interfaceKey); 436 std::string service; 437 438 try 439 { 440 // Check that the service that provides the alarm is still 441 // running, because if it died when the alarm was active 442 // there would be no indication of it unless we listened 443 // for NameOwnerChanged and tracked services, and this is 444 // easier. 445 service = SDBusPlus::getService(bus, sensorPath, interface); 446 } 447 catch (const DBusServiceError& e) 448 { 449 // No longer on D-Bus delete the alarm entry 450 toErase.emplace_back(sensorPath, interface); 451 } 452 453 if (!service.empty()) 454 { 455 createEventLog(sensorPath, interface, propertyName, 456 alarmValue); 457 } 458 } 459 } 460 } 461 462 for (const auto& e : toErase) 463 { 464 alarms.erase(e); 465 } 466 } 467 468 } // namespace sensor::monitor 469