1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "threshold_alarm_logger.hpp" 19 20 #include "sdbusplus.hpp" 21 22 #include <unistd.h> 23 24 #include <phosphor-logging/lg2.hpp> 25 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 26 27 namespace sensor::monitor 28 { 29 30 using namespace sdbusplus::xyz::openbmc_project::Logging::server; 31 using namespace phosphor::fan; 32 using namespace phosphor::fan::util; 33 34 const std::string warningInterface = 35 "xyz.openbmc_project.Sensor.Threshold.Warning"; 36 const std::string criticalInterface = 37 "xyz.openbmc_project.Sensor.Threshold.Critical"; 38 const std::string perfLossInterface = 39 "xyz.openbmc_project.Sensor.Threshold.PerformanceLoss"; 40 constexpr auto loggingService = "xyz.openbmc_project.Logging"; 41 constexpr auto loggingPath = "/xyz/openbmc_project/logging"; 42 constexpr auto loggingCreateIface = "xyz.openbmc_project.Logging.Create"; 43 constexpr auto errorNameBase = "xyz.openbmc_project.Sensor.Threshold.Error."; 44 constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value"; 45 constexpr auto assocInterface = "xyz.openbmc_project.Association"; 46 47 const std::vector<std::string> thresholdIfaceNames{ 48 warningInterface, criticalInterface, perfLossInterface}; 49 50 using ErrorData = std::tuple<ErrorName, ErrorStatus, Entry::Level>; 51 52 /** 53 * Map of threshold interfaces and alarm properties and values to error data. 54 */ 55 const std::map<InterfaceName, std::map<PropertyName, std::map<bool, ErrorData>>> 56 thresholdData{ 57 58 {warningInterface, 59 {{"WarningAlarmHigh", 60 {{true, ErrorData{"WarningHigh", "", Entry::Level::Warning}}, 61 {false, 62 ErrorData{"WarningHigh", "Clear", Entry::Level::Informational}}}}, 63 {"WarningAlarmLow", 64 {{true, ErrorData{"WarningLow", "", Entry::Level::Warning}}, 65 {false, 66 ErrorData{"WarningLow", "Clear", Entry::Level::Informational}}}}}}, 67 68 {criticalInterface, 69 {{"CriticalAlarmHigh", 70 {{true, ErrorData{"CriticalHigh", "", Entry::Level::Critical}}, 71 {false, 72 ErrorData{"CriticalHigh", "Clear", Entry::Level::Informational}}}}, 73 {"CriticalAlarmLow", 74 {{true, ErrorData{"CriticalLow", "", Entry::Level::Critical}}, 75 {false, ErrorData{"CriticalLow", "Clear", 76 Entry::Level::Informational}}}}}}, 77 78 {perfLossInterface, 79 {{"PerfLossAlarmHigh", 80 {{true, ErrorData{"PerformanceLossHigh", "", Entry::Level::Warning}}, 81 {false, ErrorData{"PerformanceLossHigh", "Clear", 82 Entry::Level::Informational}}}}, 83 {"PerfLossAlarmLow", 84 {{true, ErrorData{"PerformanceLossLow", "", Entry::Level::Warning}}, 85 {false, ErrorData{"PerformanceLossLow", "Clear", 86 Entry::Level::Informational}}}}}}}; 87 ThresholdAlarmLogger(sdbusplus::bus_t & bus,sdeventplus::Event & event,std::shared_ptr<PowerState> powerState)88 ThresholdAlarmLogger::ThresholdAlarmLogger( 89 sdbusplus::bus_t& bus, sdeventplus::Event& event, 90 std::shared_ptr<PowerState> powerState) : 91 bus(bus), event(event), _powerState(std::move(powerState)), 92 warningMatch(bus, 93 "type='signal',member='PropertiesChanged'," 94 "path_namespace='/xyz/openbmc_project/sensors'," 95 "arg0='" + 96 warningInterface + "'", 97 std::bind(&ThresholdAlarmLogger::propertiesChanged, this, 98 std::placeholders::_1)), 99 criticalMatch(bus, 100 "type='signal',member='PropertiesChanged'," 101 "path_namespace='/xyz/openbmc_project/sensors'," 102 "arg0='" + 103 criticalInterface + "'", 104 std::bind(&ThresholdAlarmLogger::propertiesChanged, this, 105 std::placeholders::_1)), 106 perfLossMatch(bus, 107 "type='signal',member='PropertiesChanged'," 108 "path_namespace='/xyz/openbmc_project/sensors'," 109 "arg0='" + 110 perfLossInterface + "'", 111 std::bind(&ThresholdAlarmLogger::propertiesChanged, this, 112 std::placeholders::_1)), 113 ifacesRemovedMatch(bus, 114 "type='signal',member='InterfacesRemoved',arg0path=" 115 "'/xyz/openbmc_project/sensors/'", 116 std::bind(&ThresholdAlarmLogger::interfacesRemoved, this, 117 std::placeholders::_1)), 118 ifacesAddedMatch(bus, 119 "type='signal',member='InterfacesAdded',arg0path=" 120 "'/xyz/openbmc_project/sensors/'", 121 std::bind(&ThresholdAlarmLogger::interfacesAdded, this, 122 std::placeholders::_1)) 123 { 124 _powerState->addCallback("thresholdMon", 125 std::bind(&ThresholdAlarmLogger::powerStateChanged, 126 this, std::placeholders::_1)); 127 128 // check for any currently asserted threshold alarms 129 std::for_each( 130 thresholdData.begin(), thresholdData.end(), 131 [this](const auto& thresholdInterface) { 132 const auto& interface = thresholdInterface.first; 133 auto objects = 134 SDBusPlus::getSubTreeRaw(this->bus, "/", interface, 0); 135 std::for_each(objects.begin(), objects.end(), 136 [interface, this](const auto& object) { 137 const auto& path = object.first; 138 const auto& service = 139 object.second.begin()->first; 140 checkThresholds(interface, path, service); 141 }); 142 }); 143 } 144 propertiesChanged(sdbusplus::message_t & msg)145 void ThresholdAlarmLogger::propertiesChanged(sdbusplus::message_t& msg) 146 { 147 std::map<std::string, std::variant<bool>> properties; 148 std::string sensorPath = msg.get_path(); 149 std::string interface; 150 151 msg.read(interface, properties); 152 153 checkProperties(sensorPath, interface, properties); 154 } 155 interfacesRemoved(sdbusplus::message_t & msg)156 void ThresholdAlarmLogger::interfacesRemoved(sdbusplus::message_t& msg) 157 { 158 sdbusplus::message::object_path path; 159 std::vector<std::string> interfaces; 160 161 msg.read(path, interfaces); 162 163 for (const auto& interface : interfaces) 164 { 165 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(), 166 interface) != thresholdIfaceNames.end()) 167 { 168 alarms.erase(InterfaceKey{path, interface}); 169 } 170 } 171 } 172 interfacesAdded(sdbusplus::message_t & msg)173 void ThresholdAlarmLogger::interfacesAdded(sdbusplus::message_t& msg) 174 { 175 sdbusplus::message::object_path path; 176 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 177 178 msg.read(path, interfaces); 179 180 for (const auto& [interface, properties] : interfaces) 181 { 182 if (std::find(thresholdIfaceNames.begin(), thresholdIfaceNames.end(), 183 interface) != thresholdIfaceNames.end()) 184 { 185 checkProperties(path, interface, properties); 186 } 187 } 188 } 189 checkProperties(const std::string & sensorPath,const std::string & interface,const std::map<std::string,std::variant<bool>> & properties)190 void ThresholdAlarmLogger::checkProperties( 191 const std::string& sensorPath, const std::string& interface, 192 const std::map<std::string, std::variant<bool>>& properties) 193 { 194 auto alarmProperties = thresholdData.find(interface); 195 if (alarmProperties == thresholdData.end()) 196 { 197 return; 198 } 199 200 for (const auto& [propertyName, propertyValue] : properties) 201 { 202 if (alarmProperties->second.find(propertyName) != 203 alarmProperties->second.end()) 204 { 205 // If this is the first time we've seen this alarm, then 206 // assume it was off before so it doesn't create an event 207 // log for a value of false. 208 209 InterfaceKey key{sensorPath, interface}; 210 if (alarms.find(key) == alarms.end()) 211 { 212 alarms[key][propertyName] = false; 213 } 214 215 // Check if the value changed from what was there before. 216 auto alarmValue = std::get<bool>(propertyValue); 217 if (alarmValue != alarms[key][propertyName]) 218 { 219 alarms[key][propertyName] = alarmValue; 220 #ifndef SKIP_POWER_CHECKING 221 if (_powerState->isPowerOn()) 222 #endif 223 { 224 createEventLog(sensorPath, interface, propertyName, 225 alarmValue); 226 } 227 } 228 } 229 } 230 } 231 checkThresholds(const std::string & interface,const std::string & sensorPath,const std::string & service)232 void ThresholdAlarmLogger::checkThresholds(const std::string& interface, 233 const std::string& sensorPath, 234 const std::string& service) 235 { 236 auto properties = thresholdData.find(interface); 237 if (properties == thresholdData.end()) 238 { 239 return; 240 } 241 242 for (const auto& [property, unused] : properties->second) 243 { 244 try 245 { 246 auto alarmValue = SDBusPlus::getProperty<bool>( 247 bus, service, sensorPath, interface, property); 248 alarms[InterfaceKey(sensorPath, interface)][property] = alarmValue; 249 250 // This is just for checking alarms on startup, 251 // so only look for active alarms. 252 #ifdef SKIP_POWER_CHECKING 253 if (alarmValue) 254 #else 255 if (alarmValue && _powerState->isPowerOn()) 256 #endif 257 { 258 createEventLog(sensorPath, interface, property, alarmValue); 259 } 260 } 261 catch (const sdbusplus::exception_t& e) 262 { 263 // Sensor daemons that get their direction from entity manager 264 // may only be putting either the high alarm or low alarm on 265 // D-Bus, not both. 266 continue; 267 } 268 } 269 } 270 createEventLog(const std::string & sensorPath,const std::string & interface,const std::string & alarmProperty,bool alarmValue)271 void ThresholdAlarmLogger::createEventLog( 272 const std::string& sensorPath, const std::string& interface, 273 const std::string& alarmProperty, bool alarmValue) 274 { 275 std::map<std::string, std::string> ad; 276 277 auto type = getSensorType(sensorPath); 278 if (skipSensorType(type)) 279 { 280 return; 281 } 282 283 auto it = thresholdData.find(interface); 284 if (it == thresholdData.end()) 285 { 286 return; 287 } 288 289 auto properties = it->second.find(alarmProperty); 290 if (properties == it->second.end()) 291 { 292 lg2::info("Could not find {ALARM_PROPERTY} in threshold alarms map", 293 "ALARM_PROPERTY", alarmProperty); 294 return; 295 } 296 297 ad.emplace("SENSOR_NAME", sensorPath); 298 ad.emplace("_PID", std::to_string(getpid())); 299 300 try 301 { 302 auto sensorValue = SDBusPlus::getProperty<double>( 303 bus, sensorPath, valueInterface, "Value"); 304 305 ad.emplace("SENSOR_VALUE", std::to_string(sensorValue)); 306 307 lg2::info( 308 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (sensor value {SENSOR_VALUE})", 309 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty, 310 "ALARM_VALUE", alarmValue, "SENSOR_VALUE", sensorValue); 311 } 312 catch (const DBusServiceError& e) 313 { 314 // If the sensor was just added, the Value interface for it may 315 // not be in the mapper yet. This could only happen if the sensor 316 // application was started up after this one and the value exceeded the 317 // threshold immediately. 318 lg2::info( 319 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}", 320 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty, 321 "ALARM_VALUE", alarmValue); 322 } 323 324 auto callout = getCallout(sensorPath); 325 if (!callout.empty()) 326 { 327 ad.emplace("CALLOUT_INVENTORY_PATH", callout); 328 } 329 330 auto errorData = properties->second.find(alarmValue); 331 332 // Add the base error name and the sensor type (like Temperature) to the 333 // error name that's in the thresholdData name to get something like 334 // xyz.openbmc_project.Sensor.Threshold.Error.TemperatureWarningHigh 335 const auto& [name, status, severity] = errorData->second; 336 337 try 338 { 339 auto thresholdValue = 340 SDBusPlus::getProperty<double>(bus, sensorPath, interface, name); 341 342 ad.emplace("THRESHOLD_VALUE", std::to_string(thresholdValue)); 343 344 lg2::info( 345 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE} (threshold value {THRESHOLD_VALUE})", 346 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty, 347 "ALARM_VALUE", alarmValue, "THRESHOLD_VALUE", thresholdValue); 348 } 349 catch (const DBusServiceError& e) 350 { 351 lg2::info( 352 "Threshold Event {SENSOR_PATH} {ALARM_PROPERTY} = {ALARM_VALUE}", 353 "SENSOR_PATH", sensorPath, "ALARM_PROPERTY", alarmProperty, 354 "ALARM_VALUE", alarmValue); 355 } 356 357 type.front() = toupper(type.front()); 358 std::string errorName = errorNameBase + type + name + status; 359 if (LOG_SENSOR_NAME_ON_ERROR != 0) 360 { 361 errorName += " on sensor " + getSensorName(sensorPath); 362 } 363 364 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface, 365 "Create", errorName, convertForMessage(severity), ad); 366 } 367 getSensorName(const std::string & sensorPath)368 std::string ThresholdAlarmLogger::getSensorName(const std::string& sensorPath) 369 { 370 auto pos = sensorPath.find_last_of('/'); 371 if ((sensorPath.back() == '/') || (pos == std::string::npos)) 372 { 373 lg2::error("Cannot get sensor name from sensor path {SENSOR_PATH}", 374 "SENSOR_PATH", sensorPath); 375 return "unknown_sensor"; 376 } 377 378 return sensorPath.substr(pos + 1); 379 } 380 getSensorType(std::string sensorPath)381 std::string ThresholdAlarmLogger::getSensorType(std::string sensorPath) 382 { 383 auto pos = sensorPath.find_last_of('/'); 384 if ((sensorPath.back() == '/') || (pos == std::string::npos)) 385 { 386 lg2::error("Cannot get sensor type from sensor path {SENSOR_PATH}", 387 "SENSOR_PATH", sensorPath); 388 throw std::runtime_error("Invalid sensor path"); 389 } 390 391 sensorPath = sensorPath.substr(0, pos); 392 return sensorPath.substr(sensorPath.find_last_of('/') + 1); 393 } 394 skipSensorType(const std::string & type)395 bool ThresholdAlarmLogger::skipSensorType(const std::string& type) 396 { 397 return (type == "utilization"); 398 } 399 getCallout(const std::string & sensorPath)400 std::string ThresholdAlarmLogger::getCallout(const std::string& sensorPath) 401 { 402 const std::array<std::string, 2> assocTypes{"inventory", "chassis"}; 403 404 // Different implementations handle the association to the FRU 405 // differently: 406 // * phosphor-inventory-manager uses the 'inventory' association 407 // to point to the FRU. 408 // * dbus-sensors/entity-manager uses the 'chassis' association'. 409 // * For virtual sensors, no association. 410 411 for (const auto& assocType : assocTypes) 412 { 413 auto assocPath = sensorPath + "/" + assocType; 414 415 try 416 { 417 auto endpoints = SDBusPlus::getProperty<std::vector<std::string>>( 418 bus, assocPath, assocInterface, "endpoints"); 419 420 if (!endpoints.empty()) 421 { 422 return endpoints[0]; 423 } 424 } 425 catch (const DBusServiceError& e) 426 { 427 // The association doesn't exist 428 continue; 429 } 430 } 431 432 return std::string{}; 433 } 434 powerStateChanged(bool powerStateOn)435 void ThresholdAlarmLogger::powerStateChanged(bool powerStateOn) 436 { 437 if (powerStateOn) 438 { 439 checkThresholds(); 440 } 441 } 442 checkThresholds()443 void ThresholdAlarmLogger::checkThresholds() 444 { 445 std::vector<InterfaceKey> toErase; 446 447 for (const auto& [interfaceKey, alarmMap] : alarms) 448 { 449 for (const auto& [propertyName, alarmValue] : alarmMap) 450 { 451 if (alarmValue) 452 { 453 const auto& sensorPath = std::get<0>(interfaceKey); 454 const auto& interface = std::get<1>(interfaceKey); 455 std::string service; 456 457 try 458 { 459 // Check that the service that provides the alarm is still 460 // running, because if it died when the alarm was active 461 // there would be no indication of it unless we listened 462 // for NameOwnerChanged and tracked services, and this is 463 // easier. 464 service = SDBusPlus::getService(bus, sensorPath, interface); 465 } 466 catch (const DBusServiceError& e) 467 { 468 // No longer on D-Bus delete the alarm entry 469 toErase.emplace_back(sensorPath, interface); 470 } 471 472 if (!service.empty()) 473 { 474 createEventLog(sensorPath, interface, propertyName, 475 alarmValue); 476 } 477 } 478 } 479 } 480 481 for (const auto& e : toErase) 482 { 483 alarms.erase(e); 484 } 485 } 486 487 } // namespace sensor::monitor 488