1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "shutdown_alarm_monitor.hpp" 19 20 #include <fmt/format.h> 21 #include <unistd.h> 22 23 #include <phosphor-logging/log.hpp> 24 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 25 26 namespace sensor::monitor 27 { 28 using namespace phosphor::logging; 29 using namespace phosphor::fan::util; 30 using namespace phosphor::fan; 31 namespace fs = std::filesystem; 32 33 const std::map<ShutdownType, std::string> shutdownInterfaces{ 34 {ShutdownType::hard, "xyz.openbmc_project.Sensor.Threshold.HardShutdown"}, 35 {ShutdownType::soft, "xyz.openbmc_project.Sensor.Threshold.SoftShutdown"}}; 36 37 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmProperties{ 38 {ShutdownType::hard, 39 {{AlarmType::low, "HardShutdownAlarmLow"}, 40 {AlarmType::high, "HardShutdownAlarmHigh"}}}, 41 {ShutdownType::soft, 42 {{AlarmType::low, "SoftShutdownAlarmLow"}, 43 {AlarmType::high, "SoftShutdownAlarmHigh"}}}}; 44 45 const std::map<ShutdownType, std::chrono::milliseconds> shutdownDelays{ 46 {ShutdownType::hard, 47 std::chrono::milliseconds{SHUTDOWN_ALARM_HARD_SHUTDOWN_DELAY_MS}}, 48 {ShutdownType::soft, 49 std::chrono::milliseconds{SHUTDOWN_ALARM_SOFT_SHUTDOWN_DELAY_MS}}}; 50 51 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmEventLogs{ 52 {ShutdownType::hard, 53 {{AlarmType::high, 54 "xyz.openbmc_project.Sensor.Threshold.Error.HardShutdownAlarmHigh"}, 55 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 56 "HardShutdownAlarmLow"}}}, 57 {ShutdownType::soft, 58 {{AlarmType::high, 59 "xyz.openbmc_project.Sensor.Threshold.Error.SoftShutdownAlarmHigh"}, 60 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 61 "SoftShutdownAlarmLow"}}}}; 62 63 const std::map<ShutdownType, std::map<AlarmType, std::string>> 64 alarmClearEventLogs{ 65 {ShutdownType::hard, 66 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 67 "HardShutdownAlarmHighClear"}, 68 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 69 "HardShutdownAlarmLowClear"}}}, 70 {ShutdownType::soft, 71 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 72 "SoftShutdownAlarmHighClear"}, 73 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 74 "SoftShutdownAlarmLowClear"}}}}; 75 76 constexpr auto systemdService = "org.freedesktop.systemd1"; 77 constexpr auto systemdPath = "/org/freedesktop/systemd1"; 78 constexpr auto systemdMgrIface = "org.freedesktop.systemd1.Manager"; 79 constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value"; 80 constexpr auto valueProperty = "Value"; 81 const auto loggingService = "xyz.openbmc_project.Logging"; 82 const auto loggingPath = "/xyz/openbmc_project/logging"; 83 const auto loggingCreateIface = "xyz.openbmc_project.Logging.Create"; 84 85 using namespace sdbusplus::bus::match; 86 87 ShutdownAlarmMonitor::ShutdownAlarmMonitor( 88 sdbusplus::bus::bus& bus, sdeventplus::Event& event, 89 std::shared_ptr<PowerState> powerState) : 90 bus(bus), 91 event(event), _powerState(std::move(powerState)), 92 hardShutdownMatch(bus, 93 "type='signal',member='PropertiesChanged'," 94 "path_namespace='/xyz/openbmc_project/sensors'," 95 "arg0='" + 96 shutdownInterfaces.at(ShutdownType::soft) + "'", 97 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 98 std::placeholders::_1)), 99 softShutdownMatch(bus, 100 "type='signal',member='PropertiesChanged'," 101 "path_namespace='/xyz/openbmc_project/sensors'," 102 "arg0='" + 103 shutdownInterfaces.at(ShutdownType::hard) + "'", 104 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 105 std::placeholders::_1)) 106 { 107 _powerState->addCallback("shutdownMon", 108 std::bind(&ShutdownAlarmMonitor::powerStateChanged, 109 this, std::placeholders::_1)); 110 findAlarms(); 111 112 if (_powerState->isPowerOn()) 113 { 114 checkAlarms(); 115 116 // Get rid of any previous saved timestamps that don't 117 // apply anymore. 118 timestamps.prune(alarms); 119 } 120 else 121 { 122 timestamps.clear(); 123 } 124 } 125 126 void ShutdownAlarmMonitor::findAlarms() 127 { 128 // Find all shutdown threshold ifaces currently on D-Bus. 129 for (const auto& [shutdownType, interface] : shutdownInterfaces) 130 { 131 auto paths = SDBusPlus::getSubTreePathsRaw(bus, "/", interface, 0); 132 133 std::for_each( 134 paths.begin(), paths.end(), [this, shutdownType](const auto& path) { 135 alarms.emplace(AlarmKey{path, shutdownType, AlarmType::high}, 136 nullptr); 137 alarms.emplace(AlarmKey{path, shutdownType, AlarmType::low}, 138 nullptr); 139 }); 140 } 141 } 142 143 void ShutdownAlarmMonitor::checkAlarms() 144 { 145 for (auto& [alarmKey, timer] : alarms) 146 { 147 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 148 const auto& interface = shutdownInterfaces.at(shutdownType); 149 auto propertyName = alarmProperties.at(shutdownType).at(alarmType); 150 bool value; 151 152 try 153 { 154 value = SDBusPlus::getProperty<bool>(bus, sensorPath, interface, 155 propertyName); 156 } 157 catch (const DBusServiceError& e) 158 { 159 // The sensor isn't on D-Bus anymore 160 log<level::INFO>(fmt::format("No {} interface on {} anymore.", 161 interface, sensorPath) 162 .c_str()); 163 continue; 164 } 165 166 checkAlarm(value, alarmKey); 167 } 168 } 169 170 void ShutdownAlarmMonitor::propertiesChanged( 171 sdbusplus::message::message& message) 172 { 173 std::map<std::string, std::variant<bool>> properties; 174 std::string interface; 175 176 if (!_powerState->isPowerOn()) 177 { 178 return; 179 } 180 181 message.read(interface, properties); 182 183 auto type = getShutdownType(interface); 184 if (!type) 185 { 186 return; 187 } 188 189 std::string sensorPath = message.get_path(); 190 191 const auto& lowAlarmName = alarmProperties.at(*type).at(AlarmType::low); 192 if (properties.count(lowAlarmName) > 0) 193 { 194 AlarmKey alarmKey{sensorPath, *type, AlarmType::low}; 195 auto alarm = alarms.find(alarmKey); 196 if (alarm == alarms.end()) 197 { 198 alarms.emplace(alarmKey, nullptr); 199 } 200 checkAlarm(std::get<bool>(properties.at(lowAlarmName)), alarmKey); 201 } 202 203 const auto& highAlarmName = alarmProperties.at(*type).at(AlarmType::high); 204 if (properties.count(highAlarmName) > 0) 205 { 206 AlarmKey alarmKey{sensorPath, *type, AlarmType::high}; 207 auto alarm = alarms.find(alarmKey); 208 if (alarm == alarms.end()) 209 { 210 alarms.emplace(alarmKey, nullptr); 211 } 212 checkAlarm(std::get<bool>(properties.at(highAlarmName)), alarmKey); 213 } 214 } 215 216 void ShutdownAlarmMonitor::checkAlarm(bool value, const AlarmKey& alarmKey) 217 { 218 auto alarm = alarms.find(alarmKey); 219 if (alarm == alarms.end()) 220 { 221 return; 222 } 223 224 // Start or stop the timer if necessary. 225 auto& timer = alarm->second; 226 if (value) 227 { 228 if (!timer) 229 { 230 startTimer(alarmKey); 231 } 232 } 233 else 234 { 235 if (timer) 236 { 237 stopTimer(alarmKey); 238 } 239 } 240 } 241 242 void ShutdownAlarmMonitor::startTimer(const AlarmKey& alarmKey) 243 { 244 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 245 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 246 std::chrono::milliseconds shutdownDelay{shutdownDelays.at(shutdownType)}; 247 std::optional<double> value; 248 249 auto alarm = alarms.find(alarmKey); 250 if (alarm == alarms.end()) 251 { 252 throw std::runtime_error("Couldn't find alarm inside startTimer"); 253 } 254 255 try 256 { 257 value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 258 valueProperty); 259 } 260 catch (const DBusServiceError& e) 261 { 262 // If the sensor was just added, the Value interface for it may 263 // not be in the mapper yet. This could only happen if the sensor 264 // application was started with power up and the value exceeded the 265 // threshold immediately. 266 } 267 268 createEventLog(alarmKey, true, value); 269 270 uint64_t now = std::chrono::duration_cast<std::chrono::milliseconds>( 271 std::chrono::system_clock::now().time_since_epoch()) 272 .count(); 273 274 // If there is a saved timestamp for this timer, then we were restarted 275 // while the timer was running. Calculate the remaining time to use 276 // for the timer. 277 auto previousStartTime = timestamps.get().find(alarmKey); 278 if (previousStartTime != timestamps.get().end()) 279 { 280 const uint64_t& original = previousStartTime->second; 281 282 log<level::INFO>(fmt::format("Found previously running {} timer " 283 "for {} with start time {}", 284 propertyName, sensorPath, original) 285 .c_str()); 286 287 // Sanity check it isn't total garbage. 288 if (now > original) 289 { 290 uint64_t remainingTime = 0; 291 auto elapsedTime = now - original; 292 293 if (elapsedTime < static_cast<uint64_t>(shutdownDelay.count())) 294 { 295 remainingTime = 296 static_cast<uint64_t>(shutdownDelay.count()) - elapsedTime; 297 } 298 299 shutdownDelay = std::chrono::milliseconds{remainingTime}; 300 } 301 else 302 { 303 log<level::WARNING>( 304 fmt::format( 305 "Restarting {} shutdown timer for {} for full " 306 "time because saved time {} is after current time {}", 307 propertyName, original, now) 308 .c_str()); 309 } 310 } 311 312 log<level::INFO>( 313 fmt::format("Starting {}ms {} shutdown timer due to sensor {} value {}", 314 shutdownDelay.count(), propertyName, sensorPath, *value) 315 .c_str()); 316 317 auto& timer = alarm->second; 318 319 timer = std::make_unique< 320 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 321 event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey)); 322 323 timer->restartOnce(shutdownDelay); 324 325 // Note that if this key is already in the timestamps map because 326 // the timer was already running the timestamp wil not be updated. 327 timestamps.add(alarmKey, now); 328 } 329 330 void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey) 331 { 332 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 333 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 334 335 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 336 valueProperty); 337 338 auto alarm = alarms.find(alarmKey); 339 if (alarm == alarms.end()) 340 { 341 throw std::runtime_error("Couldn't find alarm inside stopTimer"); 342 } 343 344 createEventLog(alarmKey, false, value); 345 346 log<level::INFO>( 347 fmt::format("Stopping {} shutdown timer due to sensor {} value {}", 348 propertyName, sensorPath, value) 349 .c_str()); 350 351 auto& timer = alarm->second; 352 timer->setEnabled(false); 353 timer.reset(); 354 355 timestamps.erase(alarmKey); 356 } 357 358 void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey) 359 { 360 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 361 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 362 363 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 364 valueProperty); 365 366 log<level::ERR>( 367 fmt::format( 368 "The {} shutdown timer expired for sensor {}, shutting down", 369 propertyName, sensorPath) 370 .c_str()); 371 372 // Re-send the event log. If someone didn't want this it could be 373 // wrapped by a compile option. 374 createEventLog(alarmKey, true, value, true); 375 376 SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface, 377 "StartUnit", "obmc-chassis-hard-poweroff@0.target", 378 "replace"); 379 380 timestamps.erase(alarmKey); 381 } 382 383 void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn) 384 { 385 if (powerStateOn) 386 { 387 checkAlarms(); 388 } 389 else 390 { 391 timestamps.clear(); 392 393 // Cancel and delete all timers 394 std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) { 395 auto& timer = alarm.second; 396 if (timer) 397 { 398 timer->setEnabled(false); 399 timer.reset(); 400 } 401 }); 402 } 403 } 404 405 void ShutdownAlarmMonitor::createEventLog( 406 const AlarmKey& alarmKey, bool alarmValue, 407 const std::optional<double>& sensorValue, bool isPowerOffError) 408 { 409 using namespace sdbusplus::xyz::openbmc_project::Logging::server; 410 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 411 std::map<std::string, std::string> ad{{"SENSOR_NAME", sensorPath}, 412 {"_PID", std::to_string(getpid())}}; 413 414 std::string errorName = 415 (alarmValue) ? alarmEventLogs.at(shutdownType).at(alarmType) 416 : alarmClearEventLogs.at(shutdownType).at(alarmType); 417 418 // severity = Critical if a power off 419 // severity = Error if alarm was asserted 420 // severity = Informational if alarm was deasserted 421 Entry::Level severity = Entry::Level::Error; 422 if (isPowerOffError) 423 { 424 severity = Entry::Level::Critical; 425 } 426 else if (!alarmValue) 427 { 428 severity = Entry::Level::Informational; 429 } 430 431 if (sensorValue) 432 { 433 ad.emplace("SENSOR_VALUE", std::to_string(*sensorValue)); 434 } 435 436 // If this is a power off, specify that it's a power 437 // fault and a system termination. This is used by some 438 // implementations for service reasons. 439 if (isPowerOffError) 440 { 441 ad.emplace("POWER_THERMAL_CRITICAL_FAULT", "TRUE"); 442 ad.emplace("SEVERITY_DETAIL", "SYSTEM_TERM"); 443 } 444 445 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface, 446 "Create", errorName, convertForMessage(severity), ad); 447 } 448 449 std::optional<ShutdownType> 450 ShutdownAlarmMonitor::getShutdownType(const std::string& interface) const 451 { 452 auto it = std::find_if( 453 shutdownInterfaces.begin(), shutdownInterfaces.end(), 454 [interface](const auto& a) { return a.second == interface; }); 455 456 if (it == shutdownInterfaces.end()) 457 { 458 return std::nullopt; 459 } 460 461 return it->first; 462 } 463 464 } // namespace sensor::monitor 465