1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "shutdown_alarm_monitor.hpp" 19 20 #include <fmt/format.h> 21 #include <unistd.h> 22 23 #include <phosphor-logging/log.hpp> 24 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 25 26 namespace sensor::monitor 27 { 28 using namespace phosphor::logging; 29 using namespace phosphor::fan::util; 30 using namespace phosphor::fan; 31 namespace fs = std::filesystem; 32 33 const std::map<ShutdownType, std::string> shutdownInterfaces{ 34 {ShutdownType::hard, "xyz.openbmc_project.Sensor.Threshold.HardShutdown"}, 35 {ShutdownType::soft, "xyz.openbmc_project.Sensor.Threshold.SoftShutdown"}}; 36 37 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmProperties{ 38 {ShutdownType::hard, 39 {{AlarmType::low, "HardShutdownAlarmLow"}, 40 {AlarmType::high, "HardShutdownAlarmHigh"}}}, 41 {ShutdownType::soft, 42 {{AlarmType::low, "SoftShutdownAlarmLow"}, 43 {AlarmType::high, "SoftShutdownAlarmHigh"}}}}; 44 45 const std::map<ShutdownType, std::chrono::milliseconds> shutdownDelays{ 46 {ShutdownType::hard, 47 std::chrono::milliseconds{SHUTDOWN_ALARM_HARD_SHUTDOWN_DELAY_MS}}, 48 {ShutdownType::soft, 49 std::chrono::milliseconds{SHUTDOWN_ALARM_SOFT_SHUTDOWN_DELAY_MS}}}; 50 51 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmEventLogs{ 52 {ShutdownType::hard, 53 {{AlarmType::high, 54 "xyz.openbmc_project.Sensor.Threshold.Error.HardShutdownAlarmHigh"}, 55 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 56 "HardShutdownAlarmLow"}}}, 57 {ShutdownType::soft, 58 {{AlarmType::high, 59 "xyz.openbmc_project.Sensor.Threshold.Error.SoftShutdownAlarmHigh"}, 60 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 61 "SoftShutdownAlarmLow"}}}}; 62 63 const std::map<ShutdownType, std::map<AlarmType, std::string>> 64 alarmClearEventLogs{ 65 {ShutdownType::hard, 66 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 67 "HardShutdownAlarmHighClear"}, 68 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 69 "HardShutdownAlarmLowClear"}}}, 70 {ShutdownType::soft, 71 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 72 "SoftShutdownAlarmHighClear"}, 73 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 74 "SoftShutdownAlarmLowClear"}}}}; 75 76 constexpr auto systemdService = "org.freedesktop.systemd1"; 77 constexpr auto systemdPath = "/org/freedesktop/systemd1"; 78 constexpr auto systemdMgrIface = "org.freedesktop.systemd1.Manager"; 79 constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value"; 80 constexpr auto valueProperty = "Value"; 81 const auto loggingService = "xyz.openbmc_project.Logging"; 82 const auto loggingPath = "/xyz/openbmc_project/logging"; 83 const auto loggingCreateIface = "xyz.openbmc_project.Logging.Create"; 84 85 using namespace sdbusplus::bus::match; 86 87 ShutdownAlarmMonitor::ShutdownAlarmMonitor( 88 sdbusplus::bus_t& bus, sdeventplus::Event& event, 89 std::shared_ptr<PowerState> powerState) : 90 bus(bus), 91 event(event), _powerState(std::move(powerState)), 92 hardShutdownMatch(bus, 93 "type='signal',member='PropertiesChanged'," 94 "path_namespace='/xyz/openbmc_project/sensors'," 95 "arg0='" + 96 shutdownInterfaces.at(ShutdownType::hard) + "'", 97 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 98 std::placeholders::_1)), 99 softShutdownMatch(bus, 100 "type='signal',member='PropertiesChanged'," 101 "path_namespace='/xyz/openbmc_project/sensors'," 102 "arg0='" + 103 shutdownInterfaces.at(ShutdownType::soft) + "'", 104 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 105 std::placeholders::_1)) 106 { 107 _powerState->addCallback("shutdownMon", 108 std::bind(&ShutdownAlarmMonitor::powerStateChanged, 109 this, std::placeholders::_1)); 110 findAlarms(); 111 112 if (_powerState->isPowerOn()) 113 { 114 checkAlarms(); 115 116 // Get rid of any previous saved timestamps that don't 117 // apply anymore. 118 timestamps.prune(alarms); 119 } 120 else 121 { 122 timestamps.clear(); 123 } 124 } 125 126 void ShutdownAlarmMonitor::findAlarms() 127 { 128 // Find all shutdown threshold ifaces currently on D-Bus. 129 for (const auto& [shutdownType, interface] : shutdownInterfaces) 130 { 131 auto paths = SDBusPlus::getSubTreePathsRaw(bus, "/", interface, 0); 132 133 auto shutdownType2 = shutdownType; 134 135 std::for_each(paths.begin(), paths.end(), 136 [this, shutdownType2](const auto& path) { 137 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::high}, 138 nullptr); 139 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::low}, 140 nullptr); 141 }); 142 } 143 } 144 145 void ShutdownAlarmMonitor::checkAlarms() 146 { 147 for (auto& [alarmKey, timer] : alarms) 148 { 149 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 150 const auto& interface = shutdownInterfaces.at(shutdownType); 151 auto propertyName = alarmProperties.at(shutdownType).at(alarmType); 152 bool value; 153 154 try 155 { 156 value = SDBusPlus::getProperty<bool>(bus, sensorPath, interface, 157 propertyName); 158 } 159 catch (const DBusServiceError& e) 160 { 161 // The sensor isn't on D-Bus anymore 162 log<level::INFO>(fmt::format("No {} interface on {} anymore.", 163 interface, sensorPath) 164 .c_str()); 165 continue; 166 } 167 168 checkAlarm(value, alarmKey); 169 } 170 } 171 172 void ShutdownAlarmMonitor::propertiesChanged(sdbusplus::message_t& message) 173 { 174 std::map<std::string, std::variant<bool>> properties; 175 std::string interface; 176 177 if (!_powerState->isPowerOn()) 178 { 179 return; 180 } 181 182 message.read(interface, properties); 183 184 auto type = getShutdownType(interface); 185 if (!type) 186 { 187 return; 188 } 189 190 std::string sensorPath = message.get_path(); 191 192 const auto& lowAlarmName = alarmProperties.at(*type).at(AlarmType::low); 193 if (properties.count(lowAlarmName) > 0) 194 { 195 AlarmKey alarmKey{sensorPath, *type, AlarmType::low}; 196 auto alarm = alarms.find(alarmKey); 197 if (alarm == alarms.end()) 198 { 199 alarms.emplace(alarmKey, nullptr); 200 } 201 checkAlarm(std::get<bool>(properties.at(lowAlarmName)), alarmKey); 202 } 203 204 const auto& highAlarmName = alarmProperties.at(*type).at(AlarmType::high); 205 if (properties.count(highAlarmName) > 0) 206 { 207 AlarmKey alarmKey{sensorPath, *type, AlarmType::high}; 208 auto alarm = alarms.find(alarmKey); 209 if (alarm == alarms.end()) 210 { 211 alarms.emplace(alarmKey, nullptr); 212 } 213 checkAlarm(std::get<bool>(properties.at(highAlarmName)), alarmKey); 214 } 215 } 216 217 void ShutdownAlarmMonitor::checkAlarm(bool value, const AlarmKey& alarmKey) 218 { 219 auto alarm = alarms.find(alarmKey); 220 if (alarm == alarms.end()) 221 { 222 return; 223 } 224 225 // Start or stop the timer if necessary. 226 auto& timer = alarm->second; 227 if (value) 228 { 229 if (!timer) 230 { 231 startTimer(alarmKey); 232 } 233 } 234 else 235 { 236 if (timer) 237 { 238 stopTimer(alarmKey); 239 } 240 } 241 } 242 243 void ShutdownAlarmMonitor::startTimer(const AlarmKey& alarmKey) 244 { 245 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 246 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 247 std::chrono::milliseconds shutdownDelay{shutdownDelays.at(shutdownType)}; 248 std::optional<double> value; 249 250 auto alarm = alarms.find(alarmKey); 251 if (alarm == alarms.end()) 252 { 253 throw std::runtime_error("Couldn't find alarm inside startTimer"); 254 } 255 256 try 257 { 258 value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 259 valueProperty); 260 } 261 catch (const DBusServiceError& e) 262 { 263 // If the sensor was just added, the Value interface for it may 264 // not be in the mapper yet. This could only happen if the sensor 265 // application was started with power up and the value exceeded the 266 // threshold immediately. 267 } 268 269 createEventLog(alarmKey, true, value); 270 271 uint64_t now = std::chrono::duration_cast<std::chrono::milliseconds>( 272 std::chrono::system_clock::now().time_since_epoch()) 273 .count(); 274 275 // If there is a saved timestamp for this timer, then we were restarted 276 // while the timer was running. Calculate the remaining time to use 277 // for the timer. 278 auto previousStartTime = timestamps.get().find(alarmKey); 279 if (previousStartTime != timestamps.get().end()) 280 { 281 const uint64_t& original = previousStartTime->second; 282 283 log<level::INFO>(fmt::format("Found previously running {} timer " 284 "for {} with start time {}", 285 propertyName, sensorPath, original) 286 .c_str()); 287 288 // Sanity check it isn't total garbage. 289 if (now > original) 290 { 291 uint64_t remainingTime = 0; 292 auto elapsedTime = now - original; 293 294 if (elapsedTime < static_cast<uint64_t>(shutdownDelay.count())) 295 { 296 remainingTime = static_cast<uint64_t>(shutdownDelay.count()) - 297 elapsedTime; 298 } 299 300 shutdownDelay = std::chrono::milliseconds{remainingTime}; 301 } 302 else 303 { 304 log<level::WARNING>( 305 fmt::format( 306 "Restarting {} shutdown timer for {} for full " 307 "time because saved time {} is after current time {}", 308 propertyName, sensorPath, original, now) 309 .c_str()); 310 } 311 } 312 313 log<level::INFO>( 314 fmt::format("Starting {}ms {} shutdown timer due to sensor {} value {}", 315 shutdownDelay.count(), propertyName, sensorPath, *value) 316 .c_str()); 317 318 auto& timer = alarm->second; 319 320 timer = std::make_unique< 321 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 322 event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey)); 323 324 timer->restartOnce(shutdownDelay); 325 326 // Note that if this key is already in the timestamps map because 327 // the timer was already running the timestamp wil not be updated. 328 timestamps.add(alarmKey, now); 329 } 330 331 void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey) 332 { 333 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 334 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 335 336 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 337 valueProperty); 338 339 auto alarm = alarms.find(alarmKey); 340 if (alarm == alarms.end()) 341 { 342 throw std::runtime_error("Couldn't find alarm inside stopTimer"); 343 } 344 345 createEventLog(alarmKey, false, value); 346 347 log<level::INFO>( 348 fmt::format("Stopping {} shutdown timer due to sensor {} value {}", 349 propertyName, sensorPath, value) 350 .c_str()); 351 352 auto& timer = alarm->second; 353 timer->setEnabled(false); 354 timer.reset(); 355 356 timestamps.erase(alarmKey); 357 } 358 359 void ShutdownAlarmMonitor::createBmcDump() const 360 { 361 try 362 { 363 util::SDBusPlus::callMethod( 364 "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc", 365 "xyz.openbmc_project.Dump.Create", "CreateDump", 366 std::vector< 367 std::pair<std::string, std::variant<std::string, uint64_t>>>()); 368 } 369 catch (const std::exception& e) 370 { 371 auto message = fmt::format( 372 "Caught exception while creating BMC dump: {}", e.what()); 373 374 log<level::ERR>(message.c_str()); 375 } 376 } 377 378 void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey) 379 { 380 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 381 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 382 383 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 384 valueProperty); 385 386 log<level::ERR>( 387 fmt::format( 388 "The {} shutdown timer expired for sensor {}, shutting down", 389 propertyName, sensorPath) 390 .c_str()); 391 392 // Re-send the event log. If someone didn't want this it could be 393 // wrapped by a compile option. 394 createEventLog(alarmKey, true, value, true); 395 396 SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface, 397 "StartUnit", "obmc-chassis-hard-poweroff@0.target", 398 "replace"); 399 400 timestamps.erase(alarmKey); 401 createBmcDump(); 402 } 403 404 void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn) 405 { 406 if (powerStateOn) 407 { 408 checkAlarms(); 409 } 410 else 411 { 412 timestamps.clear(); 413 414 // Cancel and delete all timers 415 std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) { 416 auto& timer = alarm.second; 417 if (timer) 418 { 419 timer->setEnabled(false); 420 timer.reset(); 421 } 422 }); 423 } 424 } 425 426 void ShutdownAlarmMonitor::createEventLog( 427 const AlarmKey& alarmKey, bool alarmValue, 428 const std::optional<double>& sensorValue, bool isPowerOffError) 429 { 430 using namespace sdbusplus::xyz::openbmc_project::Logging::server; 431 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 432 std::map<std::string, std::string> ad{{"SENSOR_NAME", sensorPath}, 433 {"_PID", std::to_string(getpid())}}; 434 435 std::string errorName = 436 (alarmValue) ? alarmEventLogs.at(shutdownType).at(alarmType) 437 : alarmClearEventLogs.at(shutdownType).at(alarmType); 438 439 // severity = Critical if a power off 440 // severity = Error if alarm was asserted 441 // severity = Informational if alarm was deasserted 442 Entry::Level severity = Entry::Level::Error; 443 if (isPowerOffError) 444 { 445 severity = Entry::Level::Critical; 446 } 447 else if (!alarmValue) 448 { 449 severity = Entry::Level::Informational; 450 } 451 452 if (sensorValue) 453 { 454 ad.emplace("SENSOR_VALUE", std::to_string(*sensorValue)); 455 } 456 457 // If this is a power off, specify that it's a power 458 // fault and a system termination. This is used by some 459 // implementations for service reasons. 460 if (isPowerOffError) 461 { 462 ad.emplace("SEVERITY_DETAIL", "SYSTEM_TERM"); 463 } 464 465 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface, 466 "Create", errorName, convertForMessage(severity), ad); 467 } 468 469 std::optional<ShutdownType> 470 ShutdownAlarmMonitor::getShutdownType(const std::string& interface) const 471 { 472 auto it = std::find_if(shutdownInterfaces.begin(), shutdownInterfaces.end(), 473 [interface](const auto& a) { 474 return a.second == interface; 475 }); 476 477 if (it == shutdownInterfaces.end()) 478 { 479 return std::nullopt; 480 } 481 482 return it->first; 483 } 484 485 } // namespace sensor::monitor 486