1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "shutdown_alarm_monitor.hpp" 19 20 #include <fmt/format.h> 21 #include <unistd.h> 22 23 #include <phosphor-logging/log.hpp> 24 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 25 26 namespace sensor::monitor 27 { 28 using namespace phosphor::logging; 29 using namespace phosphor::fan::util; 30 using namespace phosphor::fan; 31 namespace fs = std::filesystem; 32 33 const std::map<ShutdownType, std::string> shutdownInterfaces{ 34 {ShutdownType::hard, "xyz.openbmc_project.Sensor.Threshold.HardShutdown"}, 35 {ShutdownType::soft, "xyz.openbmc_project.Sensor.Threshold.SoftShutdown"}}; 36 37 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmProperties{ 38 {ShutdownType::hard, 39 {{AlarmType::low, "HardShutdownAlarmLow"}, 40 {AlarmType::high, "HardShutdownAlarmHigh"}}}, 41 {ShutdownType::soft, 42 {{AlarmType::low, "SoftShutdownAlarmLow"}, 43 {AlarmType::high, "SoftShutdownAlarmHigh"}}}}; 44 45 const std::map<ShutdownType, std::chrono::milliseconds> shutdownDelays{ 46 {ShutdownType::hard, 47 std::chrono::milliseconds{SHUTDOWN_ALARM_HARD_SHUTDOWN_DELAY_MS}}, 48 {ShutdownType::soft, 49 std::chrono::milliseconds{SHUTDOWN_ALARM_SOFT_SHUTDOWN_DELAY_MS}}}; 50 51 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmEventLogs{ 52 {ShutdownType::hard, 53 {{AlarmType::high, 54 "xyz.openbmc_project.Sensor.Threshold.Error.HardShutdownAlarmHigh"}, 55 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 56 "HardShutdownAlarmLow"}}}, 57 {ShutdownType::soft, 58 {{AlarmType::high, 59 "xyz.openbmc_project.Sensor.Threshold.Error.SoftShutdownAlarmHigh"}, 60 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 61 "SoftShutdownAlarmLow"}}}}; 62 63 const std::map<ShutdownType, std::map<AlarmType, std::string>> 64 alarmClearEventLogs{ 65 {ShutdownType::hard, 66 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 67 "HardShutdownAlarmHighClear"}, 68 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 69 "HardShutdownAlarmLowClear"}}}, 70 {ShutdownType::soft, 71 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 72 "SoftShutdownAlarmHighClear"}, 73 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 74 "SoftShutdownAlarmLowClear"}}}}; 75 76 constexpr auto systemdService = "org.freedesktop.systemd1"; 77 constexpr auto systemdPath = "/org/freedesktop/systemd1"; 78 constexpr auto systemdMgrIface = "org.freedesktop.systemd1.Manager"; 79 constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value"; 80 constexpr auto valueProperty = "Value"; 81 const auto loggingService = "xyz.openbmc_project.Logging"; 82 const auto loggingPath = "/xyz/openbmc_project/logging"; 83 const auto loggingCreateIface = "xyz.openbmc_project.Logging.Create"; 84 85 using namespace sdbusplus::bus::match; 86 87 ShutdownAlarmMonitor::ShutdownAlarmMonitor( 88 sdbusplus::bus_t& bus, sdeventplus::Event& event, 89 std::shared_ptr<PowerState> powerState) : 90 bus(bus), 91 event(event), _powerState(std::move(powerState)), 92 hardShutdownMatch(bus, 93 "type='signal',member='PropertiesChanged'," 94 "path_namespace='/xyz/openbmc_project/sensors'," 95 "arg0='" + 96 shutdownInterfaces.at(ShutdownType::hard) + "'", 97 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 98 std::placeholders::_1)), 99 softShutdownMatch(bus, 100 "type='signal',member='PropertiesChanged'," 101 "path_namespace='/xyz/openbmc_project/sensors'," 102 "arg0='" + 103 shutdownInterfaces.at(ShutdownType::soft) + "'", 104 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 105 std::placeholders::_1)) 106 { 107 _powerState->addCallback("shutdownMon", 108 std::bind(&ShutdownAlarmMonitor::powerStateChanged, 109 this, std::placeholders::_1)); 110 findAlarms(); 111 112 if (_powerState->isPowerOn()) 113 { 114 checkAlarms(); 115 116 // Get rid of any previous saved timestamps that don't 117 // apply anymore. 118 timestamps.prune(alarms); 119 } 120 else 121 { 122 timestamps.clear(); 123 } 124 } 125 126 void ShutdownAlarmMonitor::findAlarms() 127 { 128 // Find all shutdown threshold ifaces currently on D-Bus. 129 for (const auto& [shutdownType, interface] : shutdownInterfaces) 130 { 131 auto paths = SDBusPlus::getSubTreePathsRaw(bus, "/", interface, 0); 132 133 auto shutdownType2 = shutdownType; 134 135 std::for_each( 136 paths.begin(), paths.end(), 137 [this, shutdownType2](const auto& path) { 138 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::high}, 139 nullptr); 140 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::low}, 141 nullptr); 142 }); 143 } 144 } 145 146 void ShutdownAlarmMonitor::checkAlarms() 147 { 148 for (auto& [alarmKey, timer] : alarms) 149 { 150 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 151 const auto& interface = shutdownInterfaces.at(shutdownType); 152 auto propertyName = alarmProperties.at(shutdownType).at(alarmType); 153 bool value; 154 155 try 156 { 157 value = SDBusPlus::getProperty<bool>(bus, sensorPath, interface, 158 propertyName); 159 } 160 catch (const DBusServiceError& e) 161 { 162 // The sensor isn't on D-Bus anymore 163 log<level::INFO>(fmt::format("No {} interface on {} anymore.", 164 interface, sensorPath) 165 .c_str()); 166 continue; 167 } 168 169 checkAlarm(value, alarmKey); 170 } 171 } 172 173 void ShutdownAlarmMonitor::propertiesChanged(sdbusplus::message_t& message) 174 { 175 std::map<std::string, std::variant<bool>> properties; 176 std::string interface; 177 178 if (!_powerState->isPowerOn()) 179 { 180 return; 181 } 182 183 message.read(interface, properties); 184 185 auto type = getShutdownType(interface); 186 if (!type) 187 { 188 return; 189 } 190 191 std::string sensorPath = message.get_path(); 192 193 const auto& lowAlarmName = alarmProperties.at(*type).at(AlarmType::low); 194 if (properties.count(lowAlarmName) > 0) 195 { 196 AlarmKey alarmKey{sensorPath, *type, AlarmType::low}; 197 auto alarm = alarms.find(alarmKey); 198 if (alarm == alarms.end()) 199 { 200 alarms.emplace(alarmKey, nullptr); 201 } 202 checkAlarm(std::get<bool>(properties.at(lowAlarmName)), alarmKey); 203 } 204 205 const auto& highAlarmName = alarmProperties.at(*type).at(AlarmType::high); 206 if (properties.count(highAlarmName) > 0) 207 { 208 AlarmKey alarmKey{sensorPath, *type, AlarmType::high}; 209 auto alarm = alarms.find(alarmKey); 210 if (alarm == alarms.end()) 211 { 212 alarms.emplace(alarmKey, nullptr); 213 } 214 checkAlarm(std::get<bool>(properties.at(highAlarmName)), alarmKey); 215 } 216 } 217 218 void ShutdownAlarmMonitor::checkAlarm(bool value, const AlarmKey& alarmKey) 219 { 220 auto alarm = alarms.find(alarmKey); 221 if (alarm == alarms.end()) 222 { 223 return; 224 } 225 226 // Start or stop the timer if necessary. 227 auto& timer = alarm->second; 228 if (value) 229 { 230 if (!timer) 231 { 232 startTimer(alarmKey); 233 } 234 } 235 else 236 { 237 if (timer) 238 { 239 stopTimer(alarmKey); 240 } 241 } 242 } 243 244 void ShutdownAlarmMonitor::startTimer(const AlarmKey& alarmKey) 245 { 246 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 247 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 248 std::chrono::milliseconds shutdownDelay{shutdownDelays.at(shutdownType)}; 249 std::optional<double> value; 250 251 auto alarm = alarms.find(alarmKey); 252 if (alarm == alarms.end()) 253 { 254 throw std::runtime_error("Couldn't find alarm inside startTimer"); 255 } 256 257 try 258 { 259 value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 260 valueProperty); 261 } 262 catch (const DBusServiceError& e) 263 { 264 // If the sensor was just added, the Value interface for it may 265 // not be in the mapper yet. This could only happen if the sensor 266 // application was started with power up and the value exceeded the 267 // threshold immediately. 268 } 269 270 createEventLog(alarmKey, true, value); 271 272 uint64_t now = std::chrono::duration_cast<std::chrono::milliseconds>( 273 std::chrono::system_clock::now().time_since_epoch()) 274 .count(); 275 276 // If there is a saved timestamp for this timer, then we were restarted 277 // while the timer was running. Calculate the remaining time to use 278 // for the timer. 279 auto previousStartTime = timestamps.get().find(alarmKey); 280 if (previousStartTime != timestamps.get().end()) 281 { 282 const uint64_t& original = previousStartTime->second; 283 284 log<level::INFO>(fmt::format("Found previously running {} timer " 285 "for {} with start time {}", 286 propertyName, sensorPath, original) 287 .c_str()); 288 289 // Sanity check it isn't total garbage. 290 if (now > original) 291 { 292 uint64_t remainingTime = 0; 293 auto elapsedTime = now - original; 294 295 if (elapsedTime < static_cast<uint64_t>(shutdownDelay.count())) 296 { 297 remainingTime = 298 static_cast<uint64_t>(shutdownDelay.count()) - elapsedTime; 299 } 300 301 shutdownDelay = std::chrono::milliseconds{remainingTime}; 302 } 303 else 304 { 305 log<level::WARNING>( 306 fmt::format( 307 "Restarting {} shutdown timer for {} for full " 308 "time because saved time {} is after current time {}", 309 propertyName, sensorPath, original, now) 310 .c_str()); 311 } 312 } 313 314 log<level::INFO>( 315 fmt::format("Starting {}ms {} shutdown timer due to sensor {} value {}", 316 shutdownDelay.count(), propertyName, sensorPath, *value) 317 .c_str()); 318 319 auto& timer = alarm->second; 320 321 timer = std::make_unique< 322 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 323 event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey)); 324 325 timer->restartOnce(shutdownDelay); 326 327 // Note that if this key is already in the timestamps map because 328 // the timer was already running the timestamp wil not be updated. 329 timestamps.add(alarmKey, now); 330 } 331 332 void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey) 333 { 334 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 335 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 336 337 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 338 valueProperty); 339 340 auto alarm = alarms.find(alarmKey); 341 if (alarm == alarms.end()) 342 { 343 throw std::runtime_error("Couldn't find alarm inside stopTimer"); 344 } 345 346 createEventLog(alarmKey, false, value); 347 348 log<level::INFO>( 349 fmt::format("Stopping {} shutdown timer due to sensor {} value {}", 350 propertyName, sensorPath, value) 351 .c_str()); 352 353 auto& timer = alarm->second; 354 timer->setEnabled(false); 355 timer.reset(); 356 357 timestamps.erase(alarmKey); 358 } 359 360 void ShutdownAlarmMonitor::createBmcDump() const 361 { 362 try 363 { 364 util::SDBusPlus::callMethod( 365 "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc", 366 "xyz.openbmc_project.Dump.Create", "CreateDump", 367 std::vector< 368 std::pair<std::string, std::variant<std::string, uint64_t>>>()); 369 } 370 catch (const std::exception& e) 371 { 372 auto message = fmt::format( 373 "Caught exception while creating BMC dump: {}", e.what()); 374 375 log<level::ERR>(message.c_str()); 376 } 377 } 378 379 void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey) 380 { 381 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 382 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 383 384 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 385 valueProperty); 386 387 log<level::ERR>( 388 fmt::format( 389 "The {} shutdown timer expired for sensor {}, shutting down", 390 propertyName, sensorPath) 391 .c_str()); 392 393 // Re-send the event log. If someone didn't want this it could be 394 // wrapped by a compile option. 395 createEventLog(alarmKey, true, value, true); 396 397 SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface, 398 "StartUnit", "obmc-chassis-hard-poweroff@0.target", 399 "replace"); 400 401 timestamps.erase(alarmKey); 402 createBmcDump(); 403 } 404 405 void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn) 406 { 407 if (powerStateOn) 408 { 409 checkAlarms(); 410 } 411 else 412 { 413 timestamps.clear(); 414 415 // Cancel and delete all timers 416 std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) { 417 auto& timer = alarm.second; 418 if (timer) 419 { 420 timer->setEnabled(false); 421 timer.reset(); 422 } 423 }); 424 } 425 } 426 427 void ShutdownAlarmMonitor::createEventLog( 428 const AlarmKey& alarmKey, bool alarmValue, 429 const std::optional<double>& sensorValue, bool isPowerOffError) 430 { 431 using namespace sdbusplus::xyz::openbmc_project::Logging::server; 432 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 433 std::map<std::string, std::string> ad{{"SENSOR_NAME", sensorPath}, 434 {"_PID", std::to_string(getpid())}}; 435 436 std::string errorName = 437 (alarmValue) ? alarmEventLogs.at(shutdownType).at(alarmType) 438 : alarmClearEventLogs.at(shutdownType).at(alarmType); 439 440 // severity = Critical if a power off 441 // severity = Error if alarm was asserted 442 // severity = Informational if alarm was deasserted 443 Entry::Level severity = Entry::Level::Error; 444 if (isPowerOffError) 445 { 446 severity = Entry::Level::Critical; 447 } 448 else if (!alarmValue) 449 { 450 severity = Entry::Level::Informational; 451 } 452 453 if (sensorValue) 454 { 455 ad.emplace("SENSOR_VALUE", std::to_string(*sensorValue)); 456 } 457 458 // If this is a power off, specify that it's a power 459 // fault and a system termination. This is used by some 460 // implementations for service reasons. 461 if (isPowerOffError) 462 { 463 ad.emplace("SEVERITY_DETAIL", "SYSTEM_TERM"); 464 } 465 466 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface, 467 "Create", errorName, convertForMessage(severity), ad); 468 } 469 470 std::optional<ShutdownType> 471 ShutdownAlarmMonitor::getShutdownType(const std::string& interface) const 472 { 473 auto it = std::find_if( 474 shutdownInterfaces.begin(), shutdownInterfaces.end(), 475 [interface](const auto& a) { return a.second == interface; }); 476 477 if (it == shutdownInterfaces.end()) 478 { 479 return std::nullopt; 480 } 481 482 return it->first; 483 } 484 485 } // namespace sensor::monitor 486