1 /** 2 * Copyright © 2021 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "config.h" 17 18 #include "shutdown_alarm_monitor.hpp" 19 20 #include <fmt/format.h> 21 #include <unistd.h> 22 23 #include <phosphor-logging/log.hpp> 24 #include <xyz/openbmc_project/Logging/Entry/server.hpp> 25 26 namespace sensor::monitor 27 { 28 using namespace phosphor::logging; 29 using namespace phosphor::fan::util; 30 using namespace phosphor::fan; 31 namespace fs = std::filesystem; 32 33 const std::map<ShutdownType, std::string> shutdownInterfaces{ 34 {ShutdownType::hard, "xyz.openbmc_project.Sensor.Threshold.HardShutdown"}, 35 {ShutdownType::soft, "xyz.openbmc_project.Sensor.Threshold.SoftShutdown"}}; 36 37 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmProperties{ 38 {ShutdownType::hard, 39 {{AlarmType::low, "HardShutdownAlarmLow"}, 40 {AlarmType::high, "HardShutdownAlarmHigh"}}}, 41 {ShutdownType::soft, 42 {{AlarmType::low, "SoftShutdownAlarmLow"}, 43 {AlarmType::high, "SoftShutdownAlarmHigh"}}}}; 44 45 const std::map<ShutdownType, std::chrono::milliseconds> shutdownDelays{ 46 {ShutdownType::hard, 47 std::chrono::milliseconds{SHUTDOWN_ALARM_HARD_SHUTDOWN_DELAY_MS}}, 48 {ShutdownType::soft, 49 std::chrono::milliseconds{SHUTDOWN_ALARM_SOFT_SHUTDOWN_DELAY_MS}}}; 50 51 const std::map<ShutdownType, std::map<AlarmType, std::string>> alarmEventLogs{ 52 {ShutdownType::hard, 53 {{AlarmType::high, 54 "xyz.openbmc_project.Sensor.Threshold.Error.HardShutdownAlarmHigh"}, 55 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 56 "HardShutdownAlarmLow"}}}, 57 {ShutdownType::soft, 58 {{AlarmType::high, 59 "xyz.openbmc_project.Sensor.Threshold.Error.SoftShutdownAlarmHigh"}, 60 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 61 "SoftShutdownAlarmLow"}}}}; 62 63 const std::map<ShutdownType, std::map<AlarmType, std::string>> 64 alarmClearEventLogs{ 65 {ShutdownType::hard, 66 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 67 "HardShutdownAlarmHighClear"}, 68 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 69 "HardShutdownAlarmLowClear"}}}, 70 {ShutdownType::soft, 71 {{AlarmType::high, "xyz.openbmc_project.Sensor.Threshold.Error." 72 "SoftShutdownAlarmHighClear"}, 73 {AlarmType::low, "xyz.openbmc_project.Sensor.Threshold.Error." 74 "SoftShutdownAlarmLowClear"}}}}; 75 76 constexpr auto systemdService = "org.freedesktop.systemd1"; 77 constexpr auto systemdPath = "/org/freedesktop/systemd1"; 78 constexpr auto systemdMgrIface = "org.freedesktop.systemd1.Manager"; 79 constexpr auto valueInterface = "xyz.openbmc_project.Sensor.Value"; 80 constexpr auto valueProperty = "Value"; 81 const auto loggingService = "xyz.openbmc_project.Logging"; 82 const auto loggingPath = "/xyz/openbmc_project/logging"; 83 const auto loggingCreateIface = "xyz.openbmc_project.Logging.Create"; 84 85 using namespace sdbusplus::bus::match; 86 87 ShutdownAlarmMonitor::ShutdownAlarmMonitor( 88 sdbusplus::bus::bus& bus, sdeventplus::Event& event, 89 std::shared_ptr<PowerState> powerState) : 90 bus(bus), 91 event(event), _powerState(std::move(powerState)), 92 hardShutdownMatch(bus, 93 "type='signal',member='PropertiesChanged'," 94 "path_namespace='/xyz/openbmc_project/sensors'," 95 "arg0='" + 96 shutdownInterfaces.at(ShutdownType::hard) + "'", 97 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 98 std::placeholders::_1)), 99 softShutdownMatch(bus, 100 "type='signal',member='PropertiesChanged'," 101 "path_namespace='/xyz/openbmc_project/sensors'," 102 "arg0='" + 103 shutdownInterfaces.at(ShutdownType::soft) + "'", 104 std::bind(&ShutdownAlarmMonitor::propertiesChanged, this, 105 std::placeholders::_1)) 106 { 107 _powerState->addCallback("shutdownMon", 108 std::bind(&ShutdownAlarmMonitor::powerStateChanged, 109 this, std::placeholders::_1)); 110 findAlarms(); 111 112 if (_powerState->isPowerOn()) 113 { 114 checkAlarms(); 115 116 // Get rid of any previous saved timestamps that don't 117 // apply anymore. 118 timestamps.prune(alarms); 119 } 120 else 121 { 122 timestamps.clear(); 123 } 124 } 125 126 void ShutdownAlarmMonitor::findAlarms() 127 { 128 // Find all shutdown threshold ifaces currently on D-Bus. 129 for (const auto& [shutdownType, interface] : shutdownInterfaces) 130 { 131 auto paths = SDBusPlus::getSubTreePathsRaw(bus, "/", interface, 0); 132 133 auto shutdownType2 = shutdownType; 134 135 std::for_each( 136 paths.begin(), paths.end(), 137 [this, shutdownType2](const auto& path) { 138 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::high}, 139 nullptr); 140 alarms.emplace(AlarmKey{path, shutdownType2, AlarmType::low}, 141 nullptr); 142 }); 143 } 144 } 145 146 void ShutdownAlarmMonitor::checkAlarms() 147 { 148 for (auto& [alarmKey, timer] : alarms) 149 { 150 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 151 const auto& interface = shutdownInterfaces.at(shutdownType); 152 auto propertyName = alarmProperties.at(shutdownType).at(alarmType); 153 bool value; 154 155 try 156 { 157 value = SDBusPlus::getProperty<bool>(bus, sensorPath, interface, 158 propertyName); 159 } 160 catch (const DBusServiceError& e) 161 { 162 // The sensor isn't on D-Bus anymore 163 log<level::INFO>(fmt::format("No {} interface on {} anymore.", 164 interface, sensorPath) 165 .c_str()); 166 continue; 167 } 168 169 checkAlarm(value, alarmKey); 170 } 171 } 172 173 void ShutdownAlarmMonitor::propertiesChanged( 174 sdbusplus::message::message& message) 175 { 176 std::map<std::string, std::variant<bool>> properties; 177 std::string interface; 178 179 if (!_powerState->isPowerOn()) 180 { 181 return; 182 } 183 184 message.read(interface, properties); 185 186 auto type = getShutdownType(interface); 187 if (!type) 188 { 189 return; 190 } 191 192 std::string sensorPath = message.get_path(); 193 194 const auto& lowAlarmName = alarmProperties.at(*type).at(AlarmType::low); 195 if (properties.count(lowAlarmName) > 0) 196 { 197 AlarmKey alarmKey{sensorPath, *type, AlarmType::low}; 198 auto alarm = alarms.find(alarmKey); 199 if (alarm == alarms.end()) 200 { 201 alarms.emplace(alarmKey, nullptr); 202 } 203 checkAlarm(std::get<bool>(properties.at(lowAlarmName)), alarmKey); 204 } 205 206 const auto& highAlarmName = alarmProperties.at(*type).at(AlarmType::high); 207 if (properties.count(highAlarmName) > 0) 208 { 209 AlarmKey alarmKey{sensorPath, *type, AlarmType::high}; 210 auto alarm = alarms.find(alarmKey); 211 if (alarm == alarms.end()) 212 { 213 alarms.emplace(alarmKey, nullptr); 214 } 215 checkAlarm(std::get<bool>(properties.at(highAlarmName)), alarmKey); 216 } 217 } 218 219 void ShutdownAlarmMonitor::checkAlarm(bool value, const AlarmKey& alarmKey) 220 { 221 auto alarm = alarms.find(alarmKey); 222 if (alarm == alarms.end()) 223 { 224 return; 225 } 226 227 // Start or stop the timer if necessary. 228 auto& timer = alarm->second; 229 if (value) 230 { 231 if (!timer) 232 { 233 startTimer(alarmKey); 234 } 235 } 236 else 237 { 238 if (timer) 239 { 240 stopTimer(alarmKey); 241 } 242 } 243 } 244 245 void ShutdownAlarmMonitor::startTimer(const AlarmKey& alarmKey) 246 { 247 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 248 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 249 std::chrono::milliseconds shutdownDelay{shutdownDelays.at(shutdownType)}; 250 std::optional<double> value; 251 252 auto alarm = alarms.find(alarmKey); 253 if (alarm == alarms.end()) 254 { 255 throw std::runtime_error("Couldn't find alarm inside startTimer"); 256 } 257 258 try 259 { 260 value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 261 valueProperty); 262 } 263 catch (const DBusServiceError& e) 264 { 265 // If the sensor was just added, the Value interface for it may 266 // not be in the mapper yet. This could only happen if the sensor 267 // application was started with power up and the value exceeded the 268 // threshold immediately. 269 } 270 271 createEventLog(alarmKey, true, value); 272 273 uint64_t now = std::chrono::duration_cast<std::chrono::milliseconds>( 274 std::chrono::system_clock::now().time_since_epoch()) 275 .count(); 276 277 // If there is a saved timestamp for this timer, then we were restarted 278 // while the timer was running. Calculate the remaining time to use 279 // for the timer. 280 auto previousStartTime = timestamps.get().find(alarmKey); 281 if (previousStartTime != timestamps.get().end()) 282 { 283 const uint64_t& original = previousStartTime->second; 284 285 log<level::INFO>(fmt::format("Found previously running {} timer " 286 "for {} with start time {}", 287 propertyName, sensorPath, original) 288 .c_str()); 289 290 // Sanity check it isn't total garbage. 291 if (now > original) 292 { 293 uint64_t remainingTime = 0; 294 auto elapsedTime = now - original; 295 296 if (elapsedTime < static_cast<uint64_t>(shutdownDelay.count())) 297 { 298 remainingTime = 299 static_cast<uint64_t>(shutdownDelay.count()) - elapsedTime; 300 } 301 302 shutdownDelay = std::chrono::milliseconds{remainingTime}; 303 } 304 else 305 { 306 log<level::WARNING>( 307 fmt::format( 308 "Restarting {} shutdown timer for {} for full " 309 "time because saved time {} is after current time {}", 310 propertyName, sensorPath, original, now) 311 .c_str()); 312 } 313 } 314 315 log<level::INFO>( 316 fmt::format("Starting {}ms {} shutdown timer due to sensor {} value {}", 317 shutdownDelay.count(), propertyName, sensorPath, *value) 318 .c_str()); 319 320 auto& timer = alarm->second; 321 322 timer = std::make_unique< 323 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 324 event, std::bind(&ShutdownAlarmMonitor::timerExpired, this, alarmKey)); 325 326 timer->restartOnce(shutdownDelay); 327 328 // Note that if this key is already in the timestamps map because 329 // the timer was already running the timestamp wil not be updated. 330 timestamps.add(alarmKey, now); 331 } 332 333 void ShutdownAlarmMonitor::stopTimer(const AlarmKey& alarmKey) 334 { 335 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 336 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 337 338 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 339 valueProperty); 340 341 auto alarm = alarms.find(alarmKey); 342 if (alarm == alarms.end()) 343 { 344 throw std::runtime_error("Couldn't find alarm inside stopTimer"); 345 } 346 347 createEventLog(alarmKey, false, value); 348 349 log<level::INFO>( 350 fmt::format("Stopping {} shutdown timer due to sensor {} value {}", 351 propertyName, sensorPath, value) 352 .c_str()); 353 354 auto& timer = alarm->second; 355 timer->setEnabled(false); 356 timer.reset(); 357 358 timestamps.erase(alarmKey); 359 } 360 361 void ShutdownAlarmMonitor::createBmcDump() const 362 { 363 try 364 { 365 util::SDBusPlus::callMethod( 366 "xyz.openbmc_project.Dump.Manager", "/xyz/openbmc_project/dump/bmc", 367 "xyz.openbmc_project.Dump.Create", "CreateDump", 368 std::vector< 369 std::pair<std::string, std::variant<std::string, uint64_t>>>()); 370 } 371 catch (const std::exception& e) 372 { 373 getLogger().log( 374 fmt::format("Caught exception while creating BMC dump: {}", 375 e.what()), 376 Logger::error); 377 } 378 } 379 380 void ShutdownAlarmMonitor::timerExpired(const AlarmKey& alarmKey) 381 { 382 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 383 const auto& propertyName = alarmProperties.at(shutdownType).at(alarmType); 384 385 auto value = SDBusPlus::getProperty<double>(bus, sensorPath, valueInterface, 386 valueProperty); 387 388 log<level::ERR>( 389 fmt::format( 390 "The {} shutdown timer expired for sensor {}, shutting down", 391 propertyName, sensorPath) 392 .c_str()); 393 394 // Re-send the event log. If someone didn't want this it could be 395 // wrapped by a compile option. 396 createEventLog(alarmKey, true, value, true); 397 398 SDBusPlus::callMethod(systemdService, systemdPath, systemdMgrIface, 399 "StartUnit", "obmc-chassis-hard-poweroff@0.target", 400 "replace"); 401 402 timestamps.erase(alarmKey); 403 createBmcDump(); 404 } 405 406 void ShutdownAlarmMonitor::powerStateChanged(bool powerStateOn) 407 { 408 if (powerStateOn) 409 { 410 checkAlarms(); 411 } 412 else 413 { 414 timestamps.clear(); 415 416 // Cancel and delete all timers 417 std::for_each(alarms.begin(), alarms.end(), [](auto& alarm) { 418 auto& timer = alarm.second; 419 if (timer) 420 { 421 timer->setEnabled(false); 422 timer.reset(); 423 } 424 }); 425 } 426 } 427 428 void ShutdownAlarmMonitor::createEventLog( 429 const AlarmKey& alarmKey, bool alarmValue, 430 const std::optional<double>& sensorValue, bool isPowerOffError) 431 { 432 using namespace sdbusplus::xyz::openbmc_project::Logging::server; 433 const auto& [sensorPath, shutdownType, alarmType] = alarmKey; 434 std::map<std::string, std::string> ad{{"SENSOR_NAME", sensorPath}, 435 {"_PID", std::to_string(getpid())}}; 436 437 std::string errorName = 438 (alarmValue) ? alarmEventLogs.at(shutdownType).at(alarmType) 439 : alarmClearEventLogs.at(shutdownType).at(alarmType); 440 441 // severity = Critical if a power off 442 // severity = Error if alarm was asserted 443 // severity = Informational if alarm was deasserted 444 Entry::Level severity = Entry::Level::Error; 445 if (isPowerOffError) 446 { 447 severity = Entry::Level::Critical; 448 } 449 else if (!alarmValue) 450 { 451 severity = Entry::Level::Informational; 452 } 453 454 if (sensorValue) 455 { 456 ad.emplace("SENSOR_VALUE", std::to_string(*sensorValue)); 457 } 458 459 // If this is a power off, specify that it's a power 460 // fault and a system termination. This is used by some 461 // implementations for service reasons. 462 if (isPowerOffError) 463 { 464 ad.emplace("SEVERITY_DETAIL", "SYSTEM_TERM"); 465 } 466 467 SDBusPlus::callMethod(loggingService, loggingPath, loggingCreateIface, 468 "Create", errorName, convertForMessage(severity), ad); 469 } 470 471 std::optional<ShutdownType> 472 ShutdownAlarmMonitor::getShutdownType(const std::string& interface) const 473 { 474 auto it = std::find_if( 475 shutdownInterfaces.begin(), shutdownInterfaces.end(), 476 [interface](const auto& a) { return a.second == interface; }); 477 478 if (it == shutdownInterfaces.end()) 479 { 480 return std::nullopt; 481 } 482 483 return it->first; 484 } 485 486 } // namespace sensor::monitor 487