#include "config.h"

#include "host_state_manager.hpp"

#include "host_check.hpp"
#include "utils.hpp"

#include <systemd/sd-bus.h>

#include <cereal/archives/json.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/tuple.hpp>
#include <cereal/types/vector.hpp>
#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/exception.hpp>
#include <sdbusplus/server.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
#include <xyz/openbmc_project/State/Host/error.hpp>

#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <system_error>
#include <variant>

// Register class version with Cereal
CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)

namespace phosphor
{
namespace state
{
namespace manager
{

PHOSPHOR_LOG2_USING;

// When you see server:: or reboot:: you know we're referencing our base class
namespace server = sdbusplus::server::xyz::openbmc_project::state;
namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
namespace osstatus =
    sdbusplus::server::xyz::openbmc_project::state::operating_system;
using namespace phosphor::logging;
namespace fs = std::filesystem;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

// systemd unit "ActiveState" values that stateActive() treats as active
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus coordinates of the systemd manager object
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";

// Pick the initial CurrentHostState / RequestedHostTransition values.
//
// The host is considered Running when either the Running systemd target is
// active/activating or isHostRunning(id) reports true; otherwise it is Off.
// Afterwards the persisted archive is loaded via deserialize(); when that
// fails the requested transition is reset to Off.
void Host::determineInitialState()
{
    if (stateActive(getTarget(server::Host::HostState::Running)) ||
        isHostRunning(id))
    {
        info("Initial Host State will be Running");
        server::Host::currentHostState(HostState::Running, true);
        server::Host::requestedHostTransition(Transition::On, true);
    }
    else
    {
        info("Initial Host State will be Off");
        server::Host::currentHostState(HostState::Off, true);
        server::Host::requestedHostTransition(Transition::Off, true);
    }

    if (!deserialize())
    {
        // set to default value.
        server::Host::requestedHostTransition(Transition::Off, true);
    }
}

// Publish the set of transitions this host object accepts.
// ForceWarmReboot is only advertised when ENABLE_FORCE_WARM_REBOOT is set.
void Host::setupSupportedTransitions()
{
    std::set<Transition> supportedTransitions = {
        Transition::On,
        Transition::Off,
        Transition::Reboot,
        Transition::GracefulWarmReboot,
#if ENABLE_FORCE_WARM_REBOOT
        Transition::ForceWarmReboot,
#endif
    };
    server::Host::allowedHostTransitions(supportedTransitions);
}

// Build the host-state -> systemd target and transition -> systemd target
// lookup tables, plus the crash target name, for this host instance (`id`).
void Host::createSystemdTargetMaps()
{
    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
// Some systems do not support a warm reboot so just map the reboot
// requests to our normal cold reboot in that case
#if ENABLE_WARM_REBOOT
        {Transition::GracefulWarmReboot,
         std::format("obmc-host-warm-reboot@{}.target", id)},
        {Transition::ForceWarmReboot,
         std::format("obmc-host-force-warm-reboot@{}.target", id)}};
#else
        {Transition::GracefulWarmReboot,
         std::format("obmc-host-reboot@{}.target", id)},
        {Transition::ForceWarmReboot,
         std::format("obmc-host-reboot@{}.target", id)}};
#endif
    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}

// Look up the systemd target associated with a host state.
// NOTE(review): the lookup uses operator[]; if the table is a std::map (its
// declaration is in the header), a state that was never added in
// createSystemdTargetMaps() yields a newly inserted empty string - confirm
// that callers only pass mapped states.
const std::string& Host::getTarget(HostState state)
{
    return stateTargetTable[state];
}

// Look up the systemd target associated with a requested transition.
// The operator[] caveat noted on the HostState overload applies here as well.
const std::string& Host::getTarget(Transition tranReq)
{
    return transitionTargetTable[tranReq];
}

// Ask systemd to start (mode "replace") the target mapped to `tranReq`.
// The call is sent without waiting for a reply.
void Host::executeTransition(Transition tranReq)
{
    const auto& sysdUnit = getTarget(tranReq);

    auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
                                            SYSTEMD_INTERFACE, "StartUnit");

    method.append(sysdUnit);
    method.append("replace");

    this->bus.call_noreply(method);
}

// Report whether the systemd unit `target` is currently active or activating.
//
// Resolves the unit's object path with GetUnit, then reads its ActiveState
// property. Returns false (after logging) if either D-Bus call throws.
bool Host::stateActive(const std::string& target)
{
    std::variant<std::string> currentState;
    sdbusplus::message::object_path unitTargetPath;

    auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
                                            SYSTEMD_INTERFACE, "GetUnit");

    method.append(target);

    try
    {
        auto result = this->bus.call(method);
        result.read(unitTargetPath);
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in GetUnit call: {ERROR}", "ERROR", e);
        return false;
    }

    method = this->bus.new_method_call(
        SYSTEMD_SERVICE,
        static_cast<const std::string&>(unitTargetPath).c_str(),
        SYSTEMD_PROPERTY_IFACE, "Get");

    method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");

    try
    {
        auto result = this->bus.call(method);
        result.read(currentState);
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in ActiveState Get: {ERROR}", "ERROR", e);
        return false;
    }

    const auto& currentStateStr = std::get<std::string>(currentState);
    return currentStateStr == ACTIVE_STATE ||
           currentStateStr == ACTIVATING_STATE;
}

// Decide whether the host should be rebooted automatically.
//
// Returns true only when both the one-time and the persistent AutoReboot
// settings are true and reboot attempts remain. When the settings allow a
// reboot but no attempts remain, the attempts counter is reset to
// retryAttempts(), an error log and a BMC dump are created, and false is
// returned. Any sdbusplus exception while reading the settings yields false.
bool Host::isAutoReboot()
{
    using namespace settings;

    /* The logic here is to first check the one-time AutoReboot setting.
     * If this property is true (the default) then look at the persistent
     * user setting in the non one-time object, otherwise honor the one-time
     * setting and do not auto reboot.
     */
    auto methodOneTime = bus.new_method_call(
        settings.service(settings.autoReboot, autoRebootIntf).c_str(),
        settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
    methodOneTime.append(autoRebootIntf, "AutoReboot");

    auto methodUserSetting = bus.new_method_call(
        settings.service(settings.autoReboot, autoRebootIntf).c_str(),
        settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
    methodUserSetting.append(autoRebootIntf, "AutoReboot");

    try
    {
        auto reply = bus.call(methodOneTime);
        std::variant<bool> result;
        reply.read(result);
        auto autoReboot = std::get<bool>(result);

        if (!autoReboot)
        {
            info("Auto reboot (one-time) disabled");
            return false;
        }

        // one-time is true so read the user setting
        reply = bus.call(methodUserSetting);
        reply.read(result);
        autoReboot = std::get<bool>(result);

        auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();

        if (!autoReboot)
        {
            info("Auto reboot disabled.");
            return false;
        }

        if (rebootCounterParam > 0)
        {
            // Attempts remain, so allow the reboot. The counter is not
            // touched here; requestedHostTransition() decrements it when
            // the reboot is actually requested.
            info("Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
                 "BOOTCOUNT", rebootCounterParam);
            return true;
        }

        // We are at 0 so reset reboot counter and go to quiesce state
        info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
        attemptsLeft(reboot::RebootAttempts::retryAttempts());

        // Generate log since we will now be sitting in Quiesce
        const std::string errorMsg =
            "xyz.openbmc_project.State.Error.HostQuiesce";
        utils::createError(this->bus, errorMsg,
                           sdbusplus::xyz::openbmc_project::Logging::server::
                               Entry::Level::Critical);

        // Generate BMC dump to assist with debug
        utils::createBmcDump(this->bus);

        return false;
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
        return false;
    }
}

// Handle a job-removed message (presumably systemd's JobRemoved signal; the
// subscription is set up outside this file).
//
// The message carries (job id, job object path, unit name, result). Only
// jobs that finished with result "done" for the Off, Running or Quiesced
// targets are acted upon, and each is cross-checked against the current
// state of the relevant target before the host state is updated.
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (newStateResult == "done") &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (newStateResult == "done") &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // NOTE(review): the instance is hard-coded to 0 rather than `id`;
        // confirm this matches whatever creates the file on multi-host
        // systems before changing it.
        std::string hostFile = std::format(HOST_RUNNING_FILE, 0);

        // A single non-throwing remove replaces the exists()+remove() pair:
        // a missing file is not an error, and a filesystem failure must not
        // escape from this message handler as an exception.
        std::error_code ec;
        fs::remove(hostFile, ec);
        if (ec)
        {
            error("Failed to remove host running file {FILE}: {ERROR}", "FILE",
                  hostFile, "ERROR", ec.message());
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (newStateResult == "done") &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}

// Handle a job-new message (presumably systemd's JobNew signal; the
// subscription is set up outside this file).
//
// The message carries (job id, job object path, unit name). Starting the
// diagnostic-mode target moves the host to DiagnosticMode; starting the
// crash target while the host is Running decrements the reboot count.
void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit);

    if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
    {
        info("Received signal that host is in diagnostice mode");
        this->currentHostState(server::Host::HostState::DiagnosticMode);
    }
    else if ((newStateUnit == hostCrashTarget) &&
             (server::Host::currentHostState() ==
              server::Host::HostState::Running))
    {
        // Only decrease the boot count if host was running when the host crash
        // target was started. Systemd will sometimes trigger multiple
        // JobNew events for the same target. This seems to be related to
        // how OpenBMC utilizes the targets in the reboot scenario
        info("Received signal that host has crashed, decrement reboot count");

        // A host crash can cause a reboot of the host so decrement the reboot
        // count
        decrementRebootCount();
    }
}

// Decrement the remaining reboot attempts, never going below zero.
// Returns the value reported by the attemptsLeft() setter when a decrement
// happened, otherwise the unchanged count (0).
uint32_t Host::decrementRebootCount()
{
    auto rebootCount = reboot::RebootAttempts::attemptsLeft();
    if (rebootCount > 0)
    {
        return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
    }
    return rebootCount;
}

// Persist this object as a JSON archive at HOST_STATE_PERSIST_PATH (formatted
// with `id`) and return that path. The path is returned even when the file
// could not be opened; that failure is logged and nothing is written.
fs::path Host::serialize()
{
    fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
    std::ofstream os(path.c_str(), std::ios::binary);
    if (!os.is_open())
    {
        // Previously this failure was silent: the archive was simply written
        // to a stream in a failed state.
        error("Unable to open {PATH} to persist host state", "PATH",
              path.string());
        return path;
    }
    cereal::JSONOutputArchive oarchive(os);
    oarchive(*this);
    return path;
}

// Load this object from the JSON archive written by serialize().
// Returns true when the file exists and was read; false when it does not
// exist or when cereal rejects it, in which case the file is removed.
bool Host::deserialize()
{
    fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
    try
    {
        if (fs::exists(path))
        {
            std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
            cereal::JSONInputArchive iarchive(is);
            iarchive(*this);
            return true;
        }
        return false;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);
        // Best-effort cleanup of the unreadable file; use the non-throwing
        // overload so a removal failure cannot throw out of this handler.
        std::error_code ec;
        fs::remove(path, ec);
        return false;
    }
}

// Handle a request to change the host state.
//
// Optionally rejects non-Off requests with BMCNotReady while the BMC is not
// ready (ONLY_ALLOW_BOOT_WHEN_BMC_READY), decrements the reboot counter for
// every non-Off request, starts the matching systemd target, records the new
// property value and persists it. Returns the value stored by the base class.
Host::Transition Host::requestedHostTransition(Transition value)
{
    info("Host state transition request of {REQ}", "REQ", value);

#if ONLY_ALLOW_BOOT_WHEN_BMC_READY
    if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
    {
        info("BMC State is not Ready so no host on operations allowed");
        throw sdbusplus::xyz::openbmc_project::State::Host::Error::
            BMCNotReady();
    }
#endif

    // If this is not a power off request then we need to
    // decrement the reboot counter.  This code should
    // never prevent a power on, it should just decrement
    // the count to 0.  The quiesce handling is where the
    // check of this count will occur
    if (value != server::Host::Transition::Off)
    {
        decrementRebootCount();
    }

    executeTransition(value);

    auto retVal = server::Host::requestedHostTransition(value);

    serialize();
    return retVal;
}

// Set the BootProgress property on the base class and persist the object.
Host::ProgressStages Host::bootProgress(ProgressStages value)
{
    auto retVal = bootprogress::Progress::bootProgress(value);
    serialize();
    return retVal;
}

// Set the OperatingSystemState property on the base class and persist the
// object.
Host::OSStatus Host::operatingSystemState(OSStatus value)
{
    auto retVal = osstatus::Status::operatingSystemState(value);
    serialize();
    return retVal;
}

// Log and set the CurrentHostState property on the base class.
Host::HostState Host::currentHostState(HostState value)
{
    info("Change to Host State: {STATE}", "STATE", value);
    return server::Host::currentHostState(value);
}

} // namespace manager
} // namespace state
} // namespace phosphor