#include "config.h"

#include "host_state_manager.hpp"

#include "host_check.hpp"
#include "utils.hpp"

#include <systemd/sd-bus.h>

#include <cereal/archives/json.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/tuple.hpp>
#include <cereal/types/vector.hpp>
#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/exception.hpp>
#include <sdbusplus/server.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
#include <xyz/openbmc_project/State/Host/error.hpp>

#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <system_error>
#include <variant>

// Register class version with Cereal
CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)

namespace phosphor
{
namespace state
{
namespace manager
{

PHOSPHOR_LOG2_USING;

// When you see server:: or reboot:: you know we're referencing our base class
namespace server = sdbusplus::server::xyz::openbmc_project::state;
namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
namespace osstatus =
    sdbusplus::server::xyz::openbmc_project::state::operating_system;
using namespace phosphor::logging;
namespace fs = std::filesystem;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

// systemd unit ActiveState values that stateActive() treats as "active"
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus coordinates of the systemd manager object
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";

// Pick the initial CurrentHostState / RequestedHostTransition values.
// The host is considered Running if either the "Running" systemd target is
// active or isHostRunning(id) says so; otherwise it is considered Off.
// Afterwards the persisted state is loaded via deserialize(); if nothing
// could be loaded the requested transition is forced to Off.
void Host::determineInitialState()
{
    if (stateActive(getTarget(server::Host::HostState::Running)) ||
        isHostRunning(id))
    {
        info("Initial Host State will be Running");
        server::Host::currentHostState(HostState::Running, true);
        server::Host::requestedHostTransition(Transition::On, true);
    }
    else
    {
        info("Initial Host State will be Off");
        server::Host::currentHostState(HostState::Off, true);
        server::Host::requestedHostTransition(Transition::Off, true);
    }

    if (!deserialize())
    {
        // set to default value.
        server::Host::requestedHostTransition(Transition::Off, true);
    }
}

// Publish the set of transitions this build supports through the
// AllowedHostTransitions property. ForceWarmReboot is only advertised when
// ENABLE_FORCE_WARM_REBOOT is set.
void Host::setupSupportedTransitions()
{
    std::set<Transition> supportedTransitions = {
        Transition::On,
        Transition::Off,
        Transition::Reboot,
        Transition::GracefulWarmReboot,
#if ENABLE_FORCE_WARM_REBOOT
        Transition::ForceWarmReboot,
#endif
    };
    server::Host::allowedHostTransitions(supportedTransitions);
}

// Build the host-state -> systemd target and transition -> systemd target
// lookup tables for this host instance (id), plus the crash target name.
void Host::createSystemdTargetMaps()
{
    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
    // Some systems do not support a warm reboot so just map the reboot
    // requests to our normal cold reboot in that case
#if ENABLE_WARM_REBOOT
        {Transition::GracefulWarmReboot,
         std::format("obmc-host-warm-reboot@{}.target", id)},
        {Transition::ForceWarmReboot,
         std::format("obmc-host-force-warm-reboot@{}.target", id)},
#else
        {Transition::GracefulWarmReboot,
         std::format("obmc-host-reboot@{}.target", id)},
        {Transition::ForceWarmReboot,
         std::format("obmc-host-reboot@{}.target", id)},
#endif
    };

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}

// Return the systemd target name mapped to a host state.
// Note: the lookup uses operator[], so a state that is missing from the
// table gets a default-constructed (empty) entry inserted and returned.
const std::string& Host::getTarget(HostState state)
{
    return stateTargetTable[state];
}

// Return the systemd target name mapped to a requested transition.
// Note: the lookup uses operator[], so a transition that is missing from the
// table gets a default-constructed (empty) entry inserted and returned.
const std::string& Host::getTarget(Transition tranReq)
{
    return transitionTargetTable[tranReq];
}

// Ask systemd to start the target that corresponds to tranReq, using the
// "replace" job mode. The call is fire-and-forget (no reply is awaited).
void Host::executeTransition(Transition tranReq)
{
    const auto& sysdUnit = getTarget(tranReq);

    auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
                                            SYSTEMD_INTERFACE, "StartUnit");

    method.append(sysdUnit);
    method.append("replace");

    this->bus.call_noreply(method);
}

// Query systemd for the ActiveState of the given target.
// Returns true when the state is "active" or "activating"; returns false in
// every other case, including when either D-Bus call fails (the failure is
// logged).
bool Host::stateActive(const std::string& target)
{
    std::variant<std::string> currentState;
    sdbusplus::message::object_path unitTargetPath;

    // Step 1: resolve the unit name to its object path
    auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
                                            SYSTEMD_INTERFACE, "GetUnit");

    method.append(target);

    try
    {
        auto result = this->bus.call(method);
        result.read(unitTargetPath);
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in GetUnit call: {ERROR}", "ERROR", e);
        return false;
    }

    // Step 2: read the ActiveState property from that unit object
    method = this->bus.new_method_call(
        SYSTEMD_SERVICE,
        static_cast<const std::string&>(unitTargetPath).c_str(),
        SYSTEMD_PROPERTY_IFACE, "Get");

    method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");

    try
    {
        auto result = this->bus.call(method);
        result.read(currentState);
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in ActiveState Get: {ERROR}", "ERROR", e);
        return false;
    }

    const auto& currentStateStr = std::get<std::string>(currentState);
    return currentStateStr == ACTIVE_STATE ||
           currentStateStr == ACTIVATING_STATE;
}

// Decide whether the host should be automatically rebooted.
// Returns true only when both the one-time and the persistent AutoReboot
// settings are true and there are reboot attempts left. When auto reboot is
// enabled but no attempts are left, the attempts counter is reset to the
// retry value, an error log and a BMC dump are requested, and false is
// returned. Any D-Bus failure is logged and results in false.
bool Host::isAutoReboot()
{
    using namespace settings;

    /* The logic here is to first check the one-time AutoReboot setting.
     * If this property is true (the default) then look at the persistent
     * user setting in the non one-time object, otherwise honor the one-time
     * setting and do not auto reboot.
     */
    auto methodOneTime = bus.new_method_call(
        settings.service(settings.autoReboot, autoRebootIntf).c_str(),
        settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
    methodOneTime.append(autoRebootIntf, "AutoReboot");

    auto methodUserSetting = bus.new_method_call(
        settings.service(settings.autoReboot, autoRebootIntf).c_str(),
        settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
    methodUserSetting.append(autoRebootIntf, "AutoReboot");

    try
    {
        auto reply = bus.call(methodOneTime);
        std::variant<bool> result;
        reply.read(result);
        auto autoReboot = std::get<bool>(result);

        if (!autoReboot)
        {
            info("Auto reboot (one-time) disabled");
            return false;
        }

        // one-time is true so read the user setting
        reply = bus.call(methodUserSetting);
        reply.read(result);
        autoReboot = std::get<bool>(result);

        if (!autoReboot)
        {
            info("Auto reboot disabled.");
            return false;
        }

        auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
        if (rebootCounterParam > 0)
        {
            // Attempts remain, so allow the reboot. The counter is not
            // modified here; requestedHostTransition() decrements it for
            // every non-Off transition request.
            info(
                "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
                "BOOTCOUNT", rebootCounterParam);
            return true;
        }

        // We are at 0 so reset reboot counter and go to quiesce state
        info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
        attemptsLeft(reboot::RebootAttempts::retryAttempts());

        // Generate log since we will now be sitting in Quiesce
        const std::string errorMsg =
            "xyz.openbmc_project.State.Error.HostQuiesce";
        utils::createError(this->bus, errorMsg,
                           sdbusplus::xyz::openbmc_project::Logging::server::
                               Entry::Level::Critical);

        // Generate BMC dump to assist with debug
        utils::createBmcDump(this->bus);

        return false;
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
        return false;
    }
}

// Handler for a systemd JobRemoved signal.
// Reacts to completion ("done") of the Off, Running and Quiesced targets and
// updates the host state properties accordingly. For the Quiesced target an
// automatic reboot is requested when isAutoReboot() allows it.
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (newStateResult == "done") &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (newStateResult == "done") &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // NOTE(review): the instance number is hard-coded to 0 here while
        // the rest of this file formats paths with `id`. Confirm against the
        // code that creates this file before changing it.
        const std::string hostFile = std::format(HOST_RUNNING_FILE, 0);

        // Use the non-throwing overload: this runs inside a D-Bus signal
        // handler, and remove() already treats "file does not exist" as a
        // non-error (returns false with ec cleared), so no separate exists()
        // check is needed.
        std::error_code ec;
        fs::remove(hostFile, ec);
        if (ec)
        {
            const std::string reason = ec.message();
            error("Failed to remove {PATH}: {ERROR}", "PATH", hostFile,
                  "ERROR", reason);
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (newStateResult == "done") &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}

// Handler for a systemd JobNew signal.
// Moves the host to DiagnosticMode when the diagnostic-mode target is
// started, and decrements the reboot counter when the crash target is
// started while the host is Running.
void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit);

    if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
    {
        info("Received signal that host is in diagnostice mode");
        this->currentHostState(server::Host::HostState::DiagnosticMode);
    }
    else if ((newStateUnit == hostCrashTarget) &&
             (server::Host::currentHostState() ==
              server::Host::HostState::Running))
    {
        // Only decrease the boot count if host was running when the host crash
        // target was started. Systemd will sometimes trigger multiple
        // JobNew events for the same target. This seems to be related to
        // how OpenBMC utilizes the targets in the reboot scenario
        info("Received signal that host has crashed, decrement reboot count");

        // A host crash can cause a reboot of the host so decrement the reboot
        // count
        decrementRebootCount();
    }
}

// Decrement the AttemptsLeft counter by one, never going below zero.
// Returns the value reported by the property setter when a decrement
// happened, otherwise the unchanged (zero) count.
uint32_t Host::decrementRebootCount()
{
    auto rebootCount = reboot::RebootAttempts::attemptsLeft();
    if (rebootCount > 0)
    {
        return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
    }
    return rebootCount;
}

// Write this object to the per-host persistence file as a cereal JSON
// archive and return the path that was used.
fs::path Host::serialize()
{
    fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
    std::ofstream os(path.c_str(), std::ios::binary);
    cereal::JSONOutputArchive oarchive(os);
    oarchive(*this);
    return path;
}

// Load this object from the per-host persistence file.
// Returns true when the file existed and was read successfully. Returns
// false when the file is absent (or its existence cannot be determined) or
// when cereal fails to parse it; in the latter case the file is removed so
// it is not retried on the next start.
bool Host::deserialize()
{
    const fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
    try
    {
        // Non-throwing overload: a filesystem error here must not escape as
        // an exception type the handler below does not catch.
        std::error_code ec;
        if (!fs::exists(path, ec))
        {
            if (ec)
            {
                const std::string pathStr = path.string();
                const std::string reason = ec.message();
                error("Unable to check for {PATH}: {ERROR}", "PATH", pathStr,
                      "ERROR", reason);
            }
            return false;
        }

        std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
        cereal::JSONInputArchive iarchive(is);
        iarchive(*this);
        return true;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);

        // Non-throwing overload: a failure to delete the bad file must not
        // throw out of this handler and defeat the "return false" fallback.
        std::error_code ec;
        fs::remove(path, ec);
        if (ec)
        {
            const std::string pathStr = path.string();
            const std::string reason = ec.message();
            error("Failed to remove {PATH}: {ERROR}", "PATH", pathStr, "ERROR",
                  reason);
        }
        return false;
    }
}

// Property setter override for RequestedHostTransition.
// Optionally rejects non-Off requests while the BMC is not Ready (build
// option), decrements the reboot counter for non-Off requests, starts the
// matching systemd target, stores the property and persists the object.
// Throws BMCNotReady when ONLY_ALLOW_BOOT_WHEN_BMC_READY is set and the BMC
// is not ready for a non-Off request.
Host::Transition Host::requestedHostTransition(Transition value)
{
    info("Host state transition request of {REQ}", "REQ", value);

#if ONLY_ALLOW_BOOT_WHEN_BMC_READY
    if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
    {
        info("BMC State is not Ready so no host on operations allowed");
        throw sdbusplus::xyz::openbmc_project::State::Host::Error::
            BMCNotReady();
    }
#endif

    // If this is not a power off request then we need to
    // decrement the reboot counter.  This code should
    // never prevent a power on, it should just decrement
    // the count to 0.  The quiesce handling is where the
    // check of this count will occur
    if (value != server::Host::Transition::Off)
    {
        decrementRebootCount();
    }

    executeTransition(value);

    auto retVal = server::Host::requestedHostTransition(value);

    serialize();
    return retVal;
}

// Property setter override for BootProgress: store the value, then persist.
Host::ProgressStages Host::bootProgress(ProgressStages value)
{
    auto retVal = bootprogress::Progress::bootProgress(value);
    serialize();
    return retVal;
}

// Property setter override for OperatingSystemState: store the value, then
// persist.
Host::OSStatus Host::operatingSystemState(OSStatus value)
{
    auto retVal = osstatus::Status::operatingSystemState(value);
    serialize();
    return retVal;
}

// Property setter override for CurrentHostState: log the change and store
// the value. Unlike the other setters in this file it does not call
// serialize().
Host::HostState Host::currentHostState(HostState value)
{
    info("Change to Host State: {STATE}", "STATE", value);
    return server::Host::currentHostState(value);
}

} // namespace manager
} // namespace state
} // namespace phosphor