#include "config.h"

#include "host_state_manager.hpp"

#include "host_check.hpp"
#include "utils.hpp"

#include <stdio.h>
#include <systemd/sd-bus.h>

#include <cereal/archives/json.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/tuple.hpp>
#include <cereal/types/vector.hpp>
#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/exception.hpp>
#include <sdbusplus/server.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
#include <xyz/openbmc_project/State/Host/error.hpp>

#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <system_error>

// Register class version with Cereal
CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)

namespace phosphor
{
namespace state
{
namespace manager
{

PHOSPHOR_LOG2_USING;

// When you see server:: or reboot:: you know we're referencing our base class
namespace server = sdbusplus::server::xyz::openbmc_project::state;
namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
namespace osstatus =
    sdbusplus::server::xyz::openbmc_project::state::operating_system;
using namespace phosphor::logging;
namespace fs = std::filesystem;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

// systemd "ActiveState" values that stateActive() treats as "active"
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus coordinates of the systemd manager object
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";

/** @brief Set the initial current state and requested transition.
 *
 *  The host is considered running when either the Running target is
 *  active in systemd or isHostRunning(id) reports true. The base-class
 *  (server::Host) setters are called directly, so the overrides in this
 *  file (which start systemd units and persist state) are not triggered.
 *
 *  Afterwards the persisted state is loaded; when deserialize() returns
 *  false the requested transition is reset to Off.
 */
void Host::determineInitialState()
{
    if (stateActive(getTarget(server::Host::HostState::Running)) ||
        isHostRunning(id))
    {
        info("Initial Host State will be Running");
        server::Host::currentHostState(HostState::Running);
        server::Host::requestedHostTransition(Transition::On);
    }
    else
    {
        info("Initial Host State will be Off");
        server::Host::currentHostState(HostState::Off);
        server::Host::requestedHostTransition(Transition::Off);
    }

    if (!deserialize())
    {
        // set to default value.
        server::Host::requestedHostTransition(Transition::Off);
    }
}

/** @brief Publish the set of transitions this build allows.
 *
 *  ForceWarmReboot is only advertised when ENABLE_FORCE_WARM_REBOOT is
 *  set at build time.
 */
void Host::setupSupportedTransitions()
{
    std::set<Transition> supportedTransitions = {
        Transition::On,
        Transition::Off,
        Transition::Reboot,
        Transition::GracefulWarmReboot,
#if ENABLE_FORCE_WARM_REBOOT
        Transition::ForceWarmReboot,
#endif
    };
    server::Host::allowedHostTransitions(supportedTransitions);
}

/** @brief Build the host-state and transition to systemd-target tables
 *         for this host instance (unit names are templated on `id`).
 */
void Host::createSystemdTargetMaps()
{
    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
// Some systems do not support a warm reboot so just map the reboot
// requests to our normal cold reboot in that case
#if ENABLE_WARM_REBOOT
        {Transition::GracefulWarmReboot,
         std::format("obmc-host-warm-reboot@{}.target", id)},
        {Transition::ForceWarmReboot,
         std::format("obmc-host-force-warm-reboot@{}.target", id)},
#else
        {Transition::GracefulWarmReboot,
         std::format("obmc-host-reboot@{}.target", id)},
        {Transition::ForceWarmReboot,
         std::format("obmc-host-reboot@{}.target", id)},
#endif
    };

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}

/** @brief Look up the systemd target associated with a host state.
 *
 *  @param[in] state - Host state to look up
 *  @return Target unit name, or an empty string when the state has no
 *          entry. The lookup never adds an entry to the table.
 */
const std::string& Host::getTarget(HostState state)
{
    static const std::string noTarget;
    const auto entry = stateTargetTable.find(state);
    return (entry != stateTargetTable.end()) ? entry->second : noTarget;
}

/** @brief Look up the systemd target associated with a transition.
 *
 *  @param[in] tranReq - Transition to look up
 *  @return Target unit name, or an empty string when the transition has
 *          no entry. The lookup never adds an entry to the table.
 */
const std::string& Host::getTarget(Transition tranReq)
{
    static const std::string noTarget;
    const auto entry = transitionTargetTable.find(tranReq);
    return (entry != transitionTargetTable.end()) ? entry->second : noTarget;
}

/** @brief Ask systemd to start the target mapped to the given transition.
 *
 *  Issues StartUnit(<target>, "replace") without waiting for a reply.
 *
 *  @param[in] tranReq - Transition whose target should be started
 */
void Host::executeTransition(Transition tranReq)
{
    const auto& sysdUnit = getTarget(tranReq);

    auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
                                            SYSTEMD_INTERFACE, "StartUnit");

    method.append(sysdUnit);
    method.append("replace");

    this->bus.call_noreply(method);
}

/** @brief Check whether a systemd unit is active or activating.
 *
 *  @param[in] target - systemd unit name
 *  @return true when the unit's ActiveState is "active" or "activating";
 *          false otherwise, including when either D-Bus call fails (the
 *          failure is logged).
 */
bool Host::stateActive(const std::string& target)
{
    std::variant<std::string> currentState;
    sdbusplus::message::object_path unitTargetPath;

    // Resolve the unit name to its object path
    auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
                                            SYSTEMD_INTERFACE, "GetUnit");

    method.append(target);

    try
    {
        auto result = this->bus.call(method);
        result.read(unitTargetPath);
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in GetUnit call: {ERROR}", "ERROR", e);
        return false;
    }

    // Read the ActiveState property of that unit
    method = this->bus.new_method_call(
        SYSTEMD_SERVICE,
        static_cast<const std::string&>(unitTargetPath).c_str(),
        SYSTEMD_PROPERTY_IFACE, "Get");

    method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");

    try
    {
        auto result = this->bus.call(method);
        result.read(currentState);
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in ActiveState Get: {ERROR}", "ERROR", e);
        return false;
    }

    const auto& currentStateStr = std::get<std::string>(currentState);
    return currentStateStr == ACTIVE_STATE ||
           currentStateStr == ACTIVATING_STATE;
}

/** @brief Decide whether the host should be automatically rebooted.
 *
 *  @return true when both AutoReboot settings are enabled and reboot
 *          attempts remain; false otherwise (including on any D-Bus
 *          error, which is logged). When the settings are enabled but no
 *          attempts remain, the attempt counter is reset, a HostQuiesce
 *          error is created and a BMC dump is requested before returning
 *          false.
 */
bool Host::isAutoReboot()
{
    using namespace settings;

    /* The logic here is to first check the one-time AutoReboot setting.
     * If this property is true (the default) then look at the persistent
     * user setting in the non one-time object, otherwise honor the one-time
     * setting and do not auto reboot.
     */

    // Both requests use the same service lookup, so resolve it once
    const auto settingsService =
        settings.service(settings.autoReboot, autoRebootIntf);

    auto methodOneTime = bus.new_method_call(
        settingsService.c_str(), settings.autoRebootOneTime.c_str(),
        SYSTEMD_PROPERTY_IFACE, "Get");
    methodOneTime.append(autoRebootIntf, "AutoReboot");

    auto methodUserSetting = bus.new_method_call(
        settingsService.c_str(), settings.autoReboot.c_str(),
        SYSTEMD_PROPERTY_IFACE, "Get");
    methodUserSetting.append(autoRebootIntf, "AutoReboot");

    try
    {
        std::variant<bool> result;

        auto reply = bus.call(methodOneTime);
        reply.read(result);
        if (!std::get<bool>(result))
        {
            info("Auto reboot (one-time) disabled");
            return false;
        }

        // one-time is true so read the user setting
        reply = bus.call(methodUserSetting);
        reply.read(result);
        if (!std::get<bool>(result))
        {
            info("Auto reboot disabled.");
            return false;
        }

        const auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
        if (rebootCounterParam > 0)
        {
            // Attempts remain. The counter is not modified here; it is
            // decremented by requestedHostTransition() when the reboot is
            // actually requested.
            info("Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
                 "BOOTCOUNT", rebootCounterParam);
            return true;
        }

        // We are at 0 so reset reboot counter and go to quiesce state
        info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
        attemptsLeft(reboot::RebootAttempts::retryAttempts());

        // Generate log since we will now be sitting in Quiesce
        const std::string errorMsg =
            "xyz.openbmc_project.State.Error.HostQuiesce";
        utils::createError(this->bus, errorMsg,
                           sdbusplus::xyz::openbmc_project::Logging::server::
                               Entry::Level::Critical);

        // Generate BMC dump to assist with debug
        utils::createBmcDump(this->bus);

        return false;
    }
    catch (const sdbusplus::exception_t& e)
    {
        error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
        return false;
    }
}

/** @brief Handle a systemd JobRemoved signal.
 *
 *  Updates the host state when the Off, Running or Quiesced target
 *  finished with result "done" and systemd's view of the targets agrees.
 *
 *  @param[in] msg - Signal payload: job id, job object path, unit name
 *                   and result string
 */
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    // Every case below only applies to jobs that completed successfully
    if (newStateResult != "done")
    {
        return;
    }

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // NOTE(review): the instance number is hard-coded to 0 rather than
        // `id`; confirm against the code that creates this file before
        // changing it.
        std::string hostFile = std::format(HOST_RUNNING_FILE, 0);

        // The error_code overload does not throw and treats a missing file
        // as a no-op, so no separate exists() check is needed.
        std::error_code ec;
        fs::remove(hostFile, ec);
        if (ec)
        {
            error("Failed to remove {FILE}: {ERROR}", "FILE", hostFile,
                  "ERROR", ec.message());
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}

/** @brief Handle a systemd JobNew signal.
 *
 *  Moves the host to DiagnosticMode when that target is started, and
 *  decrements the reboot count when the crash target is started while
 *  the host is Running.
 *
 *  @param[in] msg - Signal payload: job id, job object path and unit name
 */
void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit);

    if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
    {
        info("Received signal that host is in diagnostice mode");
        this->currentHostState(server::Host::HostState::DiagnosticMode);
    }
    else if ((newStateUnit == hostCrashTarget) &&
             (server::Host::currentHostState() ==
              server::Host::HostState::Running))
    {
        // Only decrease the boot count if host was running when the host crash
        // target was started. Systemd will sometimes trigger multiple
        // JobNew events for the same target. This seems to be related to
        // how OpenBMC utilizes the targets in the reboot scenario
        info("Received signal that host has crashed, decrement reboot count");

        // A host crash can cause a reboot of the host so decrement the reboot
        // count
        decrementRebootCount();
    }
}

/** @brief Decrement the remaining reboot attempts, stopping at zero.
 *
 *  @return The value returned by the attemptsLeft setter after the
 *          decrement, or the unchanged count when it is already 0.
 */
uint32_t Host::decrementRebootCount()
{
    auto rebootCount = reboot::RebootAttempts::attemptsLeft();
    if (rebootCount > 0)
    {
        return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
    }
    return rebootCount;
}

/** @brief Persist this object as JSON under HOST_STATE_PERSIST_PATH.
 *
 *  @return Path of the persistence file. The path is returned even when
 *          the file could not be opened; that failure is logged.
 */
fs::path Host::serialize()
{
    fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
    std::ofstream os(path.c_str(), std::ios::binary);
    if (!os.is_open())
    {
        // Without this check the archive would write into a failed stream
        // and the state would be dropped silently.
        error("Unable to open {FILE} to persist host state", "FILE",
              path.string());
        return path;
    }

    cereal::JSONOutputArchive oarchive(os);
    oarchive(*this);
    return path;
}

/** @brief Restore this object from the JSON persistence file.
 *
 *  @return true when the file existed and was loaded; false when it does
 *          not exist or cereal failed to parse it. A file that fails to
 *          parse is removed.
 */
bool Host::deserialize()
{
    fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
    try
    {
        if (fs::exists(path))
        {
            std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
            cereal::JSONInputArchive iarchive(is);
            iarchive(*this);
            return true;
        }
        return false;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);

        // Use the non-throwing overload so a removal failure cannot escape
        // this handler.
        std::error_code ec;
        fs::remove(path, ec);
        if (ec)
        {
            error("Failed to remove {FILE}: {ERROR}", "FILE", path.string(),
                  "ERROR", ec.message());
        }
        return false;
    }
}

/** @brief Request a host state transition.
 *
 *  Starts the systemd target for the transition, records the request in
 *  the base class and persists the object.
 *
 *  @param[in] value - Requested transition
 *  @return Value returned by the base-class setter
 *  @throws BMCNotReady when ONLY_ALLOW_BOOT_WHEN_BMC_READY is enabled,
 *          the request is not Off and utils::isBmcReady() is false
 */
Host::Transition Host::requestedHostTransition(Transition value)
{
    info("Host state transition request of {REQ}", "REQ", value);

#if ONLY_ALLOW_BOOT_WHEN_BMC_READY
    if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
    {
        info("BMC State is not Ready so no host on operations allowed");
        throw sdbusplus::xyz::openbmc_project::State::Host::Error::
            BMCNotReady();
    }
#endif

    // If this is not a power off request then we need to
    // decrement the reboot counter.  This code should
    // never prevent a power on, it should just decrement
    // the count to 0.  The quiesce handling is where the
    // check of this count will occur
    if (value != server::Host::Transition::Off)
    {
        decrementRebootCount();
    }

    executeTransition(value);

    auto retVal = server::Host::requestedHostTransition(value);

    serialize();
    return retVal;
}

/** @brief Set the boot progress property and persist the object.
 *
 *  @param[in] value - New boot progress stage
 *  @return Value returned by the base-class setter
 */
Host::ProgressStages Host::bootProgress(ProgressStages value)
{
    auto retVal = bootprogress::Progress::bootProgress(value);
    serialize();
    return retVal;
}

/** @brief Set the operating system state property and persist the object.
 *
 *  @param[in] value - New operating system status
 *  @return Value returned by the base-class setter
 */
Host::OSStatus Host::operatingSystemState(OSStatus value)
{
    auto retVal = osstatus::Status::operatingSystemState(value);
    serialize();
    return retVal;
}

/** @brief Log and set the current host state.
 *
 *  @param[in] value - New host state
 *  @return Value returned by the base-class setter
 */
Host::HostState Host::currentHostState(HostState value)
{
    info("Change to Host State: {STATE}", "STATE", value);
    return server::Host::currentHostState(value);
}

} // namespace manager
} // namespace state
} // namespace phosphor