1 #include "config.h" 2 3 #include "host_state_manager.hpp" 4 5 #include "host_check.hpp" 6 #include "utils.hpp" 7 8 #include <fmt/format.h> 9 #include <stdio.h> 10 #include <systemd/sd-bus.h> 11 12 #include <cereal/archives/json.hpp> 13 #include <cereal/cereal.hpp> 14 #include <cereal/types/string.hpp> 15 #include <cereal/types/tuple.hpp> 16 #include <cereal/types/vector.hpp> 17 #include <phosphor-logging/elog-errors.hpp> 18 #include <phosphor-logging/lg2.hpp> 19 #include <sdbusplus/exception.hpp> 20 #include <sdbusplus/server.hpp> 21 #include <xyz/openbmc_project/Common/error.hpp> 22 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp> 23 #include <xyz/openbmc_project/State/Host/error.hpp> 24 25 #include <filesystem> 26 #include <fstream> 27 #include <iostream> 28 #include <map> 29 #include <string> 30 31 // Register class version with Cereal 32 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION) 33 34 namespace phosphor 35 { 36 namespace state 37 { 38 namespace manager 39 { 40 41 PHOSPHOR_LOG2_USING; 42 43 // When you see server:: or reboot:: you know we're referencing our base class 44 namespace server = sdbusplus::server::xyz::openbmc_project::state; 45 namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot; 46 namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot; 47 namespace osstatus = 48 sdbusplus::server::xyz::openbmc_project::state::operating_system; 49 using namespace phosphor::logging; 50 namespace fs = std::filesystem; 51 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 52 53 constexpr auto ACTIVE_STATE = "active"; 54 constexpr auto ACTIVATING_STATE = "activating"; 55 56 constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1"; 57 constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1"; 58 constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager"; 59 60 constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties"; 61 constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit"; 62 63 void Host::determineInitialState() 64 { 65 if (stateActive(getTarget(server::Host::HostState::Running)) || 66 isHostRunning(id)) 67 { 68 info("Initial Host State will be Running"); 69 server::Host::currentHostState(HostState::Running); 70 server::Host::requestedHostTransition(Transition::On); 71 } 72 else 73 { 74 info("Initial Host State will be Off"); 75 server::Host::currentHostState(HostState::Off); 76 server::Host::requestedHostTransition(Transition::Off); 77 } 78 79 if (!deserialize()) 80 { 81 // set to default value. 82 server::Host::requestedHostTransition(Transition::Off); 83 } 84 return; 85 } 86 87 void Host::createSystemdTargetMaps() 88 { 89 stateTargetTable = { 90 {HostState::Off, fmt::format("obmc-host-stop@{}.target", id)}, 91 {HostState::Running, fmt::format("obmc-host-startmin@{}.target", id)}, 92 {HostState::Quiesced, fmt::format("obmc-host-quiesce@{}.target", id)}, 93 {HostState::DiagnosticMode, 94 fmt::format("obmc-host-diagnostic-mode@{}.target", id)}}; 95 96 transitionTargetTable = { 97 {Transition::Off, fmt::format("obmc-host-shutdown@{}.target", id)}, 98 {Transition::On, fmt::format("obmc-host-start@{}.target", id)}, 99 {Transition::Reboot, fmt::format("obmc-host-reboot@{}.target", id)}, 100 // Some systems do not support a warm reboot so just map the reboot 101 // requests to our normal cold reboot in that case 102 #if ENABLE_WARM_REBOOT 103 {Transition::GracefulWarmReboot, 104 fmt::format("obmc-host-warm-reboot@{}.target", id)}, 105 {Transition::ForceWarmReboot, 106 fmt::format("obmc-host-force-warm-reboot@{}.target", id)} 107 }; 108 #else 109 {Transition::GracefulWarmReboot, 110 fmt::format("obmc-host-reboot@{}.target", id)}, 111 {Transition::ForceWarmReboot, 112 fmt::format("obmc-host-reboot@{}.target", id)} 113 }; 114 #endif 115 hostCrashTarget = fmt::format("obmc-host-crash@{}.target", id); 116 } 117 118 const std::string& Host::getTarget(HostState state) 119 { 120 return stateTargetTable[state]; 121 }; 122 123 const std::string& Host::getTarget(Transition tranReq) 124 { 125 return transitionTargetTable[tranReq]; 126 }; 127 128 void Host::executeTransition(Transition tranReq) 129 { 130 auto& sysdUnit = getTarget(tranReq); 131 132 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH, 133 SYSTEMD_INTERFACE, "StartUnit"); 134 135 method.append(sysdUnit); 136 method.append("replace"); 137 138 this->bus.call_noreply(method); 139 140 return; 141 } 142 143 bool Host::stateActive(const std::string& target) 144 { 145 std::variant<std::string> currentState; 146 sdbusplus::message::object_path unitTargetPath; 147 148 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH, 149 SYSTEMD_INTERFACE, "GetUnit"); 150 151 method.append(target); 152 153 try 154 { 155 auto result = this->bus.call(method); 156 result.read(unitTargetPath); 157 } 158 catch (const sdbusplus::exception_t& e) 159 { 160 error("Error in GetUnit call: {ERROR}", "ERROR", e); 161 return false; 162 } 163 164 method = this->bus.new_method_call( 165 SYSTEMD_SERVICE, 166 static_cast<const std::string&>(unitTargetPath).c_str(), 167 SYSTEMD_PROPERTY_IFACE, "Get"); 168 169 method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState"); 170 171 try 172 { 173 auto result = this->bus.call(method); 174 result.read(currentState); 175 } 176 catch (const sdbusplus::exception_t& e) 177 { 178 error("Error in ActiveState Get: {ERROR}", "ERROR", e); 179 return false; 180 } 181 182 const auto& currentStateStr = std::get<std::string>(currentState); 183 return currentStateStr == ACTIVE_STATE || 184 currentStateStr == ACTIVATING_STATE; 185 } 186 187 bool Host::isAutoReboot() 188 { 189 using namespace settings; 190 191 /* The logic here is to first check the one-time AutoReboot setting. 192 * If this property is true (the default) then look at the persistent 193 * user setting in the non one-time object, otherwise honor the one-time 194 * setting and do not auto reboot. 195 */ 196 auto methodOneTime = bus.new_method_call( 197 settings.service(settings.autoReboot, autoRebootIntf).c_str(), 198 settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get"); 199 methodOneTime.append(autoRebootIntf, "AutoReboot"); 200 201 auto methodUserSetting = bus.new_method_call( 202 settings.service(settings.autoReboot, autoRebootIntf).c_str(), 203 settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get"); 204 methodUserSetting.append(autoRebootIntf, "AutoReboot"); 205 206 try 207 { 208 auto reply = bus.call(methodOneTime); 209 std::variant<bool> result; 210 reply.read(result); 211 auto autoReboot = std::get<bool>(result); 212 213 if (!autoReboot) 214 { 215 info("Auto reboot (one-time) disabled"); 216 return false; 217 } 218 else 219 { 220 // one-time is true so read the user setting 221 reply = bus.call(methodUserSetting); 222 reply.read(result); 223 autoReboot = std::get<bool>(result); 224 } 225 226 auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft(); 227 228 if (autoReboot) 229 { 230 if (rebootCounterParam > 0) 231 { 232 // Reduce BOOTCOUNT by 1 233 info( 234 "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting", 235 "BOOTCOUNT", rebootCounterParam); 236 return true; 237 } 238 else 239 { 240 // We are at 0 so reset reboot counter and go to quiesce state 241 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0"); 242 attemptsLeft(reboot::RebootAttempts::retryAttempts()); 243 244 // Generate log since we will now be sitting in Quiesce 245 const std::string errorMsg = 246 "xyz.openbmc_project.State.Error.HostQuiesce"; 247 utils::createError(this->bus, errorMsg, 248 sdbusplus::xyz::openbmc_project::Logging:: 249 server::Entry::Level::Critical); 250 251 // Generate BMC dump to assist with debug 252 utils::createBmcDump(this->bus); 253 254 return false; 255 } 256 } 257 else 258 { 259 info("Auto reboot disabled."); 260 return false; 261 } 262 } 263 catch (const sdbusplus::exception_t& e) 264 { 265 error("Error in AutoReboot Get, {ERROR}", "ERROR", e); 266 return false; 267 } 268 } 269 270 void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg) 271 { 272 uint32_t newStateID{}; 273 sdbusplus::message::object_path newStateObjPath; 274 std::string newStateUnit{}; 275 std::string newStateResult{}; 276 277 // Read the msg and populate each variable 278 msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult); 279 280 if ((newStateUnit == getTarget(server::Host::HostState::Off)) && 281 (newStateResult == "done") && 282 (!stateActive(getTarget(server::Host::HostState::Running)))) 283 { 284 info("Received signal that host is off"); 285 this->currentHostState(server::Host::HostState::Off); 286 this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified); 287 this->operatingSystemState(osstatus::Status::OSStatus::Inactive); 288 } 289 else if ((newStateUnit == getTarget(server::Host::HostState::Running)) && 290 (newStateResult == "done") && 291 (stateActive(getTarget(server::Host::HostState::Running)))) 292 { 293 info("Received signal that host is running"); 294 this->currentHostState(server::Host::HostState::Running); 295 296 // Remove temporary file which is utilized for scenarios where the 297 // BMC is rebooted while the host is still up. 298 // This file is used to indicate to host related systemd services 299 // that the host is already running and they should skip running. 300 // Once the host state is back to running we can clear this file. 301 auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0); 302 size++; // null 303 std::unique_ptr<char[]> hostFile(new char[size]); 304 std::snprintf(hostFile.get(), size, HOST_RUNNING_FILE, 0); 305 if (std::filesystem::exists(hostFile.get())) 306 { 307 std::filesystem::remove(hostFile.get()); 308 } 309 } 310 else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) && 311 (newStateResult == "done") && 312 (stateActive(getTarget(server::Host::HostState::Quiesced)))) 313 { 314 if (Host::isAutoReboot()) 315 { 316 info("Beginning reboot..."); 317 Host::requestedHostTransition(server::Host::Transition::Reboot); 318 } 319 else 320 { 321 info("Maintaining quiesce"); 322 this->currentHostState(server::Host::HostState::Quiesced); 323 } 324 } 325 } 326 327 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg) 328 { 329 uint32_t newStateID{}; 330 sdbusplus::message::object_path newStateObjPath; 331 std::string newStateUnit{}; 332 333 // Read the msg and populate each variable 334 msg.read(newStateID, newStateObjPath, newStateUnit); 335 336 if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode)) 337 { 338 info("Received signal that host is in diagnostice mode"); 339 this->currentHostState(server::Host::HostState::DiagnosticMode); 340 } 341 else if ((newStateUnit == hostCrashTarget) && 342 (server::Host::currentHostState() == 343 server::Host::HostState::Running)) 344 { 345 // Only decrease the boot count if host was running when the host crash 346 // target was started. Systemd will sometimes trigger multiple 347 // JobNew events for the same target. This seems to be related to 348 // how OpenBMC utilizes the targets in the reboot scenario 349 info("Received signal that host has crashed, decrement reboot count"); 350 351 // A host crash can cause a reboot of the host so decrement the reboot 352 // count 353 decrementRebootCount(); 354 } 355 } 356 357 uint32_t Host::decrementRebootCount() 358 { 359 auto rebootCount = reboot::RebootAttempts::attemptsLeft(); 360 if (rebootCount > 0) 361 { 362 return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1)); 363 } 364 return rebootCount; 365 } 366 367 fs::path Host::serialize() 368 { 369 fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)}; 370 std::ofstream os(path.c_str(), std::ios::binary); 371 cereal::JSONOutputArchive oarchive(os); 372 oarchive(*this); 373 return path; 374 } 375 376 bool Host::deserialize() 377 { 378 fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)}; 379 try 380 { 381 if (fs::exists(path)) 382 { 383 std::ifstream is(path.c_str(), std::ios::in | std::ios::binary); 384 cereal::JSONInputArchive iarchive(is); 385 iarchive(*this); 386 return true; 387 } 388 return false; 389 } 390 catch (const cereal::Exception& e) 391 { 392 error("deserialize exception: {ERROR}", "ERROR", e); 393 fs::remove(path); 394 return false; 395 } 396 } 397 398 Host::Transition Host::requestedHostTransition(Transition value) 399 { 400 info("Host state transition request of {REQ}", "REQ", value); 401 402 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY 403 if ((value != Transition::Off) && (!utils::isBmcReady(this->bus))) 404 { 405 info("BMC State is not Ready so no host on operations allowed"); 406 throw sdbusplus::xyz::openbmc_project::State::Host::Error:: 407 BMCNotReady(); 408 } 409 #endif 410 411 // If this is not a power off request then we need to 412 // decrement the reboot counter. This code should 413 // never prevent a power on, it should just decrement 414 // the count to 0. The quiesce handling is where the 415 // check of this count will occur 416 if (value != server::Host::Transition::Off) 417 { 418 decrementRebootCount(); 419 } 420 421 executeTransition(value); 422 423 auto retVal = server::Host::requestedHostTransition(value); 424 425 serialize(); 426 return retVal; 427 } 428 429 Host::ProgressStages Host::bootProgress(ProgressStages value) 430 { 431 auto retVal = bootprogress::Progress::bootProgress(value); 432 serialize(); 433 return retVal; 434 } 435 436 Host::OSStatus Host::operatingSystemState(OSStatus value) 437 { 438 auto retVal = osstatus::Status::operatingSystemState(value); 439 serialize(); 440 return retVal; 441 } 442 443 Host::HostState Host::currentHostState(HostState value) 444 { 445 info("Change to Host State: {STATE}", "STATE", value); 446 return server::Host::currentHostState(value); 447 } 448 449 } // namespace manager 450 } // namespace state 451 } // namespace phosphor 452