1 #include "config.h" 2 3 #include "host_state_manager.hpp" 4 5 #include "host_check.hpp" 6 #include "utils.hpp" 7 8 #include <fmt/format.h> 9 #include <stdio.h> 10 #include <systemd/sd-bus.h> 11 12 #include <cereal/archives/json.hpp> 13 #include <cereal/cereal.hpp> 14 #include <cereal/types/string.hpp> 15 #include <cereal/types/tuple.hpp> 16 #include <cereal/types/vector.hpp> 17 #include <phosphor-logging/elog-errors.hpp> 18 #include <phosphor-logging/lg2.hpp> 19 #include <sdbusplus/exception.hpp> 20 #include <sdbusplus/server.hpp> 21 #include <xyz/openbmc_project/Common/error.hpp> 22 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp> 23 24 #include <filesystem> 25 #include <fstream> 26 #include <iostream> 27 #include <map> 28 #include <string> 29 30 // Register class version with Cereal 31 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION) 32 33 namespace phosphor 34 { 35 namespace state 36 { 37 namespace manager 38 { 39 40 PHOSPHOR_LOG2_USING; 41 42 // When you see server:: or reboot:: you know we're referencing our base class 43 namespace server = sdbusplus::xyz::openbmc_project::State::server; 44 namespace reboot = sdbusplus::xyz::openbmc_project::Control::Boot::server; 45 namespace bootprogress = sdbusplus::xyz::openbmc_project::State::Boot::server; 46 namespace osstatus = 47 sdbusplus::xyz::openbmc_project::State::OperatingSystem::server; 48 using namespace phosphor::logging; 49 namespace fs = std::filesystem; 50 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 51 52 constexpr auto ACTIVE_STATE = "active"; 53 constexpr auto ACTIVATING_STATE = "activating"; 54 55 constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1"; 56 constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1"; 57 constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager"; 58 59 constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties"; 60 constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit"; 61 62 void Host::determineInitialState() 63 { 64 if (stateActive(getTarget(server::Host::HostState::Running)) || 65 isHostRunning(id)) 66 { 67 info("Initial Host State will be Running"); 68 server::Host::currentHostState(HostState::Running); 69 server::Host::requestedHostTransition(Transition::On); 70 } 71 else 72 { 73 info("Initial Host State will be Off"); 74 server::Host::currentHostState(HostState::Off); 75 server::Host::requestedHostTransition(Transition::Off); 76 } 77 78 if (!deserialize()) 79 { 80 // set to default value. 81 server::Host::requestedHostTransition(Transition::Off); 82 } 83 return; 84 } 85 86 void Host::createSystemdTargetMaps() 87 { 88 stateTargetTable = { 89 {HostState::Off, fmt::format("obmc-host-stop@{}.target", id)}, 90 {HostState::Running, fmt::format("obmc-host-startmin@{}.target", id)}, 91 {HostState::Quiesced, fmt::format("obmc-host-quiesce@{}.target", id)}, 92 {HostState::DiagnosticMode, 93 fmt::format("obmc-host-diagnostic-mode@{}.target", id)}}; 94 95 transitionTargetTable = { 96 {Transition::Off, fmt::format("obmc-host-shutdown@{}.target", id)}, 97 {Transition::On, fmt::format("obmc-host-start@{}.target", id)}, 98 {Transition::Reboot, fmt::format("obmc-host-reboot@{}.target", id)}, 99 // Some systems do not support a warm reboot so just map the reboot 100 // requests to our normal cold reboot in that case 101 #if ENABLE_WARM_REBOOT 102 {Transition::GracefulWarmReboot, 103 fmt::format("obmc-host-warm-reboot@{}.target", id)}, 104 {Transition::ForceWarmReboot, 105 fmt::format("obmc-host-force-warm-reboot@{}.target", id)} 106 }; 107 #else 108 {Transition::GracefulWarmReboot, 109 fmt::format("obmc-host-reboot@{}.target", id)}, 110 {Transition::ForceWarmReboot, 111 fmt::format("obmc-host-reboot@{}.target", id)} 112 }; 113 #endif 114 hostCrashTarget = fmt::format("obmc-host-crash@{}.target", id); 115 } 116 117 const std::string& Host::getTarget(HostState state) 118 { 119 return stateTargetTable[state]; 120 }; 121 122 const std::string& Host::getTarget(Transition tranReq) 123 { 124 return transitionTargetTable[tranReq]; 125 }; 126 127 void Host::executeTransition(Transition tranReq) 128 { 129 auto& sysdUnit = getTarget(tranReq); 130 131 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH, 132 SYSTEMD_INTERFACE, "StartUnit"); 133 134 method.append(sysdUnit); 135 method.append("replace"); 136 137 this->bus.call_noreply(method); 138 139 return; 140 } 141 142 bool Host::stateActive(const std::string& target) 143 { 144 std::variant<std::string> currentState; 145 sdbusplus::message::object_path unitTargetPath; 146 147 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH, 148 SYSTEMD_INTERFACE, "GetUnit"); 149 150 method.append(target); 151 152 try 153 { 154 auto result = this->bus.call(method); 155 result.read(unitTargetPath); 156 } 157 catch (const sdbusplus::exception_t& e) 158 { 159 error("Error in GetUnit call: {ERROR}", "ERROR", e); 160 return false; 161 } 162 163 method = this->bus.new_method_call( 164 SYSTEMD_SERVICE, 165 static_cast<const std::string&>(unitTargetPath).c_str(), 166 SYSTEMD_PROPERTY_IFACE, "Get"); 167 168 method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState"); 169 170 try 171 { 172 auto result = this->bus.call(method); 173 result.read(currentState); 174 } 175 catch (const sdbusplus::exception_t& e) 176 { 177 error("Error in ActiveState Get: {ERROR}", "ERROR", e); 178 return false; 179 } 180 181 const auto& currentStateStr = std::get<std::string>(currentState); 182 return currentStateStr == ACTIVE_STATE || 183 currentStateStr == ACTIVATING_STATE; 184 } 185 186 bool Host::isAutoReboot() 187 { 188 using namespace settings; 189 190 /* The logic here is to first check the one-time AutoReboot setting. 191 * If this property is true (the default) then look at the persistent 192 * user setting in the non one-time object, otherwise honor the one-time 193 * setting and do not auto reboot. 194 */ 195 auto methodOneTime = bus.new_method_call( 196 settings.service(settings.autoReboot, autoRebootIntf).c_str(), 197 settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get"); 198 methodOneTime.append(autoRebootIntf, "AutoReboot"); 199 200 auto methodUserSetting = bus.new_method_call( 201 settings.service(settings.autoReboot, autoRebootIntf).c_str(), 202 settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get"); 203 methodUserSetting.append(autoRebootIntf, "AutoReboot"); 204 205 try 206 { 207 auto reply = bus.call(methodOneTime); 208 std::variant<bool> result; 209 reply.read(result); 210 auto autoReboot = std::get<bool>(result); 211 212 if (!autoReboot) 213 { 214 info("Auto reboot (one-time) disabled"); 215 return false; 216 } 217 else 218 { 219 // one-time is true so read the user setting 220 reply = bus.call(methodUserSetting); 221 reply.read(result); 222 autoReboot = std::get<bool>(result); 223 } 224 225 auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft(); 226 227 if (autoReboot) 228 { 229 if (rebootCounterParam > 0) 230 { 231 // Reduce BOOTCOUNT by 1 232 info( 233 "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting", 234 "BOOTCOUNT", rebootCounterParam); 235 return true; 236 } 237 else 238 { 239 // We are at 0 so reset reboot counter and go to quiesce state 240 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0"); 241 attemptsLeft(reboot::RebootAttempts::retryAttempts()); 242 243 // Generate log since we will now be sitting in Quiesce 244 const std::string errorMsg = 245 "xyz.openbmc_project.State.Error.HostQuiesce"; 246 utils::createError(this->bus, errorMsg, 247 sdbusplus::xyz::openbmc_project::Logging:: 248 server::Entry::Level::Critical); 249 250 // Generate BMC dump to assist with debug 251 utils::createBmcDump(this->bus); 252 253 return false; 254 } 255 } 256 else 257 { 258 info("Auto reboot disabled."); 259 return false; 260 } 261 } 262 catch (const sdbusplus::exception_t& e) 263 { 264 error("Error in AutoReboot Get, {ERROR}", "ERROR", e); 265 return false; 266 } 267 } 268 269 void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg) 270 { 271 uint32_t newStateID{}; 272 sdbusplus::message::object_path newStateObjPath; 273 std::string newStateUnit{}; 274 std::string newStateResult{}; 275 276 // Read the msg and populate each variable 277 msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult); 278 279 if ((newStateUnit == getTarget(server::Host::HostState::Off)) && 280 (newStateResult == "done") && 281 (!stateActive(getTarget(server::Host::HostState::Running)))) 282 { 283 info("Received signal that host is off"); 284 this->currentHostState(server::Host::HostState::Off); 285 this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified); 286 this->operatingSystemState(osstatus::Status::OSStatus::Inactive); 287 } 288 else if ((newStateUnit == getTarget(server::Host::HostState::Running)) && 289 (newStateResult == "done") && 290 (stateActive(getTarget(server::Host::HostState::Running)))) 291 { 292 info("Received signal that host is running"); 293 this->currentHostState(server::Host::HostState::Running); 294 295 // Remove temporary file which is utilized for scenarios where the 296 // BMC is rebooted while the host is still up. 297 // This file is used to indicate to host related systemd services 298 // that the host is already running and they should skip running. 299 // Once the host state is back to running we can clear this file. 300 auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0); 301 size++; // null 302 std::unique_ptr<char[]> hostFile(new char[size]); 303 std::snprintf(hostFile.get(), size, HOST_RUNNING_FILE, 0); 304 if (std::filesystem::exists(hostFile.get())) 305 { 306 std::filesystem::remove(hostFile.get()); 307 } 308 } 309 else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) && 310 (newStateResult == "done") && 311 (stateActive(getTarget(server::Host::HostState::Quiesced)))) 312 { 313 if (Host::isAutoReboot()) 314 { 315 info("Beginning reboot..."); 316 Host::requestedHostTransition(server::Host::Transition::Reboot); 317 } 318 else 319 { 320 info("Maintaining quiesce"); 321 this->currentHostState(server::Host::HostState::Quiesced); 322 } 323 } 324 } 325 326 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg) 327 { 328 uint32_t newStateID{}; 329 sdbusplus::message::object_path newStateObjPath; 330 std::string newStateUnit{}; 331 332 // Read the msg and populate each variable 333 msg.read(newStateID, newStateObjPath, newStateUnit); 334 335 if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode)) 336 { 337 info("Received signal that host is in diagnostice mode"); 338 this->currentHostState(server::Host::HostState::DiagnosticMode); 339 } 340 else if ((newStateUnit == hostCrashTarget) && 341 (server::Host::currentHostState() == 342 server::Host::HostState::Running)) 343 { 344 // Only decrease the boot count if host was running when the host crash 345 // target was started. Systemd will sometimes trigger multiple 346 // JobNew events for the same target. This seems to be related to 347 // how OpenBMC utilizes the targets in the reboot scenario 348 info("Received signal that host has crashed, decrement reboot count"); 349 350 // A host crash can cause a reboot of the host so decrement the reboot 351 // count 352 decrementRebootCount(); 353 } 354 } 355 356 uint32_t Host::decrementRebootCount() 357 { 358 auto rebootCount = reboot::RebootAttempts::attemptsLeft(); 359 if (rebootCount > 0) 360 { 361 return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1)); 362 } 363 return rebootCount; 364 } 365 366 fs::path Host::serialize() 367 { 368 fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)}; 369 std::ofstream os(path.c_str(), std::ios::binary); 370 cereal::JSONOutputArchive oarchive(os); 371 oarchive(*this); 372 return path; 373 } 374 375 bool Host::deserialize() 376 { 377 fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)}; 378 try 379 { 380 if (fs::exists(path)) 381 { 382 std::ifstream is(path.c_str(), std::ios::in | std::ios::binary); 383 cereal::JSONInputArchive iarchive(is); 384 iarchive(*this); 385 return true; 386 } 387 return false; 388 } 389 catch (const cereal::Exception& e) 390 { 391 error("deserialize exception: {ERROR}", "ERROR", e); 392 fs::remove(path); 393 return false; 394 } 395 } 396 397 Host::Transition Host::requestedHostTransition(Transition value) 398 { 399 info("Host state transition request of {REQ}", "REQ", value); 400 // If this is not a power off request then we need to 401 // decrement the reboot counter. This code should 402 // never prevent a power on, it should just decrement 403 // the count to 0. The quiesce handling is where the 404 // check of this count will occur 405 if (value != server::Host::Transition::Off) 406 { 407 decrementRebootCount(); 408 } 409 410 executeTransition(value); 411 412 auto retVal = server::Host::requestedHostTransition(value); 413 414 serialize(); 415 return retVal; 416 } 417 418 Host::ProgressStages Host::bootProgress(ProgressStages value) 419 { 420 auto retVal = bootprogress::Progress::bootProgress(value); 421 serialize(); 422 return retVal; 423 } 424 425 Host::OSStatus Host::operatingSystemState(OSStatus value) 426 { 427 auto retVal = osstatus::Status::operatingSystemState(value); 428 serialize(); 429 return retVal; 430 } 431 432 Host::HostState Host::currentHostState(HostState value) 433 { 434 info("Change to Host State: {STATE}", "STATE", value); 435 return server::Host::currentHostState(value); 436 } 437 438 } // namespace manager 439 } // namespace state 440 } // namespace phosphor 441