1 #include "config.h"
2
3 #include "host_state_manager.hpp"
4
5 #include "host_check.hpp"
6 #include "utils.hpp"
7
8 #include <systemd/sd-bus.h>
9
10 #include <cereal/archives/json.hpp>
11 #include <cereal/cereal.hpp>
12 #include <cereal/types/string.hpp>
13 #include <cereal/types/tuple.hpp>
14 #include <cereal/types/vector.hpp>
15 #include <phosphor-logging/elog-errors.hpp>
16 #include <phosphor-logging/lg2.hpp>
17 #include <sdbusplus/exception.hpp>
18 #include <sdbusplus/server.hpp>
19 #include <xyz/openbmc_project/Common/error.hpp>
20 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
21 #include <xyz/openbmc_project/State/Host/error.hpp>
22
23 #include <chrono>
24 #include <filesystem>
25 #include <format>
26 #include <fstream>
27 #include <iostream>
28 #include <map>
29 #include <set>
30 #include <string>
31
// Register class version with Cereal.
// The version number is attached to phosphor::state::manager::Host, the type
// that serialize()/deserialize() in this file pass to the cereal JSON
// archives. CLASS_VERSION is not defined in this file (presumably it comes
// from config.h -- confirm).
CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
34
35 namespace phosphor
36 {
37 namespace state
38 {
39 namespace manager
40 {
41
// Logging setup: the functions below call info()/error() unqualified
// (presumably made visible by this lg2 macro -- confirm in lg2.hpp).
PHOSPHOR_LOG2_USING;

// When you see server:: or reboot:: you know we're referencing our base class
namespace server = sdbusplus::server::xyz::openbmc_project::state;
namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
// bootprogress:: and osstatus:: are used the same way for the boot-progress
// and operating-system-status property setters.
namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
namespace osstatus =
    sdbusplus::server::xyz::openbmc_project::state::operating_system;
using namespace phosphor::logging;
namespace fs = std::filesystem;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
53
// Values of a systemd unit's "ActiveState" property that stateActive()
// treats as "the target is up".
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus coordinates of the systemd manager object; used for the StartUnit
// and GetUnit method calls in this file.
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Standard D-Bus properties interface (used here for "Get" on both systemd
// units and the auto-reboot settings objects) and the systemd unit interface
// that owns the "ActiveState" property.
constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
63
determineInitialState()64 void Host::determineInitialState()
65 {
66 if (stateActive(getTarget(server::Host::HostState::Running)) ||
67 isHostRunning(id))
68 {
69 info("Initial Host State will be Running");
70 server::Host::currentHostState(HostState::Running, true);
71 server::Host::requestedHostTransition(Transition::On, true);
72 }
73 else
74 {
75 info("Initial Host State will be Off");
76 server::Host::currentHostState(HostState::Off, true);
77 server::Host::requestedHostTransition(Transition::Off, true);
78 }
79
80 if (!deserialize())
81 {
82 // set to default value.
83 server::Host::requestedHostTransition(Transition::Off, true);
84 reboot::RebootAttempts::retryAttempts(BOOT_COUNT_MAX_ALLOWED);
85 }
86 return;
87 }
88
setupSupportedTransitions()89 void Host::setupSupportedTransitions()
90 {
91 std::set<Transition> supportedTransitions = {
92 Transition::On,
93 Transition::Off,
94 Transition::Reboot,
95 Transition::GracefulWarmReboot,
96 #if ENABLE_FORCE_WARM_REBOOT
97 Transition::ForceWarmReboot,
98 #endif
99 };
100 server::Host::allowedHostTransitions(supportedTransitions);
101 }
102
/**
 * @brief Build the per-instance systemd target names used by this object.
 *
 * Fills stateTargetTable (HostState -> target whose activity represents that
 * state), transitionTargetTable (Transition -> target started to perform it)
 * and hostCrashTarget. Every name embeds this host's instance id.
 */
void Host::createSystemdTargetMaps()
{
    // Some systems do not support a warm reboot so just map the warm reboot
    // requests to our normal cold reboot target in that case.
#if ENABLE_WARM_REBOOT
    const auto gracefulWarmTarget =
        std::format("obmc-host-warm-reboot@{}.target", id);
    const auto forceWarmTarget =
        std::format("obmc-host-force-warm-reboot@{}.target", id);
#else
    const auto gracefulWarmTarget =
        std::format("obmc-host-reboot@{}.target", id);
    const auto forceWarmTarget = std::format("obmc-host-reboot@{}.target", id);
#endif

    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
        {Transition::GracefulWarmReboot, gracefulWarmTarget},
        {Transition::ForceWarmReboot, forceWarmTarget}};

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}
131
getTarget(HostState state)132 const std::string& Host::getTarget(HostState state)
133 {
134 return stateTargetTable[state];
135 };
136
getTarget(Transition tranReq)137 const std::string& Host::getTarget(Transition tranReq)
138 {
139 return transitionTargetTable[tranReq];
140 };
141
executeTransition(Transition tranReq)142 void Host::executeTransition(Transition tranReq)
143 {
144 const auto& sysdUnit = getTarget(tranReq);
145
146 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
147 SYSTEMD_INTERFACE, "StartUnit");
148
149 method.append(sysdUnit);
150 method.append("replace");
151
152 this->bus.call_noreply(method);
153
154 return;
155 }
156
stateActive(const std::string & target)157 bool Host::stateActive(const std::string& target)
158 {
159 std::variant<std::string> currentState;
160 sdbusplus::message::object_path unitTargetPath;
161
162 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
163 SYSTEMD_INTERFACE, "GetUnit");
164
165 method.append(target);
166
167 try
168 {
169 auto result = this->bus.call(method);
170 result.read(unitTargetPath);
171 }
172 catch (const sdbusplus::exception_t& e)
173 {
174 error("Error in GetUnit call: {ERROR}", "ERROR", e);
175 return false;
176 }
177
178 method = this->bus.new_method_call(
179 SYSTEMD_SERVICE,
180 static_cast<const std::string&>(unitTargetPath).c_str(),
181 SYSTEMD_PROPERTY_IFACE, "Get");
182
183 method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
184
185 try
186 {
187 auto result = this->bus.call(method);
188 result.read(currentState);
189 }
190 catch (const sdbusplus::exception_t& e)
191 {
192 error("Error in ActiveState Get: {ERROR}", "ERROR", e);
193 return false;
194 }
195
196 const auto& currentStateStr = std::get<std::string>(currentState);
197 return currentStateStr == ACTIVE_STATE ||
198 currentStateStr == ACTIVATING_STATE;
199 }
200
isAutoReboot()201 bool Host::isAutoReboot()
202 {
203 using namespace settings;
204
205 /* The logic here is to first check the one-time AutoReboot setting.
206 * If this property is true (the default) then look at the persistent
207 * user setting in the non one-time object, otherwise honor the one-time
208 * setting and do not auto reboot.
209 */
210 auto methodOneTime = bus.new_method_call(
211 settings.service(settings.autoReboot, autoRebootIntf).c_str(),
212 settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
213 methodOneTime.append(autoRebootIntf, "AutoReboot");
214
215 auto methodUserSetting = bus.new_method_call(
216 settings.service(settings.autoReboot, autoRebootIntf).c_str(),
217 settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
218 methodUserSetting.append(autoRebootIntf, "AutoReboot");
219
220 try
221 {
222 auto reply = bus.call(methodOneTime);
223 auto result = reply.unpack<std::variant<bool>>();
224
225 auto autoReboot = std::get<bool>(result);
226
227 if (!autoReboot)
228 {
229 info("Auto reboot (one-time) disabled");
230 return false;
231 }
232 else
233 {
234 // one-time is true so read the user setting
235 reply = bus.call(methodUserSetting);
236 reply.read(result);
237 autoReboot = std::get<bool>(result);
238 }
239
240 auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
241
242 if (autoReboot)
243 {
244 if (rebootCounterParam > 0)
245 {
246 // Reduce BOOTCOUNT by 1
247 info(
248 "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
249 "BOOTCOUNT", rebootCounterParam);
250 return true;
251 }
252 else
253 {
254 // We are at 0 so reset reboot counter and go to quiesce state
255 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
256 attemptsLeft(reboot::RebootAttempts::retryAttempts());
257
258 // Generate log since we will now be sitting in Quiesce
259 const std::string errorMsg =
260 "xyz.openbmc_project.State.Error.HostQuiesce";
261 utils::createError(this->bus, errorMsg,
262 sdbusplus::xyz::openbmc_project::Logging::
263 server::Entry::Level::Critical);
264
265 // Generate BMC dump to assist with debug
266 utils::createBmcDump(this->bus);
267
268 return false;
269 }
270 }
271 else
272 {
273 info("Auto reboot disabled.");
274 return false;
275 }
276 }
277 catch (const sdbusplus::exception_t& e)
278 {
279 error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
280 return false;
281 }
282 }
283
/**
 * @brief Update the host state when a systemd job for a host target ends.
 *
 * Reads four fields from the message (job id, job object path, unit name and
 * result) and reacts only to jobs whose result is "done":
 *  - Off target done and Running target not active: state becomes Off, boot
 *    progress is reset to Unspecified and OS status to Inactive.
 *  - Running target done and active: state becomes Running and this host's
 *    "host running" marker file is removed if present.
 *  - Quiesced target done and active: either a Reboot transition is requested
 *    (when isAutoReboot() allows it) or the state becomes Quiesced.
 *
 * @param[in] msg - Signal message to decode.
 */
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    // Every case handled below requires the job to have completed
    // successfully, so anything else can be ignored right away.
    if (newStateResult != "done")
    {
        return;
    }

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // The file name is built from this object's own instance id (as is
        // done for every other per-instance name in this file) so that each
        // host manager clears its own marker rather than always host 0's.
        const std::string hostFile = std::format(HOST_RUNNING_FILE, id);
        if (std::filesystem::exists(hostFile))
        {
            std::filesystem::remove(hostFile);
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}
337
sysStateChangeJobNew(sdbusplus::message_t & msg)338 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
339 {
340 uint32_t newStateID{};
341 sdbusplus::message::object_path newStateObjPath;
342 std::string newStateUnit{};
343
344 // Read the msg and populate each variable
345 msg.read(newStateID, newStateObjPath, newStateUnit);
346
347 if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
348 {
349 info("Received signal that host is in diagnostice mode");
350 this->currentHostState(server::Host::HostState::DiagnosticMode);
351 }
352 else if ((newStateUnit == hostCrashTarget) &&
353 (server::Host::currentHostState() ==
354 server::Host::HostState::Running))
355 {
356 // Only decrease the boot count if host was running when the host crash
357 // target was started. Systemd will sometimes trigger multiple
358 // JobNew events for the same target. This seems to be related to
359 // how OpenBMC utilizes the targets in the reboot scenario
360 info("Received signal that host has crashed, decrement reboot count");
361
362 // A host crash can cause a reboot of the host so decrement the reboot
363 // count
364 decrementRebootCount();
365 }
366 }
367
decrementRebootCount()368 uint32_t Host::decrementRebootCount()
369 {
370 auto rebootCount = reboot::RebootAttempts::attemptsLeft();
371 if (rebootCount > 0)
372 {
373 return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
374 }
375 return rebootCount;
376 }
377
/**
 * @brief Write this object's persisted data to its per-instance file.
 *
 * The file name is HOST_STATE_PERSIST_PATH formatted with this host's id and
 * the content is produced by a cereal JSON output archive.
 *
 * NOTE(review): the stream state is not checked, so a failure to open or
 * write the file is not reported to the caller.
 *
 * @return Path of the file that was written.
 */
fs::path Host::serialize()
{
    fs::path persistPath{std::format(HOST_STATE_PERSIST_PATH, id)};

    {
        // Archive and stream are scoped so both are destroyed (and thereby
        // finished) before the path is handed back.
        std::ofstream out(persistPath.c_str(), std::ios::binary);
        cereal::JSONOutputArchive archive(out);
        archive(*this);
    }

    return persistPath;
}
386
/**
 * @brief Load this object's persisted data from its per-instance file.
 *
 * @return true if the file existed and was read by the cereal JSON input
 *         archive; false if the file does not exist or cereal threw while
 *         reading it. In the latter case the error is logged and the file is
 *         removed.
 */
bool Host::deserialize()
{
    const fs::path persistPath{std::format(HOST_STATE_PERSIST_PATH, id)};

    try
    {
        if (!fs::exists(persistPath))
        {
            // Nothing has been persisted for this host yet.
            return false;
        }

        std::ifstream in(persistPath.c_str(), std::ios::in | std::ios::binary);
        cereal::JSONInputArchive archive(in);
        archive(*this);
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);
        // Drop the unreadable file.
        fs::remove(persistPath);
        return false;
    }

    return true;
}
408
requestedHostTransition(Transition value)409 Host::Transition Host::requestedHostTransition(Transition value)
410 {
411 info("Host{HOST_ID} state transition request of {REQ}", "HOST_ID", id,
412 "REQ", value);
413
414 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
415 if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
416 {
417 info("BMC State is not Ready so no host on operations allowed");
418 throw sdbusplus::xyz::openbmc_project::State::Host::Error::
419 BMCNotReady();
420 }
421 #endif
422
423 // If this is not a power off request then we need to
424 // decrement the reboot counter. This code should
425 // never prevent a power on, it should just decrement
426 // the count to 0. The quiesce handling is where the
427 // check of this count will occur
428 if (value != server::Host::Transition::Off)
429 {
430 #ifdef CHECK_FWUPDATE_BEFORE_DO_TRANSITION
431 /*
432 * Do not do transition when the any firmware being updated
433 */
434 if (phosphor::state::manager::utils::isFirmwareUpdating(this->bus))
435 {
436 info("Firmware being updated, reject the transition request");
437 throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
438 }
439 #endif // CHECK_FWUPDATE_BEFORE_DO_TRANSITION
440
441 decrementRebootCount();
442 }
443
444 executeTransition(value);
445
446 auto retVal = server::Host::requestedHostTransition(value);
447
448 serialize();
449 return retVal;
450 }
451
bootProgress(ProgressStages value)452 Host::ProgressStages Host::bootProgress(ProgressStages value)
453 {
454 auto retVal = bootprogress::Progress::bootProgress(value);
455
456 // Update the BootProgressLastUpdate anytime BootProgress is updated
457 auto timeStamp = std::chrono::duration_cast<std::chrono::microseconds>(
458 std::chrono::system_clock::now().time_since_epoch())
459 .count();
460 this->bootProgressLastUpdate(timeStamp);
461 serialize();
462 return retVal;
463 }
464
bootProgressLastUpdate(uint64_t value)465 uint64_t Host::bootProgressLastUpdate(uint64_t value)
466 {
467 auto retVal = bootprogress::Progress::bootProgressLastUpdate(value);
468 serialize();
469 return retVal;
470 }
471
operatingSystemState(OSStatus value)472 Host::OSStatus Host::operatingSystemState(OSStatus value)
473 {
474 auto retVal = osstatus::Status::operatingSystemState(value);
475 serialize();
476 return retVal;
477 }
478
currentHostState(HostState value)479 Host::HostState Host::currentHostState(HostState value)
480 {
481 info("Change to Host{HOST_ID} State: {STATE}", "HOST_ID", id, "STATE",
482 value);
483 return server::Host::currentHostState(value);
484 }
485
486 } // namespace manager
487 } // namespace state
488 } // namespace phosphor
489