1 #include "config.h"
2
3 #include "host_state_manager.hpp"
4
5 #include "host_check.hpp"
6 #include "utils.hpp"
7
8 #include <systemd/sd-bus.h>
9
10 #include <cereal/archives/json.hpp>
11 #include <cereal/cereal.hpp>
12 #include <cereal/types/string.hpp>
13 #include <cereal/types/tuple.hpp>
14 #include <cereal/types/vector.hpp>
15 #include <phosphor-logging/elog-errors.hpp>
16 #include <phosphor-logging/lg2.hpp>
17 #include <sdbusplus/exception.hpp>
18 #include <sdbusplus/server.hpp>
19 #include <xyz/openbmc_project/Common/error.hpp>
20 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
21 #include <xyz/openbmc_project/State/Host/error.hpp>
22
#include <chrono>
#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <system_error>
31
32 // Register class version with Cereal
33 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
34
35 namespace phosphor
36 {
37 namespace state
38 {
39 namespace manager
40 {
41
42 PHOSPHOR_LOG2_USING;
43
44 // When you see server:: or reboot:: you know we're referencing our base class
45 namespace server = sdbusplus::server::xyz::openbmc_project::state;
46 namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
47 namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
48 namespace osstatus =
49 sdbusplus::server::xyz::openbmc_project::state::operating_system;
50 using namespace phosphor::logging;
51 namespace fs = std::filesystem;
52 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
53
54 constexpr auto ACTIVE_STATE = "active";
55 constexpr auto ACTIVATING_STATE = "activating";
56
57 constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
58 constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
59 constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";
60
61 constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
62 constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
63
64 constexpr auto AUTO_REBOOT_PROPERTY = "AutoReboot";
65
determineInitialState()66 void Host::determineInitialState()
67 {
68 if (stateActive(getTarget(server::Host::HostState::Running)) ||
69 isHostRunning(id))
70 {
71 info("Initial Host State will be Running");
72 server::Host::currentHostState(HostState::Running, true);
73 server::Host::requestedHostTransition(Transition::On, true);
74 }
75 else
76 {
77 info("Initial Host State will be Off");
78 server::Host::currentHostState(HostState::Off, true);
79 server::Host::requestedHostTransition(Transition::Off, true);
80 }
81
82 if (!deserialize())
83 {
84 // set to default value.
85 server::Host::requestedHostTransition(Transition::Off, true);
86 reboot::RebootAttempts::retryAttempts(BOOT_COUNT_MAX_ALLOWED);
87 }
88 return;
89 }
90
setupSupportedTransitions()91 void Host::setupSupportedTransitions()
92 {
93 std::set<Transition> supportedTransitions = {
94 Transition::On,
95 Transition::Off,
96 Transition::Reboot,
97 Transition::GracefulWarmReboot,
98 #if ENABLE_FORCE_WARM_REBOOT
99 Transition::ForceWarmReboot,
100 #endif
101 };
102 server::Host::allowedHostTransitions(supportedTransitions);
103 }
104
createSystemdTargetMaps()105 void Host::createSystemdTargetMaps()
106 {
107 stateTargetTable = {
108 {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
109 {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
110 {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
111 {HostState::DiagnosticMode,
112 std::format("obmc-host-diagnostic-mode@{}.target", id)}};
113
114 transitionTargetTable = {
115 {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
116 {Transition::On, std::format("obmc-host-start@{}.target", id)},
117 {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
118 // Some systems do not support a warm reboot so just map the reboot
119 // requests to our normal cold reboot in that case
120 #if ENABLE_WARM_REBOOT
121 {Transition::GracefulWarmReboot,
122 std::format("obmc-host-warm-reboot@{}.target", id)},
123 {Transition::ForceWarmReboot,
124 std::format("obmc-host-force-warm-reboot@{}.target", id)}};
125 #else
126 {Transition::GracefulWarmReboot,
127 std::format("obmc-host-reboot@{}.target", id)},
128 {Transition::ForceWarmReboot,
129 std::format("obmc-host-reboot@{}.target", id)}};
130 #endif
131 hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
132 }
133
getTarget(HostState state)134 const std::string& Host::getTarget(HostState state)
135 {
136 return stateTargetTable[state];
137 };
138
getTarget(Transition tranReq)139 const std::string& Host::getTarget(Transition tranReq)
140 {
141 return transitionTargetTable[tranReq];
142 };
143
executeTransition(Transition tranReq)144 void Host::executeTransition(Transition tranReq)
145 {
146 const auto& sysdUnit = getTarget(tranReq);
147
148 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
149 SYSTEMD_INTERFACE, "StartUnit");
150
151 method.append(sysdUnit);
152 method.append("replace");
153
154 this->bus.call_noreply(method);
155
156 return;
157 }
158
stateActive(const std::string & target)159 bool Host::stateActive(const std::string& target)
160 {
161 std::variant<std::string> currentState;
162 sdbusplus::message::object_path unitTargetPath;
163
164 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
165 SYSTEMD_INTERFACE, "GetUnit");
166
167 method.append(target);
168
169 try
170 {
171 auto result = this->bus.call(method);
172 result.read(unitTargetPath);
173 }
174 catch (const sdbusplus::exception_t& e)
175 {
176 error("Error in GetUnit call: {ERROR}", "ERROR", e);
177 return false;
178 }
179
180 method = this->bus.new_method_call(
181 SYSTEMD_SERVICE,
182 static_cast<const std::string&>(unitTargetPath).c_str(),
183 SYSTEMD_PROPERTY_IFACE, "Get");
184
185 method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
186
187 try
188 {
189 auto result = this->bus.call(method);
190 result.read(currentState);
191 }
192 catch (const sdbusplus::exception_t& e)
193 {
194 error("Error in ActiveState Get: {ERROR}", "ERROR", e);
195 return false;
196 }
197
198 const auto& currentStateStr = std::get<std::string>(currentState);
199 return currentStateStr == ACTIVE_STATE ||
200 currentStateStr == ACTIVATING_STATE;
201 }
202
isAutoReboot()203 bool Host::isAutoReboot()
204 {
205 using namespace settings;
206
207 /* The logic here is to first check the one-time AutoReboot setting.
208 * If this property is true (the default) then look at the persistent
209 * user setting in the non one-time object, otherwise honor the one-time
210 * setting and do not auto reboot.
211 */
212 auto methodOneTime = bus.new_method_call(
213 settings.service(settings.autoReboot, autoRebootIntf).c_str(),
214 settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
215 methodOneTime.append(autoRebootIntf, AUTO_REBOOT_PROPERTY);
216
217 auto methodUserSetting = bus.new_method_call(
218 settings.service(settings.autoReboot, autoRebootIntf).c_str(),
219 settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
220 methodUserSetting.append(autoRebootIntf, AUTO_REBOOT_PROPERTY);
221
222 try
223 {
224 auto reply = bus.call(methodOneTime);
225 auto result = reply.unpack<std::variant<bool>>();
226
227 auto autoReboot = std::get<bool>(result);
228
229 if (!autoReboot)
230 {
231 info("Auto reboot (one-time) disabled");
232 return false;
233 }
234 else
235 {
236 // one-time is true so read the user setting
237 reply = bus.call(methodUserSetting);
238 reply.read(result);
239 autoReboot = std::get<bool>(result);
240 }
241
242 auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
243
244 if (autoReboot)
245 {
246 if (rebootCounterParam > 0)
247 {
248 // Reduce BOOTCOUNT by 1
249 info(
250 "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
251 "BOOTCOUNT", rebootCounterParam);
252 return true;
253 }
254 else
255 {
256 // We are at 0 so reset reboot counter and go to quiesce state
257 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
258 attemptsLeft(reboot::RebootAttempts::retryAttempts());
259
260 // Generate log since we will now be sitting in Quiesce
261 const std::string errorMsg =
262 "xyz.openbmc_project.State.Error.HostQuiesce";
263 utils::createError(this->bus, errorMsg,
264 sdbusplus::xyz::openbmc_project::Logging::
265 server::Entry::Level::Critical);
266
267 // Generate BMC dump to assist with debug
268 utils::createBmcDump(this->bus);
269
270 return false;
271 }
272 }
273 else
274 {
275 info("Auto reboot disabled.");
276 return false;
277 }
278 }
279 catch (const sdbusplus::exception_t& e)
280 {
281 error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
282 return false;
283 }
284 }
285
sysStateChangeJobRemoved(sdbusplus::message_t & msg)286 void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
287 {
288 uint32_t newStateID{};
289 sdbusplus::message::object_path newStateObjPath;
290 std::string newStateUnit{};
291 std::string newStateResult{};
292
293 // Read the msg and populate each variable
294 msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);
295
296 if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
297 (newStateResult == "done") &&
298 (!stateActive(getTarget(server::Host::HostState::Running))))
299 {
300 info("Received signal that host is off");
301 this->currentHostState(server::Host::HostState::Off);
302 this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
303 this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
304 }
305 else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
306 (newStateResult == "done") &&
307 (stateActive(getTarget(server::Host::HostState::Running))))
308 {
309 info("Received signal that host is running");
310 this->currentHostState(server::Host::HostState::Running);
311
312 // Remove temporary file which is utilized for scenarios where the
313 // BMC is rebooted while the host is still up.
314 // This file is used to indicate to host related systemd services
315 // that the host is already running and they should skip running.
316 // Once the host state is back to running we can clear this file.
317 std::string hostFile = std::format(HOST_RUNNING_FILE, id);
318 if (std::filesystem::exists(hostFile))
319 {
320 try
321 {
322 std::filesystem::remove(hostFile);
323 }
324 catch (const std::filesystem::filesystem_error& e)
325 {
326 error("Failed to remove host running file {FILE}: {ERROR}",
327 "FILE", hostFile, "ERROR", e.what());
328 }
329 }
330 }
331 else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
332 (newStateResult == "done") &&
333 (stateActive(getTarget(server::Host::HostState::Quiesced))))
334 {
335 if (Host::isAutoReboot())
336 {
337 info("Beginning reboot...");
338 Host::requestedHostTransition(server::Host::Transition::Reboot);
339 }
340 else
341 {
342 info("Maintaining quiesce");
343 this->currentHostState(server::Host::HostState::Quiesced);
344 }
345 }
346 }
347
sysStateChangeJobNew(sdbusplus::message_t & msg)348 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
349 {
350 uint32_t newStateID{};
351 sdbusplus::message::object_path newStateObjPath;
352 std::string newStateUnit{};
353
354 // Read the msg and populate each variable
355 msg.read(newStateID, newStateObjPath, newStateUnit);
356
357 if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
358 {
359 info("Received signal that host is in diagnostice mode");
360 this->currentHostState(server::Host::HostState::DiagnosticMode);
361 }
362 else if ((newStateUnit == hostCrashTarget) &&
363 (server::Host::currentHostState() ==
364 server::Host::HostState::Running))
365 {
366 // Only decrease the boot count if host was running when the host crash
367 // target was started. Systemd will sometimes trigger multiple
368 // JobNew events for the same target. This seems to be related to
369 // how OpenBMC utilizes the targets in the reboot scenario
370 info("Received signal that host has crashed, decrement reboot count");
371
372 // A host crash can cause a reboot of the host so decrement the reboot
373 // count
374 decrementRebootCount();
375 }
376 }
377
decrementRebootCount()378 uint32_t Host::decrementRebootCount()
379 {
380 auto rebootCount = reboot::RebootAttempts::attemptsLeft();
381 if (rebootCount > 0)
382 {
383 return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
384 }
385 return rebootCount;
386 }
387
serialize()388 fs::path Host::serialize()
389 {
390 fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
391 std::ofstream os(path.c_str(), std::ios::binary);
392 cereal::JSONOutputArchive oarchive(os);
393 oarchive(*this);
394 return path;
395 }
396
deserialize()397 bool Host::deserialize()
398 {
399 fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};
400 try
401 {
402 if (fs::exists(path))
403 {
404 std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
405 cereal::JSONInputArchive iarchive(is);
406 iarchive(*this);
407 return true;
408 }
409 return false;
410 }
411 catch (const cereal::Exception& e)
412 {
413 error("deserialize exception: {ERROR}", "ERROR", e);
414 fs::remove(path);
415 return false;
416 }
417 }
418
requestedHostTransition(Transition value)419 Host::Transition Host::requestedHostTransition(Transition value)
420 {
421 info("Host{HOST_ID} state transition request of {REQ}", "HOST_ID", id,
422 "REQ", value);
423
424 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
425 if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
426 {
427 info("BMC State is not Ready so no host on operations allowed");
428 throw sdbusplus::xyz::openbmc_project::State::Host::Error::
429 BMCNotReady();
430 }
431 #endif
432
433 // If this is not a power off request then we need to
434 // decrement the reboot counter. This code should
435 // never prevent a power on, it should just decrement
436 // the count to 0. The quiesce handling is where the
437 // check of this count will occur
438 if (value != server::Host::Transition::Off)
439 {
440 #ifdef CHECK_FWUPDATE_BEFORE_DO_TRANSITION
441 /*
442 * Do not do transition when the any firmware being updated
443 */
444 if (phosphor::state::manager::utils::isFirmwareUpdating(this->bus))
445 {
446 info("Firmware being updated, reject the transition request");
447 throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
448 }
449 #endif // CHECK_FWUPDATE_BEFORE_DO_TRANSITION
450
451 decrementRebootCount();
452 }
453
454 executeTransition(value);
455
456 auto retVal = server::Host::requestedHostTransition(value);
457
458 serialize();
459 return retVal;
460 }
461
bootProgress(ProgressStages value)462 Host::ProgressStages Host::bootProgress(ProgressStages value)
463 {
464 auto retVal = bootprogress::Progress::bootProgress(value);
465
466 // Update the BootProgressLastUpdate anytime BootProgress is updated
467 auto timeStamp = std::chrono::duration_cast<std::chrono::microseconds>(
468 std::chrono::system_clock::now().time_since_epoch())
469 .count();
470 this->bootProgressLastUpdate(timeStamp);
471 serialize();
472 return retVal;
473 }
474
bootProgressLastUpdate(uint64_t value)475 uint64_t Host::bootProgressLastUpdate(uint64_t value)
476 {
477 auto retVal = bootprogress::Progress::bootProgressLastUpdate(value);
478 serialize();
479 return retVal;
480 }
481
operatingSystemState(OSStatus value)482 Host::OSStatus Host::operatingSystemState(OSStatus value)
483 {
484 auto retVal = osstatus::Status::operatingSystemState(value);
485 serialize();
486 return retVal;
487 }
488
currentHostState(HostState value)489 Host::HostState Host::currentHostState(HostState value)
490 {
491 info("Change to Host{HOST_ID} State: {STATE}", "HOST_ID", id, "STATE",
492 value);
493 return server::Host::currentHostState(value);
494 }
495
496 } // namespace manager
497 } // namespace state
498 } // namespace phosphor
499