1 #include "config.h"
2
3 #include "host_state_manager.hpp"
4
5 #include "host_check.hpp"
6 #include "utils.hpp"
7
8 #include <systemd/sd-bus.h>
9
10 #include <cereal/archives/json.hpp>
11 #include <cereal/cereal.hpp>
12 #include <cereal/types/string.hpp>
13 #include <cereal/types/tuple.hpp>
14 #include <cereal/types/vector.hpp>
15 #include <phosphor-logging/elog-errors.hpp>
16 #include <phosphor-logging/lg2.hpp>
17 #include <sdbusplus/exception.hpp>
18 #include <sdbusplus/server.hpp>
19 #include <xyz/openbmc_project/Common/error.hpp>
20 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
21 #include <xyz/openbmc_project/State/Host/error.hpp>
22
23 #include <filesystem>
24 #include <format>
25 #include <fstream>
26 #include <iostream>
27 #include <map>
28 #include <set>
29 #include <string>
30
31 // Register class version with Cereal
32 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
33
34 namespace phosphor
35 {
36 namespace state
37 {
38 namespace manager
39 {
40
41 PHOSPHOR_LOG2_USING;
42
43 // When you see server:: or reboot:: you know we're referencing our base class
44 namespace server = sdbusplus::server::xyz::openbmc_project::state;
45 namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
46 namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
47 namespace osstatus =
48 sdbusplus::server::xyz::openbmc_project::state::operating_system;
49 using namespace phosphor::logging;
50 namespace fs = std::filesystem;
51 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
52
53 constexpr auto ACTIVE_STATE = "active";
54 constexpr auto ACTIVATING_STATE = "activating";
55
56 constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
57 constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
58 constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";
59
60 constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
61 constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
62
determineInitialState()63 void Host::determineInitialState()
64 {
65 if (stateActive(getTarget(server::Host::HostState::Running)) ||
66 isHostRunning(id))
67 {
68 info("Initial Host State will be Running");
69 server::Host::currentHostState(HostState::Running, true);
70 server::Host::requestedHostTransition(Transition::On, true);
71 }
72 else
73 {
74 info("Initial Host State will be Off");
75 server::Host::currentHostState(HostState::Off, true);
76 server::Host::requestedHostTransition(Transition::Off, true);
77 }
78
79 if (!deserialize())
80 {
81 // set to default value.
82 server::Host::requestedHostTransition(Transition::Off, true);
83 }
84 return;
85 }
86
setupSupportedTransitions()87 void Host::setupSupportedTransitions()
88 {
89 std::set<Transition> supportedTransitions = {
90 Transition::On,
91 Transition::Off,
92 Transition::Reboot,
93 Transition::GracefulWarmReboot,
94 #if ENABLE_FORCE_WARM_REBOOT
95 Transition::ForceWarmReboot,
96 #endif
97 };
98 server::Host::allowedHostTransitions(supportedTransitions);
99 }
100
/**
 * @brief Build the lookup tables from host states / transitions to the
 *        systemd targets of this host instance.
 *
 * Every unit name is instantiated with this object's `id`. The crash target
 * name is cached in hostCrashTarget as well.
 */
void Host::createSystemdTargetMaps()
{
    const std::string rebootTarget =
        std::format("obmc-host-reboot@{}.target", id);

    // Some systems do not support a warm reboot, so in that case both warm
    // reboot requests are mapped onto the normal cold reboot target.
#if ENABLE_WARM_REBOOT
    const std::string gracefulWarmRebootTarget =
        std::format("obmc-host-warm-reboot@{}.target", id);
    const std::string forceWarmRebootTarget =
        std::format("obmc-host-force-warm-reboot@{}.target", id);
#else
    const std::string& gracefulWarmRebootTarget = rebootTarget;
    const std::string& forceWarmRebootTarget = rebootTarget;
#endif

    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, rebootTarget},
        {Transition::GracefulWarmReboot, gracefulWarmRebootTarget},
        {Transition::ForceWarmReboot, forceWarmRebootTarget}};

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}
129
getTarget(HostState state)130 const std::string& Host::getTarget(HostState state)
131 {
132 return stateTargetTable[state];
133 };
134
getTarget(Transition tranReq)135 const std::string& Host::getTarget(Transition tranReq)
136 {
137 return transitionTargetTable[tranReq];
138 };
139
executeTransition(Transition tranReq)140 void Host::executeTransition(Transition tranReq)
141 {
142 const auto& sysdUnit = getTarget(tranReq);
143
144 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
145 SYSTEMD_INTERFACE, "StartUnit");
146
147 method.append(sysdUnit);
148 method.append("replace");
149
150 this->bus.call_noreply(method);
151
152 return;
153 }
154
stateActive(const std::string & target)155 bool Host::stateActive(const std::string& target)
156 {
157 std::variant<std::string> currentState;
158 sdbusplus::message::object_path unitTargetPath;
159
160 auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
161 SYSTEMD_INTERFACE, "GetUnit");
162
163 method.append(target);
164
165 try
166 {
167 auto result = this->bus.call(method);
168 result.read(unitTargetPath);
169 }
170 catch (const sdbusplus::exception_t& e)
171 {
172 error("Error in GetUnit call: {ERROR}", "ERROR", e);
173 return false;
174 }
175
176 method = this->bus.new_method_call(
177 SYSTEMD_SERVICE,
178 static_cast<const std::string&>(unitTargetPath).c_str(),
179 SYSTEMD_PROPERTY_IFACE, "Get");
180
181 method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
182
183 try
184 {
185 auto result = this->bus.call(method);
186 result.read(currentState);
187 }
188 catch (const sdbusplus::exception_t& e)
189 {
190 error("Error in ActiveState Get: {ERROR}", "ERROR", e);
191 return false;
192 }
193
194 const auto& currentStateStr = std::get<std::string>(currentState);
195 return currentStateStr == ACTIVE_STATE ||
196 currentStateStr == ACTIVATING_STATE;
197 }
198
isAutoReboot()199 bool Host::isAutoReboot()
200 {
201 using namespace settings;
202
203 /* The logic here is to first check the one-time AutoReboot setting.
204 * If this property is true (the default) then look at the persistent
205 * user setting in the non one-time object, otherwise honor the one-time
206 * setting and do not auto reboot.
207 */
208 auto methodOneTime = bus.new_method_call(
209 settings.service(settings.autoReboot, autoRebootIntf).c_str(),
210 settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
211 methodOneTime.append(autoRebootIntf, "AutoReboot");
212
213 auto methodUserSetting = bus.new_method_call(
214 settings.service(settings.autoReboot, autoRebootIntf).c_str(),
215 settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
216 methodUserSetting.append(autoRebootIntf, "AutoReboot");
217
218 try
219 {
220 auto reply = bus.call(methodOneTime);
221 std::variant<bool> result;
222 reply.read(result);
223 auto autoReboot = std::get<bool>(result);
224
225 if (!autoReboot)
226 {
227 info("Auto reboot (one-time) disabled");
228 return false;
229 }
230 else
231 {
232 // one-time is true so read the user setting
233 reply = bus.call(methodUserSetting);
234 reply.read(result);
235 autoReboot = std::get<bool>(result);
236 }
237
238 auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
239
240 if (autoReboot)
241 {
242 if (rebootCounterParam > 0)
243 {
244 // Reduce BOOTCOUNT by 1
245 info(
246 "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
247 "BOOTCOUNT", rebootCounterParam);
248 return true;
249 }
250 else
251 {
252 // We are at 0 so reset reboot counter and go to quiesce state
253 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
254 attemptsLeft(reboot::RebootAttempts::retryAttempts());
255
256 // Generate log since we will now be sitting in Quiesce
257 const std::string errorMsg =
258 "xyz.openbmc_project.State.Error.HostQuiesce";
259 utils::createError(this->bus, errorMsg,
260 sdbusplus::xyz::openbmc_project::Logging::
261 server::Entry::Level::Critical);
262
263 // Generate BMC dump to assist with debug
264 utils::createBmcDump(this->bus);
265
266 return false;
267 }
268 }
269 else
270 {
271 info("Auto reboot disabled.");
272 return false;
273 }
274 }
275 catch (const sdbusplus::exception_t& e)
276 {
277 error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
278 return false;
279 }
280 }
281
/**
 * @brief Handle a systemd JobRemoved signal and update the host state.
 *
 * Only jobs whose result is "done" are considered. Depending on the unit
 * the job belonged to:
 *  - Off target, and the Running target is not active: the host is marked
 *    Off, boot progress is reset to Unspecified and the OS state to
 *    Inactive.
 *  - Running target, and it is active: the host is marked Running and the
 *    "host is running" marker file of this host instance is removed.
 *  - Quiesced target, and it is active: either a Reboot transition is
 *    requested (when isAutoReboot() allows it) or the host is marked
 *    Quiesced.
 *
 * @param[in] msg - JobRemoved signal; read as (job id, job object path,
 *                  unit name, result).
 */
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    // Every state change below requires the job to have completed
    // successfully, so anything else can be ignored up front.
    if (newStateResult != "done")
    {
        return;
    }

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // The marker is per host instance, so it is built from this object's
        // id, like every other per-instance path in this file.
        const std::string hostFile = std::format(HOST_RUNNING_FILE, id);

        // The non-throwing overload is used so a filesystem problem cannot
        // escape from this signal handler; a file that does not exist is
        // not reported as an error.
        std::error_code ec;
        std::filesystem::remove(hostFile, ec);
        if (ec)
        {
            error("Failed to remove host running file {PATH}: {ERROR}", "PATH",
                  hostFile, "ERROR", ec.message());
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}
335
sysStateChangeJobNew(sdbusplus::message_t & msg)336 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
337 {
338 uint32_t newStateID{};
339 sdbusplus::message::object_path newStateObjPath;
340 std::string newStateUnit{};
341
342 // Read the msg and populate each variable
343 msg.read(newStateID, newStateObjPath, newStateUnit);
344
345 if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
346 {
347 info("Received signal that host is in diagnostice mode");
348 this->currentHostState(server::Host::HostState::DiagnosticMode);
349 }
350 else if ((newStateUnit == hostCrashTarget) &&
351 (server::Host::currentHostState() ==
352 server::Host::HostState::Running))
353 {
354 // Only decrease the boot count if host was running when the host crash
355 // target was started. Systemd will sometimes trigger multiple
356 // JobNew events for the same target. This seems to be related to
357 // how OpenBMC utilizes the targets in the reboot scenario
358 info("Received signal that host has crashed, decrement reboot count");
359
360 // A host crash can cause a reboot of the host so decrement the reboot
361 // count
362 decrementRebootCount();
363 }
364 }
365
decrementRebootCount()366 uint32_t Host::decrementRebootCount()
367 {
368 auto rebootCount = reboot::RebootAttempts::attemptsLeft();
369 if (rebootCount > 0)
370 {
371 return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
372 }
373 return rebootCount;
374 }
375
/**
 * @brief Persist this object as JSON in the per-host state file.
 *
 * The file location is HOST_STATE_PERSIST_PATH formatted with this host's
 * id; the content is produced by cereal's JSON output archive.
 *
 * @return Path of the file the state was written to.
 */
fs::path Host::serialize()
{
    fs::path persistFile{std::format(HOST_STATE_PERSIST_PATH, id)};

    std::ofstream output(persistFile.c_str(), std::ios::binary);
    cereal::JSONOutputArchive archive(output);
    archive(*this);

    return persistFile;
}
384
/**
 * @brief Restore this object from the per-host JSON state file.
 *
 * @return true when the file existed and was loaded; false when there is no
 *         file, or when cereal raised an exception while reading it (in
 *         which case the error is logged and the file is removed).
 */
bool Host::deserialize()
{
    const fs::path persistFile{std::format(HOST_STATE_PERSIST_PATH, id)};

    try
    {
        if (!fs::exists(persistFile))
        {
            // Nothing has been persisted for this host yet.
            return false;
        }

        std::ifstream input(persistFile.c_str(),
                            std::ios::in | std::ios::binary);
        cereal::JSONInputArchive archive(input);
        archive(*this);
        return true;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);
        // Drop the unreadable file.
        fs::remove(persistFile);
        return false;
    }
}
406
requestedHostTransition(Transition value)407 Host::Transition Host::requestedHostTransition(Transition value)
408 {
409 info("Host state transition request of {REQ}", "REQ", value);
410
411 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
412 if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
413 {
414 info("BMC State is not Ready so no host on operations allowed");
415 throw sdbusplus::xyz::openbmc_project::State::Host::Error::
416 BMCNotReady();
417 }
418 #endif
419
420 // If this is not a power off request then we need to
421 // decrement the reboot counter. This code should
422 // never prevent a power on, it should just decrement
423 // the count to 0. The quiesce handling is where the
424 // check of this count will occur
425 if (value != server::Host::Transition::Off)
426 {
427 #ifdef CHECK_FWUPDATE_BEFORE_DO_TRANSITION
428 /*
429 * Do not do transition when the any firmware being updated
430 */
431 if (phosphor::state::manager::utils::isFirmwareUpdating(this->bus))
432 {
433 info("Firmware being updated, reject the transition request");
434 throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
435 }
436 #endif // CHECK_FWUPDATE_BEFORE_DO_TRANSITION
437
438 decrementRebootCount();
439 }
440
441 executeTransition(value);
442
443 auto retVal = server::Host::requestedHostTransition(value);
444
445 serialize();
446 return retVal;
447 }
448
bootProgress(ProgressStages value)449 Host::ProgressStages Host::bootProgress(ProgressStages value)
450 {
451 auto retVal = bootprogress::Progress::bootProgress(value);
452 serialize();
453 return retVal;
454 }
455
operatingSystemState(OSStatus value)456 Host::OSStatus Host::operatingSystemState(OSStatus value)
457 {
458 auto retVal = osstatus::Status::operatingSystemState(value);
459 serialize();
460 return retVal;
461 }
462
currentHostState(HostState value)463 Host::HostState Host::currentHostState(HostState value)
464 {
465 info("Change to Host State: {STATE}", "STATE", value);
466 return server::Host::currentHostState(value);
467 }
468
469 } // namespace manager
470 } // namespace state
471 } // namespace phosphor
472