xref: /openbmc/phosphor-state-manager/host_state_manager.cpp (revision 9f38152abf80aba2f0177cb55729ff107f54947d)
#include "config.h"

#include "host_state_manager.hpp"

#include "host_check.hpp"
#include "utils.hpp"

#include <systemd/sd-bus.h>

#include <cereal/archives/json.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/tuple.hpp>
#include <cereal/types/vector.hpp>
#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/exception.hpp>
#include <sdbusplus/server.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
#include <xyz/openbmc_project/State/Host/error.hpp>

#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <system_error>
30 
31 // Register class version with Cereal
32 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
33 
34 namespace phosphor
35 {
36 namespace state
37 {
38 namespace manager
39 {
40 
41 PHOSPHOR_LOG2_USING;
42 
43 // When you see server:: or reboot:: you know we're referencing our base class
44 namespace server = sdbusplus::server::xyz::openbmc_project::state;
45 namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
46 namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
47 namespace osstatus =
48     sdbusplus::server::xyz::openbmc_project::state::operating_system;
49 using namespace phosphor::logging;
50 namespace fs = std::filesystem;
51 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
52 
// systemd unit "ActiveState" values that stateActive() accepts as active
constexpr const char* ACTIVE_STATE = "active";
constexpr const char* ACTIVATING_STATE = "activating";

// D-Bus service, object path and interface used for manager method calls
constexpr const char* SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr const char* SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr const char* SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Interfaces used when reading a property of an individual systemd unit
constexpr const char* SYSTEMD_PROPERTY_IFACE =
    "org.freedesktop.DBus.Properties";
constexpr const char* SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
62 
determineInitialState()63 void Host::determineInitialState()
64 {
65     if (stateActive(getTarget(server::Host::HostState::Running)) ||
66         isHostRunning(id))
67     {
68         info("Initial Host State will be Running");
69         server::Host::currentHostState(HostState::Running, true);
70         server::Host::requestedHostTransition(Transition::On, true);
71     }
72     else
73     {
74         info("Initial Host State will be Off");
75         server::Host::currentHostState(HostState::Off, true);
76         server::Host::requestedHostTransition(Transition::Off, true);
77     }
78 
79     if (!deserialize())
80     {
81         // set to default value.
82         server::Host::requestedHostTransition(Transition::Off, true);
83     }
84     return;
85 }
86 
setupSupportedTransitions()87 void Host::setupSupportedTransitions()
88 {
89     std::set<Transition> supportedTransitions = {
90         Transition::On,
91         Transition::Off,
92         Transition::Reboot,
93         Transition::GracefulWarmReboot,
94 #if ENABLE_FORCE_WARM_REBOOT
95         Transition::ForceWarmReboot,
96 #endif
97     };
98     server::Host::allowedHostTransitions(supportedTransitions);
99 }
100 
/**
 * @brief Build the lookup tables from host states / transitions to the
 *        systemd target names of this host instance.
 *
 * Every target name embeds the instance number `id`. The name of the host
 * crash target is cached in hostCrashTarget as well.
 */
void Host::createSystemdTargetMaps()
{
// Some systems do not support a warm reboot so just map the reboot
// requests to our normal cold reboot in that case
#if ENABLE_WARM_REBOOT
    const std::string gracefulWarmRebootTarget =
        std::format("obmc-host-warm-reboot@{}.target", id);
    const std::string forceWarmRebootTarget =
        std::format("obmc-host-force-warm-reboot@{}.target", id);
#else
    const std::string gracefulWarmRebootTarget =
        std::format("obmc-host-reboot@{}.target", id);
    const std::string forceWarmRebootTarget =
        std::format("obmc-host-reboot@{}.target", id);
#endif

    // Host state -> target whose activity indicates that state
    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    // Requested transition -> target started to carry it out
    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
        {Transition::GracefulWarmReboot, gracefulWarmRebootTarget},
        {Transition::ForceWarmReboot, forceWarmRebootTarget}};

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}
129 
getTarget(HostState state)130 const std::string& Host::getTarget(HostState state)
131 {
132     return stateTargetTable[state];
133 };
134 
getTarget(Transition tranReq)135 const std::string& Host::getTarget(Transition tranReq)
136 {
137     return transitionTargetTable[tranReq];
138 };
139 
executeTransition(Transition tranReq)140 void Host::executeTransition(Transition tranReq)
141 {
142     const auto& sysdUnit = getTarget(tranReq);
143 
144     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
145                                             SYSTEMD_INTERFACE, "StartUnit");
146 
147     method.append(sysdUnit);
148     method.append("replace");
149 
150     this->bus.call_noreply(method);
151 
152     return;
153 }
154 
stateActive(const std::string & target)155 bool Host::stateActive(const std::string& target)
156 {
157     std::variant<std::string> currentState;
158     sdbusplus::message::object_path unitTargetPath;
159 
160     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
161                                             SYSTEMD_INTERFACE, "GetUnit");
162 
163     method.append(target);
164 
165     try
166     {
167         auto result = this->bus.call(method);
168         result.read(unitTargetPath);
169     }
170     catch (const sdbusplus::exception_t& e)
171     {
172         error("Error in GetUnit call: {ERROR}", "ERROR", e);
173         return false;
174     }
175 
176     method = this->bus.new_method_call(
177         SYSTEMD_SERVICE,
178         static_cast<const std::string&>(unitTargetPath).c_str(),
179         SYSTEMD_PROPERTY_IFACE, "Get");
180 
181     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
182 
183     try
184     {
185         auto result = this->bus.call(method);
186         result.read(currentState);
187     }
188     catch (const sdbusplus::exception_t& e)
189     {
190         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
191         return false;
192     }
193 
194     const auto& currentStateStr = std::get<std::string>(currentState);
195     return currentStateStr == ACTIVE_STATE ||
196            currentStateStr == ACTIVATING_STATE;
197 }
198 
isAutoReboot()199 bool Host::isAutoReboot()
200 {
201     using namespace settings;
202 
203     /* The logic here is to first check the one-time AutoReboot setting.
204      * If this property is true (the default) then look at the persistent
205      * user setting in the non one-time object, otherwise honor the one-time
206      * setting and do not auto reboot.
207      */
208     auto methodOneTime = bus.new_method_call(
209         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
210         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
211     methodOneTime.append(autoRebootIntf, "AutoReboot");
212 
213     auto methodUserSetting = bus.new_method_call(
214         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
215         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
216     methodUserSetting.append(autoRebootIntf, "AutoReboot");
217 
218     try
219     {
220         auto reply = bus.call(methodOneTime);
221         std::variant<bool> result;
222         reply.read(result);
223         auto autoReboot = std::get<bool>(result);
224 
225         if (!autoReboot)
226         {
227             info("Auto reboot (one-time) disabled");
228             return false;
229         }
230         else
231         {
232             // one-time is true so read the user setting
233             reply = bus.call(methodUserSetting);
234             reply.read(result);
235             autoReboot = std::get<bool>(result);
236         }
237 
238         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
239 
240         if (autoReboot)
241         {
242             if (rebootCounterParam > 0)
243             {
244                 // Reduce BOOTCOUNT by 1
245                 info(
246                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
247                     "BOOTCOUNT", rebootCounterParam);
248                 return true;
249             }
250             else
251             {
252                 // We are at 0 so reset reboot counter and go to quiesce state
253                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
254                 attemptsLeft(reboot::RebootAttempts::retryAttempts());
255 
256                 // Generate log since we will now be sitting in Quiesce
257                 const std::string errorMsg =
258                     "xyz.openbmc_project.State.Error.HostQuiesce";
259                 utils::createError(this->bus, errorMsg,
260                                    sdbusplus::xyz::openbmc_project::Logging::
261                                        server::Entry::Level::Critical);
262 
263                 // Generate BMC dump to assist with debug
264                 utils::createBmcDump(this->bus);
265 
266                 return false;
267             }
268         }
269         else
270         {
271             info("Auto reboot disabled.");
272             return false;
273         }
274     }
275     catch (const sdbusplus::exception_t& e)
276     {
277         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
278         return false;
279     }
280 }
281 
/**
 * @brief React to a finished systemd job that affects the host state.
 *
 * The message is read as (job id, job object path, unit name, result) -
 * presumably the payload of systemd's JobRemoved signal. Only jobs whose
 * result is "done" are acted upon:
 *  - Off target finished and the Running target is not active: the host is
 *    marked Off, boot progress is set to Unspecified and the OS status to
 *    Inactive.
 *  - Running target finished and is active: the host is marked Running and
 *    the "host running" marker file is removed.
 *  - Quiesced target finished and is active: either a Reboot transition is
 *    requested (when isAutoReboot() allows it) or the host is marked
 *    Quiesced.
 *
 * @param[in] msg - Signal message to decode
 */
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    // Every case below requires a successfully completed job
    if (newStateResult != "done")
    {
        return;
    }

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // NOTE(review): the file name is always formatted with instance 0
        // rather than `id`; confirm this matches the code that creates the
        // file before changing it.
        const std::string hostFile = std::format(HOST_RUNNING_FILE, 0);

        // Use the non-throwing overload: this runs inside a bus signal
        // handler with no exception handling of its own, and a missing
        // file is not an error (remove() simply returns false).
        std::error_code ec;
        std::filesystem::remove(hostFile, ec);
        if (ec)
        {
            error("Failed to remove {FILE}: {ERROR}", "FILE", hostFile,
                  "ERROR", ec.message());
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}
335 
sysStateChangeJobNew(sdbusplus::message_t & msg)336 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
337 {
338     uint32_t newStateID{};
339     sdbusplus::message::object_path newStateObjPath;
340     std::string newStateUnit{};
341 
342     // Read the msg and populate each variable
343     msg.read(newStateID, newStateObjPath, newStateUnit);
344 
345     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
346     {
347         info("Received signal that host is in diagnostice mode");
348         this->currentHostState(server::Host::HostState::DiagnosticMode);
349     }
350     else if ((newStateUnit == hostCrashTarget) &&
351              (server::Host::currentHostState() ==
352               server::Host::HostState::Running))
353     {
354         // Only decrease the boot count if host was running when the host crash
355         // target was started. Systemd will sometimes trigger multiple
356         // JobNew events for the same target. This seems to be related to
357         // how OpenBMC utilizes the targets in the reboot scenario
358         info("Received signal that host has crashed, decrement reboot count");
359 
360         // A host crash can cause a reboot of the host so decrement the reboot
361         // count
362         decrementRebootCount();
363     }
364 }
365 
decrementRebootCount()366 uint32_t Host::decrementRebootCount()
367 {
368     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
369     if (rebootCount > 0)
370     {
371         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
372     }
373     return rebootCount;
374 }
375 
/**
 * @brief Persist this object as JSON in the per-host state file.
 *
 * The file name is HOST_STATE_PERSIST_PATH formatted with the instance
 * number `id`.
 *
 * @return Path of the file that was written
 */
fs::path Host::serialize()
{
    fs::path persistFile{std::format(HOST_STATE_PERSIST_PATH, id)};
    std::ofstream output(persistFile.c_str(), std::ios::binary);

    {
        // The archive is scoped so it is destroyed before the stream
        cereal::JSONOutputArchive archive(output);
        archive(*this);
    }

    return persistFile;
}
384 
/**
 * @brief Restore this object from the per-host JSON state file.
 *
 * The file name is HOST_STATE_PERSIST_PATH formatted with the instance
 * number `id`. A file that cannot be parsed by cereal is deleted so it is
 * not picked up again.
 *
 * Filesystem queries use the non-throwing overloads, so a filesystem error
 * is logged and reported as "nothing restored" instead of escaping as
 * std::filesystem::filesystem_error.
 *
 * @return true when the file existed and was loaded; false when it does not
 *         exist, cannot be checked, or failed to parse
 */
bool Host::deserialize()
{
    const fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};

    std::error_code ec;
    if (!fs::exists(path, ec))
    {
        // ec stays clear when the file is simply absent
        if (ec)
        {
            error("Unable to check for {PATH}: {ERROR}", "PATH", path.c_str(),
                  "ERROR", ec.message());
        }
        return false;
    }

    try
    {
        std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
        cereal::JSONInputArchive iarchive(is);
        iarchive(*this);
        return true;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);

        // Drop the unusable file; a throw from inside this handler would
        // bypass the false return below, so use the error_code overload.
        fs::remove(path, ec);
        if (ec)
        {
            error("Failed to remove {PATH}: {ERROR}", "PATH", path.c_str(),
                  "ERROR", ec.message());
        }
        return false;
    }
}
406 
requestedHostTransition(Transition value)407 Host::Transition Host::requestedHostTransition(Transition value)
408 {
409     info("Host state transition request of {REQ}", "REQ", value);
410 
411 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
412     if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
413     {
414         info("BMC State is not Ready so no host on operations allowed");
415         throw sdbusplus::xyz::openbmc_project::State::Host::Error::
416             BMCNotReady();
417     }
418 #endif
419 
420     // If this is not a power off request then we need to
421     // decrement the reboot counter.  This code should
422     // never prevent a power on, it should just decrement
423     // the count to 0.  The quiesce handling is where the
424     // check of this count will occur
425     if (value != server::Host::Transition::Off)
426     {
427 #ifdef CHECK_FWUPDATE_BEFORE_DO_TRANSITION
428         /*
429          * Do not do transition when the any firmware being updated
430          */
431         if (phosphor::state::manager::utils::isFirmwareUpdating(this->bus))
432         {
433             info("Firmware being updated, reject the transition request");
434             throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
435         }
436 #endif // CHECK_FWUPDATE_BEFORE_DO_TRANSITION
437 
438         decrementRebootCount();
439     }
440 
441     executeTransition(value);
442 
443     auto retVal = server::Host::requestedHostTransition(value);
444 
445     serialize();
446     return retVal;
447 }
448 
bootProgress(ProgressStages value)449 Host::ProgressStages Host::bootProgress(ProgressStages value)
450 {
451     auto retVal = bootprogress::Progress::bootProgress(value);
452     serialize();
453     return retVal;
454 }
455 
operatingSystemState(OSStatus value)456 Host::OSStatus Host::operatingSystemState(OSStatus value)
457 {
458     auto retVal = osstatus::Status::operatingSystemState(value);
459     serialize();
460     return retVal;
461 }
462 
currentHostState(HostState value)463 Host::HostState Host::currentHostState(HostState value)
464 {
465     info("Change to Host State: {STATE}", "STATE", value);
466     return server::Host::currentHostState(value);
467 }
468 
469 } // namespace manager
470 } // namespace state
471 } // namespace phosphor
472