xref: /openbmc/phosphor-state-manager/host_state_manager.cpp (revision 17846992b844c78415a226247dd6d28e4b33b900)
#include "config.h"

#include "host_state_manager.hpp"

#include "host_check.hpp"
#include "utils.hpp"

#include <systemd/sd-bus.h>

#include <cereal/archives/json.hpp>
#include <cereal/cereal.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/tuple.hpp>
#include <cereal/types/vector.hpp>
#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <sdbusplus/exception.hpp>
#include <sdbusplus/server.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
#include <xyz/openbmc_project/State/Host/error.hpp>

#include <chrono>
#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <system_error>
31 
32 // Register class version with Cereal
33 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
34 
35 namespace phosphor
36 {
37 namespace state
38 {
39 namespace manager
40 {
41 
42 PHOSPHOR_LOG2_USING;
43 
44 // When you see server:: or reboot:: you know we're referencing our base class
45 namespace server = sdbusplus::server::xyz::openbmc_project::state;
46 namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
47 namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
48 namespace osstatus =
49     sdbusplus::server::xyz::openbmc_project::state::operating_system;
50 using namespace phosphor::logging;
51 namespace fs = std::filesystem;
52 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
53 
// Values of a systemd unit's "ActiveState" property that stateActive()
// accepts as meaning the unit is up.
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus service, object path and interface used for the systemd manager
// calls in this file ("StartUnit" and "GetUnit").
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Standard D-Bus properties interface; despite the SYSTEMD_ prefix it is
// also used below for "Get" calls against the settings objects.
constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
// Interface that owns the "ActiveState" property read by stateActive().
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";

// Name of the property read from the auto-reboot settings objects in
// isAutoReboot().
constexpr auto AUTO_REBOOT_PROPERTY = "AutoReboot";
65 
determineInitialState()66 void Host::determineInitialState()
67 {
68     if (stateActive(getTarget(server::Host::HostState::Running)) ||
69         isHostRunning(id))
70     {
71         info("Initial Host State will be Running");
72         server::Host::currentHostState(HostState::Running, true);
73         server::Host::requestedHostTransition(Transition::On, true);
74     }
75     else
76     {
77         info("Initial Host State will be Off");
78         server::Host::currentHostState(HostState::Off, true);
79         server::Host::requestedHostTransition(Transition::Off, true);
80     }
81 
82     if (!deserialize())
83     {
84         // set to default value.
85         server::Host::requestedHostTransition(Transition::Off, true);
86         reboot::RebootAttempts::retryAttempts(BOOT_COUNT_MAX_ALLOWED);
87     }
88     return;
89 }
90 
setupSupportedTransitions()91 void Host::setupSupportedTransitions()
92 {
93     std::set<Transition> supportedTransitions = {
94         Transition::On,
95         Transition::Off,
96         Transition::Reboot,
97         Transition::GracefulWarmReboot,
98 #if ENABLE_FORCE_WARM_REBOOT
99         Transition::ForceWarmReboot,
100 #endif
101     };
102     server::Host::allowedHostTransitions(supportedTransitions);
103 }
104 
/**
 * @brief Build the lookup tables from host states / transitions to the
 *        systemd target names of this host instance.
 *
 * Every target name embeds the host id. Also records the name of the host
 * crash target in hostCrashTarget.
 */
void Host::createSystemdTargetMaps()
{
    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    // Some systems do not support a warm reboot, so in that case both warm
    // reboot requests are mapped onto the normal cold reboot target.
#if ENABLE_WARM_REBOOT
    const auto gracefulWarmTarget =
        std::format("obmc-host-warm-reboot@{}.target", id);
    const auto forceWarmTarget =
        std::format("obmc-host-force-warm-reboot@{}.target", id);
#else
    const auto gracefulWarmTarget =
        std::format("obmc-host-reboot@{}.target", id);
    const auto forceWarmTarget = std::format("obmc-host-reboot@{}.target", id);
#endif

    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
        {Transition::GracefulWarmReboot, gracefulWarmTarget},
        {Transition::ForceWarmReboot, forceWarmTarget}};

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}
133 
getTarget(HostState state)134 const std::string& Host::getTarget(HostState state)
135 {
136     return stateTargetTable[state];
137 };
138 
getTarget(Transition tranReq)139 const std::string& Host::getTarget(Transition tranReq)
140 {
141     return transitionTargetTable[tranReq];
142 };
143 
executeTransition(Transition tranReq)144 void Host::executeTransition(Transition tranReq)
145 {
146     const auto& sysdUnit = getTarget(tranReq);
147 
148     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
149                                             SYSTEMD_INTERFACE, "StartUnit");
150 
151     method.append(sysdUnit);
152     method.append("replace");
153 
154     this->bus.call_noreply(method);
155 
156     return;
157 }
158 
stateActive(const std::string & target)159 bool Host::stateActive(const std::string& target)
160 {
161     std::variant<std::string> currentState;
162     sdbusplus::message::object_path unitTargetPath;
163 
164     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
165                                             SYSTEMD_INTERFACE, "GetUnit");
166 
167     method.append(target);
168 
169     try
170     {
171         auto result = this->bus.call(method);
172         result.read(unitTargetPath);
173     }
174     catch (const sdbusplus::exception_t& e)
175     {
176         error("Error in GetUnit call: {ERROR}", "ERROR", e);
177         return false;
178     }
179 
180     method = this->bus.new_method_call(
181         SYSTEMD_SERVICE,
182         static_cast<const std::string&>(unitTargetPath).c_str(),
183         SYSTEMD_PROPERTY_IFACE, "Get");
184 
185     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
186 
187     try
188     {
189         auto result = this->bus.call(method);
190         result.read(currentState);
191     }
192     catch (const sdbusplus::exception_t& e)
193     {
194         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
195         return false;
196     }
197 
198     const auto& currentStateStr = std::get<std::string>(currentState);
199     return currentStateStr == ACTIVE_STATE ||
200            currentStateStr == ACTIVATING_STATE;
201 }
202 
isAutoReboot()203 bool Host::isAutoReboot()
204 {
205     using namespace settings;
206 
207     /* The logic here is to first check the one-time AutoReboot setting.
208      * If this property is true (the default) then look at the persistent
209      * user setting in the non one-time object, otherwise honor the one-time
210      * setting and do not auto reboot.
211      */
212     auto methodOneTime = bus.new_method_call(
213         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
214         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
215     methodOneTime.append(autoRebootIntf, AUTO_REBOOT_PROPERTY);
216 
217     auto methodUserSetting = bus.new_method_call(
218         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
219         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
220     methodUserSetting.append(autoRebootIntf, AUTO_REBOOT_PROPERTY);
221 
222     try
223     {
224         auto reply = bus.call(methodOneTime);
225         auto result = reply.unpack<std::variant<bool>>();
226 
227         auto autoReboot = std::get<bool>(result);
228 
229         if (!autoReboot)
230         {
231             info("Auto reboot (one-time) disabled");
232             return false;
233         }
234         else
235         {
236             // one-time is true so read the user setting
237             reply = bus.call(methodUserSetting);
238             reply.read(result);
239             autoReboot = std::get<bool>(result);
240         }
241 
242         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
243 
244         if (autoReboot)
245         {
246             if (rebootCounterParam > 0)
247             {
248                 // Reduce BOOTCOUNT by 1
249                 info(
250                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
251                     "BOOTCOUNT", rebootCounterParam);
252                 return true;
253             }
254             else
255             {
256                 // We are at 0 so reset reboot counter and go to quiesce state
257                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
258                 attemptsLeft(reboot::RebootAttempts::retryAttempts());
259 
260                 // Generate log since we will now be sitting in Quiesce
261                 const std::string errorMsg =
262                     "xyz.openbmc_project.State.Error.HostQuiesce";
263                 utils::createError(this->bus, errorMsg,
264                                    sdbusplus::xyz::openbmc_project::Logging::
265                                        server::Entry::Level::Critical);
266 
267                 // Generate BMC dump to assist with debug
268                 utils::createBmcDump(this->bus);
269 
270                 return false;
271             }
272         }
273         else
274         {
275             info("Auto reboot disabled.");
276             return false;
277         }
278     }
279     catch (const sdbusplus::exception_t& e)
280     {
281         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
282         return false;
283     }
284 }
285 
/**
 * @brief Handle a systemd job-removed signal and update the host state.
 *
 * Reads the job id, job object path, unit name and result from the
 * message. Only results equal to "done" are acted upon:
 *  - Off target finished and Running target not active: host state goes to
 *    Off, boot progress to Unspecified and OS state to Inactive.
 *  - Running target finished and active: host state goes to Running and
 *    the "host running" marker file is removed.
 *  - Quiesced target finished and active: either a reboot is requested
 *    (when isAutoReboot() allows) or the host state goes to Quiesced.
 *
 * @param[in] msg - signal message to parse
 */
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (newStateResult == "done") &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (newStateResult == "done") &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        std::string hostFile = std::format(HOST_RUNNING_FILE, id);
        try
        {
            // remove() just returns false when the file is absent, so no
            // separate exists() check is needed. Keeping the whole
            // operation inside the try block also stops a filesystem error
            // from escaping this signal handler.
            std::filesystem::remove(hostFile);
        }
        catch (const std::filesystem::filesystem_error& e)
        {
            error("Failed to remove host running file {FILE}: {ERROR}", "FILE",
                  hostFile, "ERROR", e.what());
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (newStateResult == "done") &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}
347 
sysStateChangeJobNew(sdbusplus::message_t & msg)348 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
349 {
350     uint32_t newStateID{};
351     sdbusplus::message::object_path newStateObjPath;
352     std::string newStateUnit{};
353 
354     // Read the msg and populate each variable
355     msg.read(newStateID, newStateObjPath, newStateUnit);
356 
357     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
358     {
359         info("Received signal that host is in diagnostice mode");
360         this->currentHostState(server::Host::HostState::DiagnosticMode);
361     }
362     else if ((newStateUnit == hostCrashTarget) &&
363              (server::Host::currentHostState() ==
364               server::Host::HostState::Running))
365     {
366         // Only decrease the boot count if host was running when the host crash
367         // target was started. Systemd will sometimes trigger multiple
368         // JobNew events for the same target. This seems to be related to
369         // how OpenBMC utilizes the targets in the reboot scenario
370         info("Received signal that host has crashed, decrement reboot count");
371 
372         // A host crash can cause a reboot of the host so decrement the reboot
373         // count
374         decrementRebootCount();
375     }
376 }
377 
decrementRebootCount()378 uint32_t Host::decrementRebootCount()
379 {
380     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
381     if (rebootCount > 0)
382     {
383         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
384     }
385     return rebootCount;
386 }
387 
/**
 * @brief Write this object to its persistence file as JSON via cereal.
 *
 * The file location is HOST_STATE_PERSIST_PATH formatted with the host id.
 *
 * @return path of the file that was written to
 */
fs::path Host::serialize()
{
    fs::path persistFile{std::format(HOST_STATE_PERSIST_PATH, id)};

    std::ofstream outStream(persistFile.c_str(), std::ios::binary);
    cereal::JSONOutputArchive archive(outStream);
    archive(*this);

    return persistFile;
}
396 
/**
 * @brief Restore this object from its JSON persistence file, if present.
 *
 * The file location is HOST_STATE_PERSIST_PATH formatted with the host id.
 * A file that cereal cannot load is removed.
 *
 * Filesystem queries use the non-throwing std::error_code overloads so
 * that a filesystem problem results in "nothing restored" rather than an
 * exception escaping from here (or from inside the catch handler).
 *
 * @return true if the state was loaded from the file; false if the file
 *         does not exist, could not be checked, or failed to load
 */
bool Host::deserialize()
{
    const fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};

    std::error_code ec;
    if (!fs::exists(path, ec))
    {
        // Missing file, or its existence could not be determined.
        return false;
    }

    try
    {
        std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
        cereal::JSONInputArchive iarchive(is);
        iarchive(*this);
        return true;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);

        // Drop the unreadable file; a failed cleanup must not throw out of
        // this handler.
        fs::remove(path, ec);
        if (ec)
        {
            error("Failed to remove persisted host state file: {ERROR}",
                  "ERROR", ec.message());
        }
        return false;
    }
}
418 
requestedHostTransition(Transition value)419 Host::Transition Host::requestedHostTransition(Transition value)
420 {
421     info("Host{HOST_ID} state transition request of {REQ}", "HOST_ID", id,
422          "REQ", value);
423 
424 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
425     if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
426     {
427         info("BMC State is not Ready so no host on operations allowed");
428         throw sdbusplus::xyz::openbmc_project::State::Host::Error::
429             BMCNotReady();
430     }
431 #endif
432 
433     // If this is not a power off request then we need to
434     // decrement the reboot counter.  This code should
435     // never prevent a power on, it should just decrement
436     // the count to 0.  The quiesce handling is where the
437     // check of this count will occur
438     if (value != server::Host::Transition::Off)
439     {
440 #ifdef CHECK_FWUPDATE_BEFORE_DO_TRANSITION
441         /*
442          * Do not do transition when the any firmware being updated
443          */
444         if (phosphor::state::manager::utils::isFirmwareUpdating(this->bus))
445         {
446             info("Firmware being updated, reject the transition request");
447             throw sdbusplus::xyz::openbmc_project::Common::Error::Unavailable();
448         }
449 #endif // CHECK_FWUPDATE_BEFORE_DO_TRANSITION
450 
451         decrementRebootCount();
452     }
453 
454     executeTransition(value);
455 
456     auto retVal = server::Host::requestedHostTransition(value);
457 
458     serialize();
459     return retVal;
460 }
461 
bootProgress(ProgressStages value)462 Host::ProgressStages Host::bootProgress(ProgressStages value)
463 {
464     auto retVal = bootprogress::Progress::bootProgress(value);
465 
466     // Update the BootProgressLastUpdate anytime BootProgress is updated
467     auto timeStamp = std::chrono::duration_cast<std::chrono::microseconds>(
468                          std::chrono::system_clock::now().time_since_epoch())
469                          .count();
470     this->bootProgressLastUpdate(timeStamp);
471     serialize();
472     return retVal;
473 }
474 
bootProgressLastUpdate(uint64_t value)475 uint64_t Host::bootProgressLastUpdate(uint64_t value)
476 {
477     auto retVal = bootprogress::Progress::bootProgressLastUpdate(value);
478     serialize();
479     return retVal;
480 }
481 
operatingSystemState(OSStatus value)482 Host::OSStatus Host::operatingSystemState(OSStatus value)
483 {
484     auto retVal = osstatus::Status::operatingSystemState(value);
485     serialize();
486     return retVal;
487 }
488 
currentHostState(HostState value)489 Host::HostState Host::currentHostState(HostState value)
490 {
491     info("Change to Host{HOST_ID} State: {STATE}", "HOST_ID", id, "STATE",
492          value);
493     return server::Host::currentHostState(value);
494 }
495 
496 } // namespace manager
497 } // namespace state
498 } // namespace phosphor
499