1 #include "config.h"
2 
3 #include "host_state_manager.hpp"
4 
5 #include "host_check.hpp"
6 #include "utils.hpp"
7 
8 #include <systemd/sd-bus.h>
9 
10 #include <cereal/archives/json.hpp>
11 #include <cereal/cereal.hpp>
12 #include <cereal/types/string.hpp>
13 #include <cereal/types/tuple.hpp>
14 #include <cereal/types/vector.hpp>
15 #include <phosphor-logging/elog-errors.hpp>
16 #include <phosphor-logging/lg2.hpp>
17 #include <sdbusplus/exception.hpp>
18 #include <sdbusplus/server.hpp>
19 #include <xyz/openbmc_project/Common/error.hpp>
20 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
21 #include <xyz/openbmc_project/State/Host/error.hpp>
22 
23 #include <filesystem>
24 #include <format>
25 #include <fstream>
26 #include <iostream>
27 #include <map>
28 #include <set>
29 #include <string>
30 
// Register class version with Cereal
// The version is passed to the Host class' cereal save/load routines so the
// persisted layout can be told apart across releases.
// NOTE(review): CLASS_VERSION is not defined in this file; presumably it comes
// from config.h - confirm.
CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
33 
34 namespace phosphor
35 {
36 namespace state
37 {
38 namespace manager
39 {
40 
// Presumably pulls the lg2 logging helpers (info(), error(), ...) used below
// into this namespace - confirm against phosphor-logging/lg2.hpp.
PHOSPHOR_LOG2_USING;

// When you see server:: or reboot:: you know we're referencing our base class
namespace server = sdbusplus::server::xyz::openbmc_project::state;
namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
namespace osstatus =
    sdbusplus::server::xyz::openbmc_project::state::operating_system;
using namespace phosphor::logging;
namespace fs = std::filesystem;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

// Values of a systemd unit's ActiveState property that stateActive() treats
// as "the unit is up".
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus coordinates of the systemd manager object; used for the StartUnit and
// GetUnit method calls in this file.
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Standard D-Bus properties interface. Despite the SYSTEMD_ prefix it is also
// used for the "Get" calls made against the settings service in
// isAutoReboot().
constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
// Interface that owns the ActiveState property read by stateActive().
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
62 
63 void Host::determineInitialState()
64 {
65     if (stateActive(getTarget(server::Host::HostState::Running)) ||
66         isHostRunning(id))
67     {
68         info("Initial Host State will be Running");
69         server::Host::currentHostState(HostState::Running, true);
70         server::Host::requestedHostTransition(Transition::On, true);
71     }
72     else
73     {
74         info("Initial Host State will be Off");
75         server::Host::currentHostState(HostState::Off, true);
76         server::Host::requestedHostTransition(Transition::Off, true);
77     }
78 
79     if (!deserialize())
80     {
81         // set to default value.
82         server::Host::requestedHostTransition(Transition::Off, true);
83     }
84     return;
85 }
86 
87 void Host::setupSupportedTransitions()
88 {
89     std::set<Transition> supportedTransitions = {
90         Transition::On,
91         Transition::Off,
92         Transition::Reboot,
93         Transition::GracefulWarmReboot,
94 #if ENABLE_FORCE_WARM_REBOOT
95         Transition::ForceWarmReboot,
96 #endif
97     };
98     server::Host::allowedHostTransitions(supportedTransitions);
99 }
100 
// Build the lookup tables that translate host states and requested
// transitions into the per-instance systemd target names (instantiated with
// this host's id), and record the name of the host crash target.
void Host::createSystemdTargetMaps()
{
    const std::string coldRebootUnit =
        std::format("obmc-host-reboot@{}.target", id);

// Some systems do not support a warm reboot so just map the reboot
// requests to our normal cold reboot in that case
#if ENABLE_WARM_REBOOT
    const std::string gracefulWarmUnit =
        std::format("obmc-host-warm-reboot@{}.target", id);
    const std::string forceWarmUnit =
        std::format("obmc-host-force-warm-reboot@{}.target", id);
#else
    const std::string& gracefulWarmUnit = coldRebootUnit;
    const std::string& forceWarmUnit = coldRebootUnit;
#endif

    // Targets whose activity reflects a given host state.
    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    // Targets started to carry out a requested transition.
    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, coldRebootUnit},
        {Transition::GracefulWarmReboot, gracefulWarmUnit},
        {Transition::ForceWarmReboot, forceWarmUnit}};

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}
131 
132 const std::string& Host::getTarget(HostState state)
133 {
134     return stateTargetTable[state];
135 };
136 
137 const std::string& Host::getTarget(Transition tranReq)
138 {
139     return transitionTargetTable[tranReq];
140 };
141 
142 void Host::executeTransition(Transition tranReq)
143 {
144     auto& sysdUnit = getTarget(tranReq);
145 
146     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
147                                             SYSTEMD_INTERFACE, "StartUnit");
148 
149     method.append(sysdUnit);
150     method.append("replace");
151 
152     this->bus.call_noreply(method);
153 
154     return;
155 }
156 
157 bool Host::stateActive(const std::string& target)
158 {
159     std::variant<std::string> currentState;
160     sdbusplus::message::object_path unitTargetPath;
161 
162     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
163                                             SYSTEMD_INTERFACE, "GetUnit");
164 
165     method.append(target);
166 
167     try
168     {
169         auto result = this->bus.call(method);
170         result.read(unitTargetPath);
171     }
172     catch (const sdbusplus::exception_t& e)
173     {
174         error("Error in GetUnit call: {ERROR}", "ERROR", e);
175         return false;
176     }
177 
178     method = this->bus.new_method_call(
179         SYSTEMD_SERVICE,
180         static_cast<const std::string&>(unitTargetPath).c_str(),
181         SYSTEMD_PROPERTY_IFACE, "Get");
182 
183     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
184 
185     try
186     {
187         auto result = this->bus.call(method);
188         result.read(currentState);
189     }
190     catch (const sdbusplus::exception_t& e)
191     {
192         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
193         return false;
194     }
195 
196     const auto& currentStateStr = std::get<std::string>(currentState);
197     return currentStateStr == ACTIVE_STATE ||
198            currentStateStr == ACTIVATING_STATE;
199 }
200 
201 bool Host::isAutoReboot()
202 {
203     using namespace settings;
204 
205     /* The logic here is to first check the one-time AutoReboot setting.
206      * If this property is true (the default) then look at the persistent
207      * user setting in the non one-time object, otherwise honor the one-time
208      * setting and do not auto reboot.
209      */
210     auto methodOneTime = bus.new_method_call(
211         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
212         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
213     methodOneTime.append(autoRebootIntf, "AutoReboot");
214 
215     auto methodUserSetting = bus.new_method_call(
216         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
217         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
218     methodUserSetting.append(autoRebootIntf, "AutoReboot");
219 
220     try
221     {
222         auto reply = bus.call(methodOneTime);
223         std::variant<bool> result;
224         reply.read(result);
225         auto autoReboot = std::get<bool>(result);
226 
227         if (!autoReboot)
228         {
229             info("Auto reboot (one-time) disabled");
230             return false;
231         }
232         else
233         {
234             // one-time is true so read the user setting
235             reply = bus.call(methodUserSetting);
236             reply.read(result);
237             autoReboot = std::get<bool>(result);
238         }
239 
240         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
241 
242         if (autoReboot)
243         {
244             if (rebootCounterParam > 0)
245             {
246                 // Reduce BOOTCOUNT by 1
247                 info(
248                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
249                     "BOOTCOUNT", rebootCounterParam);
250                 return true;
251             }
252             else
253             {
254                 // We are at 0 so reset reboot counter and go to quiesce state
255                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
256                 attemptsLeft(reboot::RebootAttempts::retryAttempts());
257 
258                 // Generate log since we will now be sitting in Quiesce
259                 const std::string errorMsg =
260                     "xyz.openbmc_project.State.Error.HostQuiesce";
261                 utils::createError(this->bus, errorMsg,
262                                    sdbusplus::xyz::openbmc_project::Logging::
263                                        server::Entry::Level::Critical);
264 
265                 // Generate BMC dump to assist with debug
266                 utils::createBmcDump(this->bus);
267 
268                 return false;
269             }
270         }
271         else
272         {
273             info("Auto reboot disabled.");
274             return false;
275         }
276     }
277     catch (const sdbusplus::exception_t& e)
278     {
279         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
280         return false;
281     }
282 }
283 
// Handle a systemd "job finished" notification and update the host state.
// (Presumably registered for systemd's JobRemoved signal - the match setup is
// not in this file.)
//
// @param msg  Signal payload: job id, job object path, unit name and job
//             result, read in that order.
//
// Only jobs whose result is "done" are acted on:
//  - Off target done and the Running target is not active: host is Off; boot
//    progress and OS status are reset.
//  - Running target done and still active: host is Running; the host-running
//    marker file for this host instance is removed.
//  - Quiesced target done and still active: either start an automatic reboot
//    (see isAutoReboot()) or stay in Quiesced.
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    // Every state change below requires the job to have completed
    // successfully, so bail out early for anything else.
    if (newStateResult != "done")
    {
        return;
    }

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // The file name is built from this object's own host id (not a fixed
        // instance 0) so that each host instance clears only its own marker.
        // remove() simply returns false when the file is absent, so no
        // separate existence check is needed.
        std::filesystem::remove(std::format(HOST_RUNNING_FILE, id));
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}
337 
338 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
339 {
340     uint32_t newStateID{};
341     sdbusplus::message::object_path newStateObjPath;
342     std::string newStateUnit{};
343 
344     // Read the msg and populate each variable
345     msg.read(newStateID, newStateObjPath, newStateUnit);
346 
347     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
348     {
349         info("Received signal that host is in diagnostice mode");
350         this->currentHostState(server::Host::HostState::DiagnosticMode);
351     }
352     else if ((newStateUnit == hostCrashTarget) &&
353              (server::Host::currentHostState() ==
354               server::Host::HostState::Running))
355     {
356         // Only decrease the boot count if host was running when the host crash
357         // target was started. Systemd will sometimes trigger multiple
358         // JobNew events for the same target. This seems to be related to
359         // how OpenBMC utilizes the targets in the reboot scenario
360         info("Received signal that host has crashed, decrement reboot count");
361 
362         // A host crash can cause a reboot of the host so decrement the reboot
363         // count
364         decrementRebootCount();
365     }
366 }
367 
368 uint32_t Host::decrementRebootCount()
369 {
370     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
371     if (rebootCount > 0)
372     {
373         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
374     }
375     return rebootCount;
376 }
377 
// Write this object's persisted state as JSON (via cereal) to the per-host
// file derived from HOST_STATE_PERSIST_PATH and this host's id.
// Returns the path that was written. The stream is not checked for errors.
fs::path Host::serialize()
{
    fs::path persistFile{std::format(HOST_STATE_PERSIST_PATH, id)};
    std::ofstream out(persistFile.c_str(), std::ios::binary);
    {
        // The archive finishes its JSON output when it goes out of scope.
        cereal::JSONOutputArchive archive(out);
        archive(*this);
    }
    return persistFile;
}
386 
// Load this object's persisted state from the per-host JSON file derived from
// HOST_STATE_PERSIST_PATH and this host's id.
//
// Returns true when the file existed and was read, false when it is missing.
// A cereal exception while reading is logged, the file is deleted, and false
// is returned.
bool Host::deserialize()
{
    fs::path persistFile{std::format(HOST_STATE_PERSIST_PATH, id)};
    try
    {
        if (!fs::exists(persistFile))
        {
            return false;
        }

        std::ifstream in(persistFile.c_str(), std::ios::in | std::ios::binary);
        cereal::JSONInputArchive archive(in);
        archive(*this);
        return true;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);
        // Drop the unreadable file so it is not retried.
        fs::remove(persistFile);
        return false;
    }
}
408 
409 Host::Transition Host::requestedHostTransition(Transition value)
410 {
411     info("Host state transition request of {REQ}", "REQ", value);
412 
413 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
414     if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
415     {
416         info("BMC State is not Ready so no host on operations allowed");
417         throw sdbusplus::xyz::openbmc_project::State::Host::Error::
418             BMCNotReady();
419     }
420 #endif
421 
422     // If this is not a power off request then we need to
423     // decrement the reboot counter.  This code should
424     // never prevent a power on, it should just decrement
425     // the count to 0.  The quiesce handling is where the
426     // check of this count will occur
427     if (value != server::Host::Transition::Off)
428     {
429         decrementRebootCount();
430     }
431 
432     executeTransition(value);
433 
434     auto retVal = server::Host::requestedHostTransition(value);
435 
436     serialize();
437     return retVal;
438 }
439 
440 Host::ProgressStages Host::bootProgress(ProgressStages value)
441 {
442     auto retVal = bootprogress::Progress::bootProgress(value);
443     serialize();
444     return retVal;
445 }
446 
447 Host::OSStatus Host::operatingSystemState(OSStatus value)
448 {
449     auto retVal = osstatus::Status::operatingSystemState(value);
450     serialize();
451     return retVal;
452 }
453 
454 Host::HostState Host::currentHostState(HostState value)
455 {
456     info("Change to Host State: {STATE}", "STATE", value);
457     return server::Host::currentHostState(value);
458 }
459 
460 } // namespace manager
461 } // namespace state
462 } // namespace phosphor
463