1 #include "config.h"
2 
3 #include "host_state_manager.hpp"
4 
5 #include "host_check.hpp"
6 #include "utils.hpp"
7 
8 #include <stdio.h>
9 #include <systemd/sd-bus.h>
10 
11 #include <cereal/archives/json.hpp>
12 #include <cereal/cereal.hpp>
13 #include <cereal/types/string.hpp>
14 #include <cereal/types/tuple.hpp>
15 #include <cereal/types/vector.hpp>
16 #include <phosphor-logging/elog-errors.hpp>
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/exception.hpp>
19 #include <sdbusplus/server.hpp>
20 #include <xyz/openbmc_project/Common/error.hpp>
21 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
22 #include <xyz/openbmc_project/State/Host/error.hpp>
23 
#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <system_error>
31 
32 // Register class version with Cereal
33 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
34 
35 namespace phosphor
36 {
37 namespace state
38 {
39 namespace manager
40 {
41 
42 PHOSPHOR_LOG2_USING;
43 
44 // When you see server:: or reboot:: you know we're referencing our base class
45 namespace server = sdbusplus::server::xyz::openbmc_project::state;
46 namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
47 namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
48 namespace osstatus =
49     sdbusplus::server::xyz::openbmc_project::state::operating_system;
50 using namespace phosphor::logging;
51 namespace fs = std::filesystem;
52 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
53 
// Values of a systemd unit's "ActiveState" property that stateActive()
// accepts as meaning the target is up.
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus service, object path and interface used for the systemd manager
// method calls in this file ("StartUnit" and "GetUnit").
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Generic D-Bus properties interface. Despite the SYSTEMD_ prefix it is also
// used for the "Get" calls on the settings objects in isAutoReboot().
constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
// Interface of a systemd unit object from which "ActiveState" is read.
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
63 
64 void Host::determineInitialState()
65 {
66     if (stateActive(getTarget(server::Host::HostState::Running)) ||
67         isHostRunning(id))
68     {
69         info("Initial Host State will be Running");
70         server::Host::currentHostState(HostState::Running);
71         server::Host::requestedHostTransition(Transition::On);
72     }
73     else
74     {
75         info("Initial Host State will be Off");
76         server::Host::currentHostState(HostState::Off);
77         server::Host::requestedHostTransition(Transition::Off);
78     }
79 
80     if (!deserialize())
81     {
82         // set to default value.
83         server::Host::requestedHostTransition(Transition::Off);
84     }
85     return;
86 }
87 
88 void Host::setupSupportedTransitions()
89 {
90     std::set<Transition> supportedTransitions = {
91         Transition::On,
92         Transition::Off,
93         Transition::Reboot,
94         Transition::GracefulWarmReboot,
95 #if ENABLE_FORCE_WARM_REBOOT
96         Transition::ForceWarmReboot,
97 #endif
98     };
99     server::Host::allowedHostTransitions(supportedTransitions);
100 }
101 
// Build the per-instance lookup tables that map host states and requested
// transitions to systemd target names, and record the host crash target.
// Every target name is instantiated with this object's host id.
void Host::createSystemdTargetMaps()
{
    // Some systems do not support a warm reboot so just map the warm reboot
    // requests to our normal cold reboot in that case
#if ENABLE_WARM_REBOOT
    auto gracefulWarmTarget =
        std::format("obmc-host-warm-reboot@{}.target", id);
    auto forceWarmTarget =
        std::format("obmc-host-force-warm-reboot@{}.target", id);
#else
    auto gracefulWarmTarget = std::format("obmc-host-reboot@{}.target", id);
    auto forceWarmTarget = std::format("obmc-host-reboot@{}.target", id);
#endif

    stateTargetTable = {
        {HostState::Off, std::format("obmc-host-stop@{}.target", id)},
        {HostState::Running, std::format("obmc-host-startmin@{}.target", id)},
        {HostState::Quiesced, std::format("obmc-host-quiesce@{}.target", id)},
        {HostState::DiagnosticMode,
         std::format("obmc-host-diagnostic-mode@{}.target", id)}};

    transitionTargetTable = {
        {Transition::Off, std::format("obmc-host-shutdown@{}.target", id)},
        {Transition::On, std::format("obmc-host-start@{}.target", id)},
        {Transition::Reboot, std::format("obmc-host-reboot@{}.target", id)},
        {Transition::GracefulWarmReboot, std::move(gracefulWarmTarget)},
        {Transition::ForceWarmReboot, std::move(forceWarmTarget)}};

    hostCrashTarget = std::format("obmc-host-crash@{}.target", id);
}
132 
133 const std::string& Host::getTarget(HostState state)
134 {
135     return stateTargetTable[state];
136 };
137 
138 const std::string& Host::getTarget(Transition tranReq)
139 {
140     return transitionTargetTable[tranReq];
141 };
142 
143 void Host::executeTransition(Transition tranReq)
144 {
145     auto& sysdUnit = getTarget(tranReq);
146 
147     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
148                                             SYSTEMD_INTERFACE, "StartUnit");
149 
150     method.append(sysdUnit);
151     method.append("replace");
152 
153     this->bus.call_noreply(method);
154 
155     return;
156 }
157 
158 bool Host::stateActive(const std::string& target)
159 {
160     std::variant<std::string> currentState;
161     sdbusplus::message::object_path unitTargetPath;
162 
163     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
164                                             SYSTEMD_INTERFACE, "GetUnit");
165 
166     method.append(target);
167 
168     try
169     {
170         auto result = this->bus.call(method);
171         result.read(unitTargetPath);
172     }
173     catch (const sdbusplus::exception_t& e)
174     {
175         error("Error in GetUnit call: {ERROR}", "ERROR", e);
176         return false;
177     }
178 
179     method = this->bus.new_method_call(
180         SYSTEMD_SERVICE,
181         static_cast<const std::string&>(unitTargetPath).c_str(),
182         SYSTEMD_PROPERTY_IFACE, "Get");
183 
184     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
185 
186     try
187     {
188         auto result = this->bus.call(method);
189         result.read(currentState);
190     }
191     catch (const sdbusplus::exception_t& e)
192     {
193         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
194         return false;
195     }
196 
197     const auto& currentStateStr = std::get<std::string>(currentState);
198     return currentStateStr == ACTIVE_STATE ||
199            currentStateStr == ACTIVATING_STATE;
200 }
201 
202 bool Host::isAutoReboot()
203 {
204     using namespace settings;
205 
206     /* The logic here is to first check the one-time AutoReboot setting.
207      * If this property is true (the default) then look at the persistent
208      * user setting in the non one-time object, otherwise honor the one-time
209      * setting and do not auto reboot.
210      */
211     auto methodOneTime = bus.new_method_call(
212         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
213         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
214     methodOneTime.append(autoRebootIntf, "AutoReboot");
215 
216     auto methodUserSetting = bus.new_method_call(
217         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
218         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
219     methodUserSetting.append(autoRebootIntf, "AutoReboot");
220 
221     try
222     {
223         auto reply = bus.call(methodOneTime);
224         std::variant<bool> result;
225         reply.read(result);
226         auto autoReboot = std::get<bool>(result);
227 
228         if (!autoReboot)
229         {
230             info("Auto reboot (one-time) disabled");
231             return false;
232         }
233         else
234         {
235             // one-time is true so read the user setting
236             reply = bus.call(methodUserSetting);
237             reply.read(result);
238             autoReboot = std::get<bool>(result);
239         }
240 
241         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
242 
243         if (autoReboot)
244         {
245             if (rebootCounterParam > 0)
246             {
247                 // Reduce BOOTCOUNT by 1
248                 info(
249                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
250                     "BOOTCOUNT", rebootCounterParam);
251                 return true;
252             }
253             else
254             {
255                 // We are at 0 so reset reboot counter and go to quiesce state
256                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
257                 attemptsLeft(reboot::RebootAttempts::retryAttempts());
258 
259                 // Generate log since we will now be sitting in Quiesce
260                 const std::string errorMsg =
261                     "xyz.openbmc_project.State.Error.HostQuiesce";
262                 utils::createError(this->bus, errorMsg,
263                                    sdbusplus::xyz::openbmc_project::Logging::
264                                        server::Entry::Level::Critical);
265 
266                 // Generate BMC dump to assist with debug
267                 utils::createBmcDump(this->bus);
268 
269                 return false;
270             }
271         }
272         else
273         {
274             info("Auto reboot disabled.");
275             return false;
276         }
277     }
278     catch (const sdbusplus::exception_t& e)
279     {
280         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
281         return false;
282     }
283 }
284 
// Process a job-removed notification and update the published host state.
//
// The message is read as (job id, job object path, unit name, result). Only
// jobs whose result is "done" are acted on:
//  - the Off target, while the Running target is not active: the host is
//    marked Off and boot progress / OS state are reset;
//  - the Running target, while it is active: the host is marked Running and
//    the host-running marker file is removed;
//  - the Quiesced target, while it is active: either a reboot is requested
//    (see isAutoReboot()) or the host is marked Quiesced.
void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
{
    uint32_t newStateID{};
    sdbusplus::message::object_path newStateObjPath;
    std::string newStateUnit{};
    std::string newStateResult{};

    // Read the msg and populate each variable
    msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);

    // Every case below requires the job to have completed successfully
    if (newStateResult != "done")
    {
        return;
    }

    if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
        (!stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is off");
        this->currentHostState(server::Host::HostState::Off);
        this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
        this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
             (stateActive(getTarget(server::Host::HostState::Running))))
    {
        info("Received signal that host is running");
        this->currentHostState(server::Host::HostState::Running);

        // Remove temporary file which is utilized for scenarios where the
        // BMC is rebooted while the host is still up.
        // This file is used to indicate to host related systemd services
        // that the host is already running and they should skip running.
        // Once the host state is back to running we can clear this file.
        //
        // NOTE(review): the instance number is hard-coded to 0 here while the
        // rest of this file formats paths with `id` - confirm against the
        // code that creates this file before changing it.
        const std::string hostFile = std::format(HOST_RUNNING_FILE, 0);

        // Use the non-throwing overload: a missing file is not an error, and
        // a filesystem failure must not propagate out of this signal handler.
        std::error_code ec;
        std::filesystem::remove(hostFile, ec);
        if (ec)
        {
            error("Failed to remove {FILE}: {ERROR}", "FILE", hostFile,
                  "ERROR", ec.message());
        }
    }
    else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
             (stateActive(getTarget(server::Host::HostState::Quiesced))))
    {
        if (Host::isAutoReboot())
        {
            info("Beginning reboot...");
            Host::requestedHostTransition(server::Host::Transition::Reboot);
        }
        else
        {
            info("Maintaining quiesce");
            this->currentHostState(server::Host::HostState::Quiesced);
        }
    }
}
338 
339 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
340 {
341     uint32_t newStateID{};
342     sdbusplus::message::object_path newStateObjPath;
343     std::string newStateUnit{};
344 
345     // Read the msg and populate each variable
346     msg.read(newStateID, newStateObjPath, newStateUnit);
347 
348     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
349     {
350         info("Received signal that host is in diagnostice mode");
351         this->currentHostState(server::Host::HostState::DiagnosticMode);
352     }
353     else if ((newStateUnit == hostCrashTarget) &&
354              (server::Host::currentHostState() ==
355               server::Host::HostState::Running))
356     {
357         // Only decrease the boot count if host was running when the host crash
358         // target was started. Systemd will sometimes trigger multiple
359         // JobNew events for the same target. This seems to be related to
360         // how OpenBMC utilizes the targets in the reboot scenario
361         info("Received signal that host has crashed, decrement reboot count");
362 
363         // A host crash can cause a reboot of the host so decrement the reboot
364         // count
365         decrementRebootCount();
366     }
367 }
368 
369 uint32_t Host::decrementRebootCount()
370 {
371     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
372     if (rebootCount > 0)
373     {
374         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
375     }
376     return rebootCount;
377 }
378 
// Write this object to its per-instance persistence file as a cereal JSON
// archive and return the path of that file.
fs::path Host::serialize()
{
    fs::path persistFile{std::format(HOST_STATE_PERSIST_PATH, id)};

    std::ofstream out(persistFile.c_str(), std::ios::binary);
    cereal::JSONOutputArchive archive(out);
    archive(*this);

    return persistFile;
}
387 
// Load this object from its per-instance persistence file.
//
// Returns true when the file existed and the cereal JSON archive was read
// successfully. Returns false when the file is absent (or its existence
// cannot be determined), or when cereal rejects the contents; in the latter
// case the exception is logged and the file is removed.
//
// Filesystem queries use the non-throwing overloads so that a
// std::filesystem::filesystem_error cannot escape past the cereal-only
// exception handler.
bool Host::deserialize()
{
    fs::path path{std::format(HOST_STATE_PERSIST_PATH, id)};

    std::error_code ec;
    if (!fs::exists(path, ec))
    {
        return false;
    }

    try
    {
        std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
        cereal::JSONInputArchive iarchive(is);
        iarchive(*this);
        return true;
    }
    catch (const cereal::Exception& e)
    {
        error("deserialize exception: {ERROR}", "ERROR", e);

        // Best effort: drop the unreadable file without throwing from
        // inside the handler.
        fs::remove(path, ec);
        if (ec)
        {
            error("Failed to remove {FILE}: {ERROR}", "FILE", path.string(),
                  "ERROR", ec.message());
        }
        return false;
    }
}
409 
410 Host::Transition Host::requestedHostTransition(Transition value)
411 {
412     info("Host state transition request of {REQ}", "REQ", value);
413 
414 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
415     if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
416     {
417         info("BMC State is not Ready so no host on operations allowed");
418         throw sdbusplus::xyz::openbmc_project::State::Host::Error::
419             BMCNotReady();
420     }
421 #endif
422 
423     // If this is not a power off request then we need to
424     // decrement the reboot counter.  This code should
425     // never prevent a power on, it should just decrement
426     // the count to 0.  The quiesce handling is where the
427     // check of this count will occur
428     if (value != server::Host::Transition::Off)
429     {
430         decrementRebootCount();
431     }
432 
433     executeTransition(value);
434 
435     auto retVal = server::Host::requestedHostTransition(value);
436 
437     serialize();
438     return retVal;
439 }
440 
441 Host::ProgressStages Host::bootProgress(ProgressStages value)
442 {
443     auto retVal = bootprogress::Progress::bootProgress(value);
444     serialize();
445     return retVal;
446 }
447 
448 Host::OSStatus Host::operatingSystemState(OSStatus value)
449 {
450     auto retVal = osstatus::Status::operatingSystemState(value);
451     serialize();
452     return retVal;
453 }
454 
455 Host::HostState Host::currentHostState(HostState value)
456 {
457     info("Change to Host State: {STATE}", "STATE", value);
458     return server::Host::currentHostState(value);
459 }
460 
461 } // namespace manager
462 } // namespace state
463 } // namespace phosphor
464