1 #include "config.h"
2 
3 #include "host_state_manager.hpp"
4 
5 #include "host_check.hpp"
6 #include "utils.hpp"
7 
8 #include <fmt/format.h>
9 #include <stdio.h>
10 #include <systemd/sd-bus.h>
11 
12 #include <cereal/archives/json.hpp>
13 #include <cereal/cereal.hpp>
14 #include <cereal/types/string.hpp>
15 #include <cereal/types/tuple.hpp>
16 #include <cereal/types/vector.hpp>
17 #include <phosphor-logging/elog-errors.hpp>
18 #include <phosphor-logging/lg2.hpp>
19 #include <sdbusplus/exception.hpp>
20 #include <sdbusplus/server.hpp>
21 #include <xyz/openbmc_project/Common/error.hpp>
22 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
23 #include <xyz/openbmc_project/State/Host/error.hpp>
24 
25 #include <filesystem>
26 #include <fstream>
27 #include <iostream>
28 #include <map>
29 #include <string>
30 
31 // Register class version with Cereal
32 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
33 
34 namespace phosphor
35 {
36 namespace state
37 {
38 namespace manager
39 {
40 
41 PHOSPHOR_LOG2_USING;
42 
43 // When you see server:: or reboot:: you know we're referencing our base class
44 namespace server = sdbusplus::server::xyz::openbmc_project::state;
45 namespace reboot = sdbusplus::server::xyz::openbmc_project::control::boot;
46 namespace bootprogress = sdbusplus::server::xyz::openbmc_project::state::boot;
47 namespace osstatus =
48     sdbusplus::server::xyz::openbmc_project::state::operating_system;
49 using namespace phosphor::logging;
50 namespace fs = std::filesystem;
51 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
52 
// Values of a systemd unit's "ActiveState" property that stateActive()
// accepts as meaning the target is up.
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus service name, object path and interface of the systemd manager;
// used below for the StartUnit and GetUnit method calls.
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// D-Bus properties interface used for the property "Get" calls below, and
// the systemd unit interface on which "ActiveState" is read.
constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
62 
63 void Host::determineInitialState()
64 {
65     if (stateActive(getTarget(server::Host::HostState::Running)) ||
66         isHostRunning(id))
67     {
68         info("Initial Host State will be Running");
69         server::Host::currentHostState(HostState::Running);
70         server::Host::requestedHostTransition(Transition::On);
71     }
72     else
73     {
74         info("Initial Host State will be Off");
75         server::Host::currentHostState(HostState::Off);
76         server::Host::requestedHostTransition(Transition::Off);
77     }
78 
79     if (!deserialize())
80     {
81         // set to default value.
82         server::Host::requestedHostTransition(Transition::Off);
83     }
84     return;
85 }
86 
87 void Host::createSystemdTargetMaps()
88 {
89     stateTargetTable = {
90         {HostState::Off, fmt::format("obmc-host-stop@{}.target", id)},
91         {HostState::Running, fmt::format("obmc-host-startmin@{}.target", id)},
92         {HostState::Quiesced, fmt::format("obmc-host-quiesce@{}.target", id)},
93         {HostState::DiagnosticMode,
94          fmt::format("obmc-host-diagnostic-mode@{}.target", id)}};
95 
96     transitionTargetTable = {
97         {Transition::Off, fmt::format("obmc-host-shutdown@{}.target", id)},
98         {Transition::On, fmt::format("obmc-host-start@{}.target", id)},
99         {Transition::Reboot, fmt::format("obmc-host-reboot@{}.target", id)},
100 // Some systems do not support a warm reboot so just map the reboot
101 // requests to our normal cold reboot in that case
102 #if ENABLE_WARM_REBOOT
103         {Transition::GracefulWarmReboot,
104          fmt::format("obmc-host-warm-reboot@{}.target", id)},
105         {Transition::ForceWarmReboot,
106          fmt::format("obmc-host-force-warm-reboot@{}.target", id)}
107     };
108 #else
109         {Transition::GracefulWarmReboot,
110          fmt::format("obmc-host-reboot@{}.target", id)},
111         {Transition::ForceWarmReboot,
112          fmt::format("obmc-host-reboot@{}.target", id)}
113     };
114 #endif
115     hostCrashTarget = fmt::format("obmc-host-crash@{}.target", id);
116 }
117 
118 const std::string& Host::getTarget(HostState state)
119 {
120     return stateTargetTable[state];
121 };
122 
123 const std::string& Host::getTarget(Transition tranReq)
124 {
125     return transitionTargetTable[tranReq];
126 };
127 
128 void Host::executeTransition(Transition tranReq)
129 {
130     auto& sysdUnit = getTarget(tranReq);
131 
132     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
133                                             SYSTEMD_INTERFACE, "StartUnit");
134 
135     method.append(sysdUnit);
136     method.append("replace");
137 
138     this->bus.call_noreply(method);
139 
140     return;
141 }
142 
143 bool Host::stateActive(const std::string& target)
144 {
145     std::variant<std::string> currentState;
146     sdbusplus::message::object_path unitTargetPath;
147 
148     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
149                                             SYSTEMD_INTERFACE, "GetUnit");
150 
151     method.append(target);
152 
153     try
154     {
155         auto result = this->bus.call(method);
156         result.read(unitTargetPath);
157     }
158     catch (const sdbusplus::exception_t& e)
159     {
160         error("Error in GetUnit call: {ERROR}", "ERROR", e);
161         return false;
162     }
163 
164     method = this->bus.new_method_call(
165         SYSTEMD_SERVICE,
166         static_cast<const std::string&>(unitTargetPath).c_str(),
167         SYSTEMD_PROPERTY_IFACE, "Get");
168 
169     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
170 
171     try
172     {
173         auto result = this->bus.call(method);
174         result.read(currentState);
175     }
176     catch (const sdbusplus::exception_t& e)
177     {
178         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
179         return false;
180     }
181 
182     const auto& currentStateStr = std::get<std::string>(currentState);
183     return currentStateStr == ACTIVE_STATE ||
184            currentStateStr == ACTIVATING_STATE;
185 }
186 
187 bool Host::isAutoReboot()
188 {
189     using namespace settings;
190 
191     /* The logic here is to first check the one-time AutoReboot setting.
192      * If this property is true (the default) then look at the persistent
193      * user setting in the non one-time object, otherwise honor the one-time
194      * setting and do not auto reboot.
195      */
196     auto methodOneTime = bus.new_method_call(
197         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
198         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
199     methodOneTime.append(autoRebootIntf, "AutoReboot");
200 
201     auto methodUserSetting = bus.new_method_call(
202         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
203         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
204     methodUserSetting.append(autoRebootIntf, "AutoReboot");
205 
206     try
207     {
208         auto reply = bus.call(methodOneTime);
209         std::variant<bool> result;
210         reply.read(result);
211         auto autoReboot = std::get<bool>(result);
212 
213         if (!autoReboot)
214         {
215             info("Auto reboot (one-time) disabled");
216             return false;
217         }
218         else
219         {
220             // one-time is true so read the user setting
221             reply = bus.call(methodUserSetting);
222             reply.read(result);
223             autoReboot = std::get<bool>(result);
224         }
225 
226         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
227 
228         if (autoReboot)
229         {
230             if (rebootCounterParam > 0)
231             {
232                 // Reduce BOOTCOUNT by 1
233                 info(
234                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
235                     "BOOTCOUNT", rebootCounterParam);
236                 return true;
237             }
238             else
239             {
240                 // We are at 0 so reset reboot counter and go to quiesce state
241                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
242                 attemptsLeft(reboot::RebootAttempts::retryAttempts());
243 
244                 // Generate log since we will now be sitting in Quiesce
245                 const std::string errorMsg =
246                     "xyz.openbmc_project.State.Error.HostQuiesce";
247                 utils::createError(this->bus, errorMsg,
248                                    sdbusplus::xyz::openbmc_project::Logging::
249                                        server::Entry::Level::Critical);
250 
251                 // Generate BMC dump to assist with debug
252                 utils::createBmcDump(this->bus);
253 
254                 return false;
255             }
256         }
257         else
258         {
259             info("Auto reboot disabled.");
260             return false;
261         }
262     }
263     catch (const sdbusplus::exception_t& e)
264     {
265         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
266         return false;
267     }
268 }
269 
270 void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
271 {
272     uint32_t newStateID{};
273     sdbusplus::message::object_path newStateObjPath;
274     std::string newStateUnit{};
275     std::string newStateResult{};
276 
277     // Read the msg and populate each variable
278     msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);
279 
280     if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
281         (newStateResult == "done") &&
282         (!stateActive(getTarget(server::Host::HostState::Running))))
283     {
284         info("Received signal that host is off");
285         this->currentHostState(server::Host::HostState::Off);
286         this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
287         this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
288     }
289     else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
290              (newStateResult == "done") &&
291              (stateActive(getTarget(server::Host::HostState::Running))))
292     {
293         info("Received signal that host is running");
294         this->currentHostState(server::Host::HostState::Running);
295 
296         // Remove temporary file which is utilized for scenarios where the
297         // BMC is rebooted while the host is still up.
298         // This file is used to indicate to host related systemd services
299         // that the host is already running and they should skip running.
300         // Once the host state is back to running we can clear this file.
301         auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
302         size++; // null
303         std::unique_ptr<char[]> hostFile(new char[size]);
304         std::snprintf(hostFile.get(), size, HOST_RUNNING_FILE, 0);
305         if (std::filesystem::exists(hostFile.get()))
306         {
307             std::filesystem::remove(hostFile.get());
308         }
309     }
310     else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
311              (newStateResult == "done") &&
312              (stateActive(getTarget(server::Host::HostState::Quiesced))))
313     {
314         if (Host::isAutoReboot())
315         {
316             info("Beginning reboot...");
317             Host::requestedHostTransition(server::Host::Transition::Reboot);
318         }
319         else
320         {
321             info("Maintaining quiesce");
322             this->currentHostState(server::Host::HostState::Quiesced);
323         }
324     }
325 }
326 
327 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
328 {
329     uint32_t newStateID{};
330     sdbusplus::message::object_path newStateObjPath;
331     std::string newStateUnit{};
332 
333     // Read the msg and populate each variable
334     msg.read(newStateID, newStateObjPath, newStateUnit);
335 
336     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
337     {
338         info("Received signal that host is in diagnostice mode");
339         this->currentHostState(server::Host::HostState::DiagnosticMode);
340     }
341     else if ((newStateUnit == hostCrashTarget) &&
342              (server::Host::currentHostState() ==
343               server::Host::HostState::Running))
344     {
345         // Only decrease the boot count if host was running when the host crash
346         // target was started. Systemd will sometimes trigger multiple
347         // JobNew events for the same target. This seems to be related to
348         // how OpenBMC utilizes the targets in the reboot scenario
349         info("Received signal that host has crashed, decrement reboot count");
350 
351         // A host crash can cause a reboot of the host so decrement the reboot
352         // count
353         decrementRebootCount();
354     }
355 }
356 
357 uint32_t Host::decrementRebootCount()
358 {
359     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
360     if (rebootCount > 0)
361     {
362         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
363     }
364     return rebootCount;
365 }
366 
367 fs::path Host::serialize()
368 {
369     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
370     std::ofstream os(path.c_str(), std::ios::binary);
371     cereal::JSONOutputArchive oarchive(os);
372     oarchive(*this);
373     return path;
374 }
375 
376 bool Host::deserialize()
377 {
378     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
379     try
380     {
381         if (fs::exists(path))
382         {
383             std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
384             cereal::JSONInputArchive iarchive(is);
385             iarchive(*this);
386             return true;
387         }
388         return false;
389     }
390     catch (const cereal::Exception& e)
391     {
392         error("deserialize exception: {ERROR}", "ERROR", e);
393         fs::remove(path);
394         return false;
395     }
396 }
397 
398 Host::Transition Host::requestedHostTransition(Transition value)
399 {
400     info("Host state transition request of {REQ}", "REQ", value);
401 
402 #if ONLY_ALLOW_BOOT_WHEN_BMC_READY
403     if ((value != Transition::Off) && (!utils::isBmcReady(this->bus)))
404     {
405         info("BMC State is not Ready so no host on operations allowed");
406         throw sdbusplus::xyz::openbmc_project::State::Host::Error::
407             BMCNotReady();
408     }
409 #endif
410 
411     // If this is not a power off request then we need to
412     // decrement the reboot counter.  This code should
413     // never prevent a power on, it should just decrement
414     // the count to 0.  The quiesce handling is where the
415     // check of this count will occur
416     if (value != server::Host::Transition::Off)
417     {
418         decrementRebootCount();
419     }
420 
421     executeTransition(value);
422 
423     auto retVal = server::Host::requestedHostTransition(value);
424 
425     serialize();
426     return retVal;
427 }
428 
429 Host::ProgressStages Host::bootProgress(ProgressStages value)
430 {
431     auto retVal = bootprogress::Progress::bootProgress(value);
432     serialize();
433     return retVal;
434 }
435 
436 Host::OSStatus Host::operatingSystemState(OSStatus value)
437 {
438     auto retVal = osstatus::Status::operatingSystemState(value);
439     serialize();
440     return retVal;
441 }
442 
443 Host::HostState Host::currentHostState(HostState value)
444 {
445     info("Change to Host State: {STATE}", "STATE", value);
446     return server::Host::currentHostState(value);
447 }
448 
449 } // namespace manager
450 } // namespace state
451 } // namespace phosphor
452