1 #include "config.h"
2 
3 #include "host_state_manager.hpp"
4 
5 #include "host_check.hpp"
6 #include "utils.hpp"
7 
8 #include <fmt/format.h>
9 #include <stdio.h>
10 #include <systemd/sd-bus.h>
11 
12 #include <cereal/archives/json.hpp>
13 #include <cereal/cereal.hpp>
14 #include <cereal/types/string.hpp>
15 #include <cereal/types/tuple.hpp>
16 #include <cereal/types/vector.hpp>
17 #include <phosphor-logging/elog-errors.hpp>
18 #include <phosphor-logging/lg2.hpp>
19 #include <sdbusplus/exception.hpp>
20 #include <sdbusplus/server.hpp>
21 #include <xyz/openbmc_project/Common/error.hpp>
22 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
23 
24 #include <filesystem>
25 #include <fstream>
26 #include <iostream>
27 #include <map>
28 #include <string>
29 
30 // Register class version with Cereal
31 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
32 
33 namespace phosphor
34 {
35 namespace state
36 {
37 namespace manager
38 {
39 
40 PHOSPHOR_LOG2_USING;
41 
// When you see server:: or reboot:: you know we're referencing our base class
// (bootprogress:: and osstatus:: are used the same way further down this file)
namespace server = sdbusplus::xyz::openbmc_project::State::server;
namespace reboot = sdbusplus::xyz::openbmc_project::Control::Boot::server;
namespace bootprogress = sdbusplus::xyz::openbmc_project::State::Boot::server;
namespace osstatus =
    sdbusplus::xyz::openbmc_project::State::OperatingSystem::server;
using namespace phosphor::logging;
namespace fs = std::filesystem;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

// Values of a systemd unit's "ActiveState" property that stateActive()
// treats as "the target is active".
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus service name, object path and interface used for the systemd manager
// method calls made in this file (Subscribe, StartUnit, GetUnit).
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Generic D-Bus properties interface (used for every property "Get" in this
// file) and the systemd unit interface that owns the "ActiveState" property.
constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
61 
62 void Host::subscribeToSystemdSignals()
63 {
64     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
65                                             SYSTEMD_INTERFACE, "Subscribe");
66     try
67     {
68         this->bus.call_noreply(method);
69     }
70     catch (const sdbusplus::exception::exception& e)
71     {
72         error("Failed to subscribe to systemd signals: {ERROR}", "ERROR", e);
73         elog<InternalFailure>();
74     }
75     return;
76 }
77 
78 void Host::determineInitialState()
79 {
80 
81     if (stateActive(getTarget(server::Host::HostState::Running)) ||
82         isHostRunning(id))
83     {
84         info("Initial Host State will be Running");
85         server::Host::currentHostState(HostState::Running);
86         server::Host::requestedHostTransition(Transition::On);
87     }
88     else
89     {
90         info("Initial Host State will be Off");
91         server::Host::currentHostState(HostState::Off);
92         server::Host::requestedHostTransition(Transition::Off);
93     }
94 
95     if (!deserialize())
96     {
97         // set to default value.
98         server::Host::requestedHostTransition(Transition::Off);
99     }
100     return;
101 }
102 
103 void Host::createSystemdTargetMaps()
104 {
105     stateTargetTable = {
106         {HostState::Off, fmt::format("obmc-host-stop@{}.target", id)},
107         {HostState::Running, fmt::format("obmc-host-startmin@{}.target", id)},
108         {HostState::Quiesced, fmt::format("obmc-host-quiesce@{}.target", id)},
109         {HostState::DiagnosticMode,
110          fmt::format("obmc-host-diagnostic-mode@{}.target", id)}};
111 
112     transitionTargetTable = {
113         {Transition::Off, fmt::format("obmc-host-shutdown@{}.target", id)},
114         {Transition::On, fmt::format("obmc-host-start@{}.target", id)},
115         {Transition::Reboot, fmt::format("obmc-host-reboot@{}.target", id)},
116 // Some systems do not support a warm reboot so just map the reboot
117 // requests to our normal cold reboot in that case
118 #if ENABLE_WARM_REBOOT
119         {Transition::GracefulWarmReboot,
120          fmt::format("obmc-host-warm-reboot@{}.target", id)},
121         {Transition::ForceWarmReboot,
122          fmt::format("obmc-host-force-warm-reboot@{}.target", id)}
123     };
124 #else
125         {Transition::GracefulWarmReboot,
126          fmt::format("obmc-host-reboot@{}.target", id)},
127         {Transition::ForceWarmReboot,
128          fmt::format("obmc-host-reboot@{}.target", id)}
129     };
130 #endif
131 }
132 
133 const std::string& Host::getTarget(HostState state)
134 {
135     return stateTargetTable[state];
136 };
137 
138 const std::string& Host::getTarget(Transition tranReq)
139 {
140     return transitionTargetTable[tranReq];
141 };
142 
143 void Host::executeTransition(Transition tranReq)
144 {
145     auto& sysdUnit = getTarget(tranReq);
146 
147     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
148                                             SYSTEMD_INTERFACE, "StartUnit");
149 
150     method.append(sysdUnit);
151     method.append("replace");
152 
153     this->bus.call_noreply(method);
154 
155     return;
156 }
157 
158 bool Host::stateActive(const std::string& target)
159 {
160     std::variant<std::string> currentState;
161     sdbusplus::message::object_path unitTargetPath;
162 
163     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
164                                             SYSTEMD_INTERFACE, "GetUnit");
165 
166     method.append(target);
167 
168     try
169     {
170         auto result = this->bus.call(method);
171         result.read(unitTargetPath);
172     }
173     catch (const sdbusplus::exception::exception& e)
174     {
175         error("Error in GetUnit call: {ERROR}", "ERROR", e);
176         return false;
177     }
178 
179     method = this->bus.new_method_call(
180         SYSTEMD_SERVICE,
181         static_cast<const std::string&>(unitTargetPath).c_str(),
182         SYSTEMD_PROPERTY_IFACE, "Get");
183 
184     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
185 
186     try
187     {
188         auto result = this->bus.call(method);
189         result.read(currentState);
190     }
191     catch (const sdbusplus::exception::exception& e)
192     {
193         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
194         return false;
195     }
196 
197     const auto& currentStateStr = std::get<std::string>(currentState);
198     return currentStateStr == ACTIVE_STATE ||
199            currentStateStr == ACTIVATING_STATE;
200 }
201 
202 bool Host::isAutoReboot()
203 {
204     using namespace settings;
205 
206     /* The logic here is to first check the one-time AutoReboot setting.
207      * If this property is true (the default) then look at the persistent
208      * user setting in the non one-time object, otherwise honor the one-time
209      * setting and do not auto reboot.
210      */
211     auto methodOneTime = bus.new_method_call(
212         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
213         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
214     methodOneTime.append(autoRebootIntf, "AutoReboot");
215 
216     auto methodUserSetting = bus.new_method_call(
217         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
218         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
219     methodUserSetting.append(autoRebootIntf, "AutoReboot");
220 
221     try
222     {
223         auto reply = bus.call(methodOneTime);
224         std::variant<bool> result;
225         reply.read(result);
226         auto autoReboot = std::get<bool>(result);
227 
228         if (!autoReboot)
229         {
230             info("Auto reboot (one-time) disabled");
231             return false;
232         }
233         else
234         {
235             // one-time is true so read the user setting
236             reply = bus.call(methodUserSetting);
237             reply.read(result);
238             autoReboot = std::get<bool>(result);
239         }
240 
241         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
242 
243         if (autoReboot)
244         {
245             if (rebootCounterParam > 0)
246             {
247                 // Reduce BOOTCOUNT by 1
248                 info(
249                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
250                     "BOOTCOUNT", rebootCounterParam);
251                 return true;
252             }
253             else
254             {
255                 // We are at 0 so reset reboot counter and go to quiesce state
256                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
257                 attemptsLeft(BOOT_COUNT_MAX_ALLOWED);
258 
259                 // Generate log since we will now be sitting in Quiesce
260                 const std::string errorMsg =
261                     "xyz.openbmc_project.State.Error.HostQuiesce";
262                 utils::createError(this->bus, errorMsg,
263                                    sdbusplus::xyz::openbmc_project::Logging::
264                                        server::Entry::Level::Critical);
265 
266                 // Generate BMC dump to assist with debug
267                 utils::createBmcDump(this->bus);
268 
269                 return false;
270             }
271         }
272         else
273         {
274             info("Auto reboot disabled.");
275             return false;
276         }
277     }
278     catch (const sdbusplus::exception::exception& e)
279     {
280         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
281         return false;
282     }
283 }
284 
285 void Host::sysStateChangeJobRemoved(sdbusplus::message::message& msg)
286 {
287     uint32_t newStateID{};
288     sdbusplus::message::object_path newStateObjPath;
289     std::string newStateUnit{};
290     std::string newStateResult{};
291 
292     // Read the msg and populate each variable
293     msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);
294 
295     if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
296         (newStateResult == "done") &&
297         (!stateActive(getTarget(server::Host::HostState::Running))))
298     {
299         info("Received signal that host is off");
300         this->currentHostState(server::Host::HostState::Off);
301         this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
302         this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
303     }
304     else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
305              (newStateResult == "done") &&
306              (stateActive(getTarget(server::Host::HostState::Running))))
307     {
308         info("Received signal that host is running");
309         this->currentHostState(server::Host::HostState::Running);
310 
311         // Remove temporary file which is utilized for scenarios where the
312         // BMC is rebooted while the host is still up.
313         // This file is used to indicate to host related systemd services
314         // that the host is already running and they should skip running.
315         // Once the host state is back to running we can clear this file.
316         auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
317         size++; // null
318         std::unique_ptr<char[]> hostFile(new char[size]);
319         std::snprintf(hostFile.get(), size, HOST_RUNNING_FILE, 0);
320         if (std::filesystem::exists(hostFile.get()))
321         {
322             std::filesystem::remove(hostFile.get());
323         }
324     }
325     else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
326              (newStateResult == "done") &&
327              (stateActive(getTarget(server::Host::HostState::Quiesced))))
328     {
329         if (Host::isAutoReboot())
330         {
331             info("Beginning reboot...");
332             Host::requestedHostTransition(server::Host::Transition::Reboot);
333         }
334         else
335         {
336             info("Maintaining quiesce");
337             this->currentHostState(server::Host::HostState::Quiesced);
338         }
339     }
340 }
341 
342 void Host::sysStateChangeJobNew(sdbusplus::message::message& msg)
343 {
344     uint32_t newStateID{};
345     sdbusplus::message::object_path newStateObjPath;
346     std::string newStateUnit{};
347 
348     // Read the msg and populate each variable
349     msg.read(newStateID, newStateObjPath, newStateUnit);
350 
351     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
352     {
353         info("Received signal that host is in diagnostice mode");
354         this->currentHostState(server::Host::HostState::DiagnosticMode);
355     }
356 }
357 
358 uint32_t Host::decrementRebootCount()
359 {
360     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
361     if (rebootCount > 0)
362     {
363         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
364     }
365     return rebootCount;
366 }
367 
368 fs::path Host::serialize()
369 {
370     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
371     std::ofstream os(path.c_str(), std::ios::binary);
372     cereal::JSONOutputArchive oarchive(os);
373     oarchive(*this);
374     return path;
375 }
376 
377 bool Host::deserialize()
378 {
379     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
380     try
381     {
382         if (fs::exists(path))
383         {
384             std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
385             cereal::JSONInputArchive iarchive(is);
386             iarchive(*this);
387             return true;
388         }
389         return false;
390     }
391     catch (const cereal::Exception& e)
392     {
393         error("deserialize exception: {ERROR}", "ERROR", e);
394         fs::remove(path);
395         return false;
396     }
397 }
398 
399 Host::Transition Host::requestedHostTransition(Transition value)
400 {
401     info("Host state transition request of {REQ}", "REQ", value);
402     // If this is not a power off request then we need to
403     // decrement the reboot counter.  This code should
404     // never prevent a power on, it should just decrement
405     // the count to 0.  The quiesce handling is where the
406     // check of this count will occur
407     if (value != server::Host::Transition::Off)
408     {
409         decrementRebootCount();
410     }
411 
412     executeTransition(value);
413 
414     auto retVal = server::Host::requestedHostTransition(value);
415 
416     serialize();
417     return retVal;
418 }
419 
420 Host::ProgressStages Host::bootProgress(ProgressStages value)
421 {
422     auto retVal = bootprogress::Progress::bootProgress(value);
423     serialize();
424     return retVal;
425 }
426 
427 Host::OSStatus Host::operatingSystemState(OSStatus value)
428 {
429     auto retVal = osstatus::Status::operatingSystemState(value);
430     serialize();
431     return retVal;
432 }
433 
434 Host::HostState Host::currentHostState(HostState value)
435 {
436     info("Change to Host State: {STATE}", "STATE", value);
437     return server::Host::currentHostState(value);
438 }
439 
440 } // namespace manager
441 } // namespace state
442 } // namespace phosphor
443