1 #include "config.h"
2 
3 #include "host_state_manager.hpp"
4 
5 #include "host_check.hpp"
6 #include "utils.hpp"
7 
8 #include <fmt/format.h>
9 #include <stdio.h>
10 #include <systemd/sd-bus.h>
11 
12 #include <cereal/archives/json.hpp>
13 #include <cereal/cereal.hpp>
14 #include <cereal/types/string.hpp>
15 #include <cereal/types/tuple.hpp>
16 #include <cereal/types/vector.hpp>
17 #include <phosphor-logging/elog-errors.hpp>
18 #include <phosphor-logging/lg2.hpp>
19 #include <sdbusplus/exception.hpp>
20 #include <sdbusplus/server.hpp>
21 #include <xyz/openbmc_project/Common/error.hpp>
22 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
23 
24 #include <filesystem>
25 #include <fstream>
26 #include <iostream>
27 #include <map>
28 #include <string>
29 
// Register class version with Cereal.
// CLASS_VERSION is not defined in this file (presumably it comes from
// config.h or the Host header - TODO confirm). The registration applies to
// the Host archive written by Host::serialize() and read back by
// Host::deserialize() further down.
CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
32 
33 namespace phosphor
34 {
35 namespace state
36 {
37 namespace manager
38 {
39 
// Logging setup macro from phosphor-logging's lg2 header; presumably this is
// what lets the code below call info()/error() unqualified - TODO confirm.
PHOSPHOR_LOG2_USING;

// When you see server:: or reboot:: you know we're referencing our base class
// (the sdbusplus bindings for the D-Bus interfaces); bootprogress:: and
// osstatus:: are used the same way for the boot progress and operating
// system status interfaces.
namespace server = sdbusplus::xyz::openbmc_project::State::server;
namespace reboot = sdbusplus::xyz::openbmc_project::Control::Boot::server;
namespace bootprogress = sdbusplus::xyz::openbmc_project::State::Boot::server;
namespace osstatus =
    sdbusplus::xyz::openbmc_project::State::OperatingSystem::server;
// Makes phosphor::logging names available unqualified (presumably including
// the elog<>() used in subscribeToSystemdSignals()).
using namespace phosphor::logging;
namespace fs = std::filesystem;
// Error type passed to elog<>() when subscribing to systemd signals fails.
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
51 
// Values of a systemd unit's ActiveState property that stateActive() treats
// as "this target is up".
constexpr const char* ACTIVE_STATE = "active";
constexpr const char* ACTIVATING_STATE = "activating";

// D-Bus service name, object path and manager interface used for every
// systemd method call in this file.
constexpr const char* SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr const char* SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr const char* SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Generic D-Bus properties interface (used for all property "Get" calls in
// this file, not only the systemd ones) and the systemd unit interface that
// carries the ActiveState property.
constexpr const char* SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
constexpr const char* SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
61 
62 void Host::subscribeToSystemdSignals()
63 {
64     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
65                                             SYSTEMD_INTERFACE, "Subscribe");
66     try
67     {
68         this->bus.call_noreply(method);
69     }
70     catch (const sdbusplus::exception::exception& e)
71     {
72         error("Failed to subscribe to systemd signals: {ERROR}", "ERROR", e);
73         elog<InternalFailure>();
74     }
75     return;
76 }
77 
78 void Host::determineInitialState()
79 {
80     if (stateActive(getTarget(server::Host::HostState::Running)) ||
81         isHostRunning(id))
82     {
83         info("Initial Host State will be Running");
84         server::Host::currentHostState(HostState::Running);
85         server::Host::requestedHostTransition(Transition::On);
86     }
87     else
88     {
89         info("Initial Host State will be Off");
90         server::Host::currentHostState(HostState::Off);
91         server::Host::requestedHostTransition(Transition::Off);
92     }
93 
94     if (!deserialize())
95     {
96         // set to default value.
97         server::Host::requestedHostTransition(Transition::Off);
98     }
99     return;
100 }
101 
102 void Host::createSystemdTargetMaps()
103 {
104     stateTargetTable = {
105         {HostState::Off, fmt::format("obmc-host-stop@{}.target", id)},
106         {HostState::Running, fmt::format("obmc-host-startmin@{}.target", id)},
107         {HostState::Quiesced, fmt::format("obmc-host-quiesce@{}.target", id)},
108         {HostState::DiagnosticMode,
109          fmt::format("obmc-host-diagnostic-mode@{}.target", id)}};
110 
111     transitionTargetTable = {
112         {Transition::Off, fmt::format("obmc-host-shutdown@{}.target", id)},
113         {Transition::On, fmt::format("obmc-host-start@{}.target", id)},
114         {Transition::Reboot, fmt::format("obmc-host-reboot@{}.target", id)},
115 // Some systems do not support a warm reboot so just map the reboot
116 // requests to our normal cold reboot in that case
117 #if ENABLE_WARM_REBOOT
118         {Transition::GracefulWarmReboot,
119          fmt::format("obmc-host-warm-reboot@{}.target", id)},
120         {Transition::ForceWarmReboot,
121          fmt::format("obmc-host-force-warm-reboot@{}.target", id)}
122     };
123 #else
124         {Transition::GracefulWarmReboot,
125          fmt::format("obmc-host-reboot@{}.target", id)},
126         {Transition::ForceWarmReboot,
127          fmt::format("obmc-host-reboot@{}.target", id)}
128     };
129 #endif
130 }
131 
132 const std::string& Host::getTarget(HostState state)
133 {
134     return stateTargetTable[state];
135 };
136 
137 const std::string& Host::getTarget(Transition tranReq)
138 {
139     return transitionTargetTable[tranReq];
140 };
141 
142 void Host::executeTransition(Transition tranReq)
143 {
144     auto& sysdUnit = getTarget(tranReq);
145 
146     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
147                                             SYSTEMD_INTERFACE, "StartUnit");
148 
149     method.append(sysdUnit);
150     method.append("replace");
151 
152     this->bus.call_noreply(method);
153 
154     return;
155 }
156 
157 bool Host::stateActive(const std::string& target)
158 {
159     std::variant<std::string> currentState;
160     sdbusplus::message::object_path unitTargetPath;
161 
162     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
163                                             SYSTEMD_INTERFACE, "GetUnit");
164 
165     method.append(target);
166 
167     try
168     {
169         auto result = this->bus.call(method);
170         result.read(unitTargetPath);
171     }
172     catch (const sdbusplus::exception::exception& e)
173     {
174         error("Error in GetUnit call: {ERROR}", "ERROR", e);
175         return false;
176     }
177 
178     method = this->bus.new_method_call(
179         SYSTEMD_SERVICE,
180         static_cast<const std::string&>(unitTargetPath).c_str(),
181         SYSTEMD_PROPERTY_IFACE, "Get");
182 
183     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
184 
185     try
186     {
187         auto result = this->bus.call(method);
188         result.read(currentState);
189     }
190     catch (const sdbusplus::exception::exception& e)
191     {
192         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
193         return false;
194     }
195 
196     const auto& currentStateStr = std::get<std::string>(currentState);
197     return currentStateStr == ACTIVE_STATE ||
198            currentStateStr == ACTIVATING_STATE;
199 }
200 
201 bool Host::isAutoReboot()
202 {
203     using namespace settings;
204 
205     /* The logic here is to first check the one-time AutoReboot setting.
206      * If this property is true (the default) then look at the persistent
207      * user setting in the non one-time object, otherwise honor the one-time
208      * setting and do not auto reboot.
209      */
210     auto methodOneTime = bus.new_method_call(
211         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
212         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
213     methodOneTime.append(autoRebootIntf, "AutoReboot");
214 
215     auto methodUserSetting = bus.new_method_call(
216         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
217         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
218     methodUserSetting.append(autoRebootIntf, "AutoReboot");
219 
220     try
221     {
222         auto reply = bus.call(methodOneTime);
223         std::variant<bool> result;
224         reply.read(result);
225         auto autoReboot = std::get<bool>(result);
226 
227         if (!autoReboot)
228         {
229             info("Auto reboot (one-time) disabled");
230             return false;
231         }
232         else
233         {
234             // one-time is true so read the user setting
235             reply = bus.call(methodUserSetting);
236             reply.read(result);
237             autoReboot = std::get<bool>(result);
238         }
239 
240         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
241 
242         if (autoReboot)
243         {
244             if (rebootCounterParam > 0)
245             {
246                 // Reduce BOOTCOUNT by 1
247                 info(
248                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
249                     "BOOTCOUNT", rebootCounterParam);
250                 return true;
251             }
252             else
253             {
254                 // We are at 0 so reset reboot counter and go to quiesce state
255                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
256                 attemptsLeft(BOOT_COUNT_MAX_ALLOWED);
257 
258                 // Generate log since we will now be sitting in Quiesce
259                 const std::string errorMsg =
260                     "xyz.openbmc_project.State.Error.HostQuiesce";
261                 utils::createError(this->bus, errorMsg,
262                                    sdbusplus::xyz::openbmc_project::Logging::
263                                        server::Entry::Level::Critical);
264 
265                 // Generate BMC dump to assist with debug
266                 utils::createBmcDump(this->bus);
267 
268                 return false;
269             }
270         }
271         else
272         {
273             info("Auto reboot disabled.");
274             return false;
275         }
276     }
277     catch (const sdbusplus::exception::exception& e)
278     {
279         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
280         return false;
281     }
282 }
283 
284 void Host::sysStateChangeJobRemoved(sdbusplus::message::message& msg)
285 {
286     uint32_t newStateID{};
287     sdbusplus::message::object_path newStateObjPath;
288     std::string newStateUnit{};
289     std::string newStateResult{};
290 
291     // Read the msg and populate each variable
292     msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);
293 
294     if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
295         (newStateResult == "done") &&
296         (!stateActive(getTarget(server::Host::HostState::Running))))
297     {
298         info("Received signal that host is off");
299         this->currentHostState(server::Host::HostState::Off);
300         this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
301         this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
302     }
303     else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
304              (newStateResult == "done") &&
305              (stateActive(getTarget(server::Host::HostState::Running))))
306     {
307         info("Received signal that host is running");
308         this->currentHostState(server::Host::HostState::Running);
309 
310         // Remove temporary file which is utilized for scenarios where the
311         // BMC is rebooted while the host is still up.
312         // This file is used to indicate to host related systemd services
313         // that the host is already running and they should skip running.
314         // Once the host state is back to running we can clear this file.
315         auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
316         size++; // null
317         std::unique_ptr<char[]> hostFile(new char[size]);
318         std::snprintf(hostFile.get(), size, HOST_RUNNING_FILE, 0);
319         if (std::filesystem::exists(hostFile.get()))
320         {
321             std::filesystem::remove(hostFile.get());
322         }
323     }
324     else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
325              (newStateResult == "done") &&
326              (stateActive(getTarget(server::Host::HostState::Quiesced))))
327     {
328         if (Host::isAutoReboot())
329         {
330             info("Beginning reboot...");
331             Host::requestedHostTransition(server::Host::Transition::Reboot);
332         }
333         else
334         {
335             info("Maintaining quiesce");
336             this->currentHostState(server::Host::HostState::Quiesced);
337         }
338     }
339 }
340 
341 void Host::sysStateChangeJobNew(sdbusplus::message::message& msg)
342 {
343     uint32_t newStateID{};
344     sdbusplus::message::object_path newStateObjPath;
345     std::string newStateUnit{};
346 
347     // Read the msg and populate each variable
348     msg.read(newStateID, newStateObjPath, newStateUnit);
349 
350     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
351     {
352         info("Received signal that host is in diagnostice mode");
353         this->currentHostState(server::Host::HostState::DiagnosticMode);
354     }
355 }
356 
357 uint32_t Host::decrementRebootCount()
358 {
359     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
360     if (rebootCount > 0)
361     {
362         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
363     }
364     return rebootCount;
365 }
366 
367 fs::path Host::serialize()
368 {
369     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
370     std::ofstream os(path.c_str(), std::ios::binary);
371     cereal::JSONOutputArchive oarchive(os);
372     oarchive(*this);
373     return path;
374 }
375 
376 bool Host::deserialize()
377 {
378     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
379     try
380     {
381         if (fs::exists(path))
382         {
383             std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
384             cereal::JSONInputArchive iarchive(is);
385             iarchive(*this);
386             return true;
387         }
388         return false;
389     }
390     catch (const cereal::Exception& e)
391     {
392         error("deserialize exception: {ERROR}", "ERROR", e);
393         fs::remove(path);
394         return false;
395     }
396 }
397 
398 Host::Transition Host::requestedHostTransition(Transition value)
399 {
400     info("Host state transition request of {REQ}", "REQ", value);
401     // If this is not a power off request then we need to
402     // decrement the reboot counter.  This code should
403     // never prevent a power on, it should just decrement
404     // the count to 0.  The quiesce handling is where the
405     // check of this count will occur
406     if (value != server::Host::Transition::Off)
407     {
408         decrementRebootCount();
409     }
410 
411     executeTransition(value);
412 
413     auto retVal = server::Host::requestedHostTransition(value);
414 
415     serialize();
416     return retVal;
417 }
418 
419 Host::ProgressStages Host::bootProgress(ProgressStages value)
420 {
421     auto retVal = bootprogress::Progress::bootProgress(value);
422     serialize();
423     return retVal;
424 }
425 
426 Host::OSStatus Host::operatingSystemState(OSStatus value)
427 {
428     auto retVal = osstatus::Status::operatingSystemState(value);
429     serialize();
430     return retVal;
431 }
432 
433 Host::HostState Host::currentHostState(HostState value)
434 {
435     info("Change to Host State: {STATE}", "STATE", value);
436     return server::Host::currentHostState(value);
437 }
438 
439 } // namespace manager
440 } // namespace state
441 } // namespace phosphor
442