1 #include "config.h"
2 
3 #include "host_state_manager.hpp"
4 
5 #include "host_check.hpp"
6 #include "utils.hpp"
7 
8 #include <fmt/format.h>
9 #include <stdio.h>
10 #include <systemd/sd-bus.h>
11 
12 #include <cereal/archives/json.hpp>
13 #include <cereal/cereal.hpp>
14 #include <cereal/types/string.hpp>
15 #include <cereal/types/tuple.hpp>
16 #include <cereal/types/vector.hpp>
17 #include <phosphor-logging/elog-errors.hpp>
18 #include <phosphor-logging/lg2.hpp>
19 #include <sdbusplus/exception.hpp>
20 #include <sdbusplus/server.hpp>
21 #include <xyz/openbmc_project/Common/error.hpp>
22 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
23 
24 #include <filesystem>
25 #include <fstream>
26 #include <iostream>
27 #include <map>
28 #include <string>
29 
// Register class version with Cereal.
// The Host object is archived by Host::serialize() and restored by
// Host::deserialize() further down in this file. CLASS_VERSION is not defined
// here (presumably it comes from config.h -- confirm).
CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
32 
33 namespace phosphor
34 {
35 namespace state
36 {
37 namespace manager
38 {
39 
// Logging setup macro; the unqualified info()/error() calls in this file
// presumably resolve through it (see phosphor-logging/lg2.hpp).
PHOSPHOR_LOG2_USING;

// When you see server::, reboot::, bootprogress:: or osstatus:: you know we're
// referencing one of the generated D-Bus server classes that Host's own
// property overrides forward to.
namespace server = sdbusplus::xyz::openbmc_project::State::server;
namespace reboot = sdbusplus::xyz::openbmc_project::Control::Boot::server;
namespace bootprogress = sdbusplus::xyz::openbmc_project::State::Boot::server;
namespace osstatus =
    sdbusplus::xyz::openbmc_project::State::OperatingSystem::server;
using namespace phosphor::logging;
namespace fs = std::filesystem;
using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

// Values of a systemd unit's ActiveState property that stateActive() treats
// as "the target is active".
constexpr auto ACTIVE_STATE = "active";
constexpr auto ACTIVATING_STATE = "activating";

// D-Bus coordinates of the systemd manager object; used for the Subscribe,
// StartUnit and GetUnit method calls in this file.
constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";

// Generic D-Bus properties interface. Despite the SYSTEMD_ prefix it is also
// used for the settings objects queried in isAutoReboot().
constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
// Interface of a systemd unit object; its ActiveState property is read in
// stateActive().
constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
61 
62 void Host::subscribeToSystemdSignals()
63 {
64     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
65                                             SYSTEMD_INTERFACE, "Subscribe");
66     try
67     {
68         this->bus.call_noreply(method);
69     }
70     catch (const sdbusplus::exception_t& e)
71     {
72         error("Failed to subscribe to systemd signals: {ERROR}", "ERROR", e);
73         elog<InternalFailure>();
74     }
75     return;
76 }
77 
78 void Host::determineInitialState()
79 {
80     if (stateActive(getTarget(server::Host::HostState::Running)) ||
81         isHostRunning(id))
82     {
83         info("Initial Host State will be Running");
84         server::Host::currentHostState(HostState::Running);
85         server::Host::requestedHostTransition(Transition::On);
86     }
87     else
88     {
89         info("Initial Host State will be Off");
90         server::Host::currentHostState(HostState::Off);
91         server::Host::requestedHostTransition(Transition::Off);
92     }
93 
94     if (!deserialize())
95     {
96         // set to default value.
97         server::Host::requestedHostTransition(Transition::Off);
98     }
99     return;
100 }
101 
102 void Host::createSystemdTargetMaps()
103 {
104     stateTargetTable = {
105         {HostState::Off, fmt::format("obmc-host-stop@{}.target", id)},
106         {HostState::Running, fmt::format("obmc-host-startmin@{}.target", id)},
107         {HostState::Quiesced, fmt::format("obmc-host-quiesce@{}.target", id)},
108         {HostState::DiagnosticMode,
109          fmt::format("obmc-host-diagnostic-mode@{}.target", id)}};
110 
111     transitionTargetTable = {
112         {Transition::Off, fmt::format("obmc-host-shutdown@{}.target", id)},
113         {Transition::On, fmt::format("obmc-host-start@{}.target", id)},
114         {Transition::Reboot, fmt::format("obmc-host-reboot@{}.target", id)},
115 // Some systems do not support a warm reboot so just map the reboot
116 // requests to our normal cold reboot in that case
117 #if ENABLE_WARM_REBOOT
118         {Transition::GracefulWarmReboot,
119          fmt::format("obmc-host-warm-reboot@{}.target", id)},
120         {Transition::ForceWarmReboot,
121          fmt::format("obmc-host-force-warm-reboot@{}.target", id)}
122     };
123 #else
124         {Transition::GracefulWarmReboot,
125          fmt::format("obmc-host-reboot@{}.target", id)},
126         {Transition::ForceWarmReboot,
127          fmt::format("obmc-host-reboot@{}.target", id)}
128     };
129 #endif
130     hostCrashTarget = fmt::format("obmc-host-crash@{}.target", id);
131 }
132 
133 const std::string& Host::getTarget(HostState state)
134 {
135     return stateTargetTable[state];
136 };
137 
138 const std::string& Host::getTarget(Transition tranReq)
139 {
140     return transitionTargetTable[tranReq];
141 };
142 
143 void Host::executeTransition(Transition tranReq)
144 {
145     auto& sysdUnit = getTarget(tranReq);
146 
147     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
148                                             SYSTEMD_INTERFACE, "StartUnit");
149 
150     method.append(sysdUnit);
151     method.append("replace");
152 
153     this->bus.call_noreply(method);
154 
155     return;
156 }
157 
158 bool Host::stateActive(const std::string& target)
159 {
160     std::variant<std::string> currentState;
161     sdbusplus::message::object_path unitTargetPath;
162 
163     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
164                                             SYSTEMD_INTERFACE, "GetUnit");
165 
166     method.append(target);
167 
168     try
169     {
170         auto result = this->bus.call(method);
171         result.read(unitTargetPath);
172     }
173     catch (const sdbusplus::exception_t& e)
174     {
175         error("Error in GetUnit call: {ERROR}", "ERROR", e);
176         return false;
177     }
178 
179     method = this->bus.new_method_call(
180         SYSTEMD_SERVICE,
181         static_cast<const std::string&>(unitTargetPath).c_str(),
182         SYSTEMD_PROPERTY_IFACE, "Get");
183 
184     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
185 
186     try
187     {
188         auto result = this->bus.call(method);
189         result.read(currentState);
190     }
191     catch (const sdbusplus::exception_t& e)
192     {
193         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
194         return false;
195     }
196 
197     const auto& currentStateStr = std::get<std::string>(currentState);
198     return currentStateStr == ACTIVE_STATE ||
199            currentStateStr == ACTIVATING_STATE;
200 }
201 
202 bool Host::isAutoReboot()
203 {
204     using namespace settings;
205 
206     /* The logic here is to first check the one-time AutoReboot setting.
207      * If this property is true (the default) then look at the persistent
208      * user setting in the non one-time object, otherwise honor the one-time
209      * setting and do not auto reboot.
210      */
211     auto methodOneTime = bus.new_method_call(
212         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
213         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
214     methodOneTime.append(autoRebootIntf, "AutoReboot");
215 
216     auto methodUserSetting = bus.new_method_call(
217         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
218         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
219     methodUserSetting.append(autoRebootIntf, "AutoReboot");
220 
221     try
222     {
223         auto reply = bus.call(methodOneTime);
224         std::variant<bool> result;
225         reply.read(result);
226         auto autoReboot = std::get<bool>(result);
227 
228         if (!autoReboot)
229         {
230             info("Auto reboot (one-time) disabled");
231             return false;
232         }
233         else
234         {
235             // one-time is true so read the user setting
236             reply = bus.call(methodUserSetting);
237             reply.read(result);
238             autoReboot = std::get<bool>(result);
239         }
240 
241         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
242 
243         if (autoReboot)
244         {
245             if (rebootCounterParam > 0)
246             {
247                 // Reduce BOOTCOUNT by 1
248                 info(
249                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
250                     "BOOTCOUNT", rebootCounterParam);
251                 return true;
252             }
253             else
254             {
255                 // We are at 0 so reset reboot counter and go to quiesce state
256                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
257                 attemptsLeft(reboot::RebootAttempts::retryAttempts());
258 
259                 // Generate log since we will now be sitting in Quiesce
260                 const std::string errorMsg =
261                     "xyz.openbmc_project.State.Error.HostQuiesce";
262                 utils::createError(this->bus, errorMsg,
263                                    sdbusplus::xyz::openbmc_project::Logging::
264                                        server::Entry::Level::Critical);
265 
266                 // Generate BMC dump to assist with debug
267                 utils::createBmcDump(this->bus);
268 
269                 return false;
270             }
271         }
272         else
273         {
274             info("Auto reboot disabled.");
275             return false;
276         }
277     }
278     catch (const sdbusplus::exception_t& e)
279     {
280         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
281         return false;
282     }
283 }
284 
285 void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
286 {
287     uint32_t newStateID{};
288     sdbusplus::message::object_path newStateObjPath;
289     std::string newStateUnit{};
290     std::string newStateResult{};
291 
292     // Read the msg and populate each variable
293     msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);
294 
295     if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
296         (newStateResult == "done") &&
297         (!stateActive(getTarget(server::Host::HostState::Running))))
298     {
299         info("Received signal that host is off");
300         this->currentHostState(server::Host::HostState::Off);
301         this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
302         this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
303     }
304     else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
305              (newStateResult == "done") &&
306              (stateActive(getTarget(server::Host::HostState::Running))))
307     {
308         info("Received signal that host is running");
309         this->currentHostState(server::Host::HostState::Running);
310 
311         // Remove temporary file which is utilized for scenarios where the
312         // BMC is rebooted while the host is still up.
313         // This file is used to indicate to host related systemd services
314         // that the host is already running and they should skip running.
315         // Once the host state is back to running we can clear this file.
316         auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
317         size++; // null
318         std::unique_ptr<char[]> hostFile(new char[size]);
319         std::snprintf(hostFile.get(), size, HOST_RUNNING_FILE, 0);
320         if (std::filesystem::exists(hostFile.get()))
321         {
322             std::filesystem::remove(hostFile.get());
323         }
324     }
325     else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
326              (newStateResult == "done") &&
327              (stateActive(getTarget(server::Host::HostState::Quiesced))))
328     {
329         if (Host::isAutoReboot())
330         {
331             info("Beginning reboot...");
332             Host::requestedHostTransition(server::Host::Transition::Reboot);
333         }
334         else
335         {
336             info("Maintaining quiesce");
337             this->currentHostState(server::Host::HostState::Quiesced);
338         }
339     }
340 }
341 
342 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
343 {
344     uint32_t newStateID{};
345     sdbusplus::message::object_path newStateObjPath;
346     std::string newStateUnit{};
347 
348     // Read the msg and populate each variable
349     msg.read(newStateID, newStateObjPath, newStateUnit);
350 
351     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
352     {
353         info("Received signal that host is in diagnostice mode");
354         this->currentHostState(server::Host::HostState::DiagnosticMode);
355     }
356     else if ((newStateUnit == hostCrashTarget) &&
357              (server::Host::currentHostState() ==
358               server::Host::HostState::Running))
359     {
360         // Only decrease the boot count if host was running when the host crash
361         // target was started. Systemd will sometimes trigger multiple
362         // JobNew events for the same target. This seems to be related to
363         // how OpenBMC utilizes the targets in the reboot scenario
364         info("Received signal that host has crashed, decrement reboot count");
365 
366         // A host crash can cause a reboot of the host so decrement the reboot
367         // count
368         decrementRebootCount();
369     }
370 }
371 
372 uint32_t Host::decrementRebootCount()
373 {
374     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
375     if (rebootCount > 0)
376     {
377         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
378     }
379     return rebootCount;
380 }
381 
382 fs::path Host::serialize()
383 {
384     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
385     std::ofstream os(path.c_str(), std::ios::binary);
386     cereal::JSONOutputArchive oarchive(os);
387     oarchive(*this);
388     return path;
389 }
390 
391 bool Host::deserialize()
392 {
393     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
394     try
395     {
396         if (fs::exists(path))
397         {
398             std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
399             cereal::JSONInputArchive iarchive(is);
400             iarchive(*this);
401             return true;
402         }
403         return false;
404     }
405     catch (const cereal::Exception& e)
406     {
407         error("deserialize exception: {ERROR}", "ERROR", e);
408         fs::remove(path);
409         return false;
410     }
411 }
412 
413 Host::Transition Host::requestedHostTransition(Transition value)
414 {
415     info("Host state transition request of {REQ}", "REQ", value);
416     // If this is not a power off request then we need to
417     // decrement the reboot counter.  This code should
418     // never prevent a power on, it should just decrement
419     // the count to 0.  The quiesce handling is where the
420     // check of this count will occur
421     if (value != server::Host::Transition::Off)
422     {
423         decrementRebootCount();
424     }
425 
426     executeTransition(value);
427 
428     auto retVal = server::Host::requestedHostTransition(value);
429 
430     serialize();
431     return retVal;
432 }
433 
434 Host::ProgressStages Host::bootProgress(ProgressStages value)
435 {
436     auto retVal = bootprogress::Progress::bootProgress(value);
437     serialize();
438     return retVal;
439 }
440 
441 Host::OSStatus Host::operatingSystemState(OSStatus value)
442 {
443     auto retVal = osstatus::Status::operatingSystemState(value);
444     serialize();
445     return retVal;
446 }
447 
448 Host::HostState Host::currentHostState(HostState value)
449 {
450     info("Change to Host State: {STATE}", "STATE", value);
451     return server::Host::currentHostState(value);
452 }
453 
454 } // namespace manager
455 } // namespace state
456 } // namespace phosphor
457