1 #include "config.h"
2 
3 #include "host_state_manager.hpp"
4 
5 #include "host_check.hpp"
6 #include "utils.hpp"
7 
8 #include <fmt/format.h>
9 #include <stdio.h>
10 #include <systemd/sd-bus.h>
11 
12 #include <cereal/archives/json.hpp>
13 #include <cereal/cereal.hpp>
14 #include <cereal/types/string.hpp>
15 #include <cereal/types/tuple.hpp>
16 #include <cereal/types/vector.hpp>
17 #include <phosphor-logging/elog-errors.hpp>
18 #include <phosphor-logging/lg2.hpp>
19 #include <sdbusplus/exception.hpp>
20 #include <sdbusplus/server.hpp>
21 #include <xyz/openbmc_project/Common/error.hpp>
22 #include <xyz/openbmc_project/Control/Power/RestorePolicy/server.hpp>
23 
24 #include <filesystem>
25 #include <fstream>
26 #include <iostream>
27 #include <map>
28 #include <string>
29 
30 // Register class version with Cereal
31 CEREAL_CLASS_VERSION(phosphor::state::manager::Host, CLASS_VERSION)
32 
33 namespace phosphor
34 {
35 namespace state
36 {
37 namespace manager
38 {
39 
40 PHOSPHOR_LOG2_USING;
41 
42 // When you see server:: or reboot:: you know we're referencing our base class
43 namespace server = sdbusplus::xyz::openbmc_project::State::server;
44 namespace reboot = sdbusplus::xyz::openbmc_project::Control::Boot::server;
45 namespace bootprogress = sdbusplus::xyz::openbmc_project::State::Boot::server;
46 namespace osstatus =
47     sdbusplus::xyz::openbmc_project::State::OperatingSystem::server;
48 using namespace phosphor::logging;
49 namespace fs = std::filesystem;
50 using sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
51 
52 constexpr auto ACTIVE_STATE = "active";
53 constexpr auto ACTIVATING_STATE = "activating";
54 
55 constexpr auto SYSTEMD_SERVICE = "org.freedesktop.systemd1";
56 constexpr auto SYSTEMD_OBJ_PATH = "/org/freedesktop/systemd1";
57 constexpr auto SYSTEMD_INTERFACE = "org.freedesktop.systemd1.Manager";
58 
59 constexpr auto SYSTEMD_PROPERTY_IFACE = "org.freedesktop.DBus.Properties";
60 constexpr auto SYSTEMD_INTERFACE_UNIT = "org.freedesktop.systemd1.Unit";
61 
62 void Host::determineInitialState()
63 {
64     if (stateActive(getTarget(server::Host::HostState::Running)) ||
65         isHostRunning(id))
66     {
67         info("Initial Host State will be Running");
68         server::Host::currentHostState(HostState::Running);
69         server::Host::requestedHostTransition(Transition::On);
70     }
71     else
72     {
73         info("Initial Host State will be Off");
74         server::Host::currentHostState(HostState::Off);
75         server::Host::requestedHostTransition(Transition::Off);
76     }
77 
78     if (!deserialize())
79     {
80         // set to default value.
81         server::Host::requestedHostTransition(Transition::Off);
82     }
83     return;
84 }
85 
86 void Host::createSystemdTargetMaps()
87 {
88     stateTargetTable = {
89         {HostState::Off, fmt::format("obmc-host-stop@{}.target", id)},
90         {HostState::Running, fmt::format("obmc-host-startmin@{}.target", id)},
91         {HostState::Quiesced, fmt::format("obmc-host-quiesce@{}.target", id)},
92         {HostState::DiagnosticMode,
93          fmt::format("obmc-host-diagnostic-mode@{}.target", id)}};
94 
95     transitionTargetTable = {
96         {Transition::Off, fmt::format("obmc-host-shutdown@{}.target", id)},
97         {Transition::On, fmt::format("obmc-host-start@{}.target", id)},
98         {Transition::Reboot, fmt::format("obmc-host-reboot@{}.target", id)},
99 // Some systems do not support a warm reboot so just map the reboot
100 // requests to our normal cold reboot in that case
101 #if ENABLE_WARM_REBOOT
102         {Transition::GracefulWarmReboot,
103          fmt::format("obmc-host-warm-reboot@{}.target", id)},
104         {Transition::ForceWarmReboot,
105          fmt::format("obmc-host-force-warm-reboot@{}.target", id)}
106     };
107 #else
108         {Transition::GracefulWarmReboot,
109          fmt::format("obmc-host-reboot@{}.target", id)},
110         {Transition::ForceWarmReboot,
111          fmt::format("obmc-host-reboot@{}.target", id)}
112     };
113 #endif
114     hostCrashTarget = fmt::format("obmc-host-crash@{}.target", id);
115 }
116 
117 const std::string& Host::getTarget(HostState state)
118 {
119     return stateTargetTable[state];
120 };
121 
122 const std::string& Host::getTarget(Transition tranReq)
123 {
124     return transitionTargetTable[tranReq];
125 };
126 
127 void Host::executeTransition(Transition tranReq)
128 {
129     auto& sysdUnit = getTarget(tranReq);
130 
131     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
132                                             SYSTEMD_INTERFACE, "StartUnit");
133 
134     method.append(sysdUnit);
135     method.append("replace");
136 
137     this->bus.call_noreply(method);
138 
139     return;
140 }
141 
142 bool Host::stateActive(const std::string& target)
143 {
144     std::variant<std::string> currentState;
145     sdbusplus::message::object_path unitTargetPath;
146 
147     auto method = this->bus.new_method_call(SYSTEMD_SERVICE, SYSTEMD_OBJ_PATH,
148                                             SYSTEMD_INTERFACE, "GetUnit");
149 
150     method.append(target);
151 
152     try
153     {
154         auto result = this->bus.call(method);
155         result.read(unitTargetPath);
156     }
157     catch (const sdbusplus::exception_t& e)
158     {
159         error("Error in GetUnit call: {ERROR}", "ERROR", e);
160         return false;
161     }
162 
163     method = this->bus.new_method_call(
164         SYSTEMD_SERVICE,
165         static_cast<const std::string&>(unitTargetPath).c_str(),
166         SYSTEMD_PROPERTY_IFACE, "Get");
167 
168     method.append(SYSTEMD_INTERFACE_UNIT, "ActiveState");
169 
170     try
171     {
172         auto result = this->bus.call(method);
173         result.read(currentState);
174     }
175     catch (const sdbusplus::exception_t& e)
176     {
177         error("Error in ActiveState Get: {ERROR}", "ERROR", e);
178         return false;
179     }
180 
181     const auto& currentStateStr = std::get<std::string>(currentState);
182     return currentStateStr == ACTIVE_STATE ||
183            currentStateStr == ACTIVATING_STATE;
184 }
185 
186 bool Host::isAutoReboot()
187 {
188     using namespace settings;
189 
190     /* The logic here is to first check the one-time AutoReboot setting.
191      * If this property is true (the default) then look at the persistent
192      * user setting in the non one-time object, otherwise honor the one-time
193      * setting and do not auto reboot.
194      */
195     auto methodOneTime = bus.new_method_call(
196         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
197         settings.autoRebootOneTime.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
198     methodOneTime.append(autoRebootIntf, "AutoReboot");
199 
200     auto methodUserSetting = bus.new_method_call(
201         settings.service(settings.autoReboot, autoRebootIntf).c_str(),
202         settings.autoReboot.c_str(), SYSTEMD_PROPERTY_IFACE, "Get");
203     methodUserSetting.append(autoRebootIntf, "AutoReboot");
204 
205     try
206     {
207         auto reply = bus.call(methodOneTime);
208         std::variant<bool> result;
209         reply.read(result);
210         auto autoReboot = std::get<bool>(result);
211 
212         if (!autoReboot)
213         {
214             info("Auto reboot (one-time) disabled");
215             return false;
216         }
217         else
218         {
219             // one-time is true so read the user setting
220             reply = bus.call(methodUserSetting);
221             reply.read(result);
222             autoReboot = std::get<bool>(result);
223         }
224 
225         auto rebootCounterParam = reboot::RebootAttempts::attemptsLeft();
226 
227         if (autoReboot)
228         {
229             if (rebootCounterParam > 0)
230             {
231                 // Reduce BOOTCOUNT by 1
232                 info(
233                     "Auto reboot enabled and boot count at {BOOTCOUNT}, rebooting",
234                     "BOOTCOUNT", rebootCounterParam);
235                 return true;
236             }
237             else
238             {
239                 // We are at 0 so reset reboot counter and go to quiesce state
240                 info("Auto reboot enabled but HOST BOOTCOUNT already set to 0");
241                 attemptsLeft(reboot::RebootAttempts::retryAttempts());
242 
243                 // Generate log since we will now be sitting in Quiesce
244                 const std::string errorMsg =
245                     "xyz.openbmc_project.State.Error.HostQuiesce";
246                 utils::createError(this->bus, errorMsg,
247                                    sdbusplus::xyz::openbmc_project::Logging::
248                                        server::Entry::Level::Critical);
249 
250                 // Generate BMC dump to assist with debug
251                 utils::createBmcDump(this->bus);
252 
253                 return false;
254             }
255         }
256         else
257         {
258             info("Auto reboot disabled.");
259             return false;
260         }
261     }
262     catch (const sdbusplus::exception_t& e)
263     {
264         error("Error in AutoReboot Get, {ERROR}", "ERROR", e);
265         return false;
266     }
267 }
268 
269 void Host::sysStateChangeJobRemoved(sdbusplus::message_t& msg)
270 {
271     uint32_t newStateID{};
272     sdbusplus::message::object_path newStateObjPath;
273     std::string newStateUnit{};
274     std::string newStateResult{};
275 
276     // Read the msg and populate each variable
277     msg.read(newStateID, newStateObjPath, newStateUnit, newStateResult);
278 
279     if ((newStateUnit == getTarget(server::Host::HostState::Off)) &&
280         (newStateResult == "done") &&
281         (!stateActive(getTarget(server::Host::HostState::Running))))
282     {
283         info("Received signal that host is off");
284         this->currentHostState(server::Host::HostState::Off);
285         this->bootProgress(bootprogress::Progress::ProgressStages::Unspecified);
286         this->operatingSystemState(osstatus::Status::OSStatus::Inactive);
287     }
288     else if ((newStateUnit == getTarget(server::Host::HostState::Running)) &&
289              (newStateResult == "done") &&
290              (stateActive(getTarget(server::Host::HostState::Running))))
291     {
292         info("Received signal that host is running");
293         this->currentHostState(server::Host::HostState::Running);
294 
295         // Remove temporary file which is utilized for scenarios where the
296         // BMC is rebooted while the host is still up.
297         // This file is used to indicate to host related systemd services
298         // that the host is already running and they should skip running.
299         // Once the host state is back to running we can clear this file.
300         auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
301         size++; // null
302         std::unique_ptr<char[]> hostFile(new char[size]);
303         std::snprintf(hostFile.get(), size, HOST_RUNNING_FILE, 0);
304         if (std::filesystem::exists(hostFile.get()))
305         {
306             std::filesystem::remove(hostFile.get());
307         }
308     }
309     else if ((newStateUnit == getTarget(server::Host::HostState::Quiesced)) &&
310              (newStateResult == "done") &&
311              (stateActive(getTarget(server::Host::HostState::Quiesced))))
312     {
313         if (Host::isAutoReboot())
314         {
315             info("Beginning reboot...");
316             Host::requestedHostTransition(server::Host::Transition::Reboot);
317         }
318         else
319         {
320             info("Maintaining quiesce");
321             this->currentHostState(server::Host::HostState::Quiesced);
322         }
323     }
324 }
325 
326 void Host::sysStateChangeJobNew(sdbusplus::message_t& msg)
327 {
328     uint32_t newStateID{};
329     sdbusplus::message::object_path newStateObjPath;
330     std::string newStateUnit{};
331 
332     // Read the msg and populate each variable
333     msg.read(newStateID, newStateObjPath, newStateUnit);
334 
335     if (newStateUnit == getTarget(server::Host::HostState::DiagnosticMode))
336     {
337         info("Received signal that host is in diagnostice mode");
338         this->currentHostState(server::Host::HostState::DiagnosticMode);
339     }
340     else if ((newStateUnit == hostCrashTarget) &&
341              (server::Host::currentHostState() ==
342               server::Host::HostState::Running))
343     {
344         // Only decrease the boot count if host was running when the host crash
345         // target was started. Systemd will sometimes trigger multiple
346         // JobNew events for the same target. This seems to be related to
347         // how OpenBMC utilizes the targets in the reboot scenario
348         info("Received signal that host has crashed, decrement reboot count");
349 
350         // A host crash can cause a reboot of the host so decrement the reboot
351         // count
352         decrementRebootCount();
353     }
354 }
355 
356 uint32_t Host::decrementRebootCount()
357 {
358     auto rebootCount = reboot::RebootAttempts::attemptsLeft();
359     if (rebootCount > 0)
360     {
361         return (reboot::RebootAttempts::attemptsLeft(rebootCount - 1));
362     }
363     return rebootCount;
364 }
365 
366 fs::path Host::serialize()
367 {
368     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
369     std::ofstream os(path.c_str(), std::ios::binary);
370     cereal::JSONOutputArchive oarchive(os);
371     oarchive(*this);
372     return path;
373 }
374 
375 bool Host::deserialize()
376 {
377     fs::path path{fmt::format(HOST_STATE_PERSIST_PATH, id)};
378     try
379     {
380         if (fs::exists(path))
381         {
382             std::ifstream is(path.c_str(), std::ios::in | std::ios::binary);
383             cereal::JSONInputArchive iarchive(is);
384             iarchive(*this);
385             return true;
386         }
387         return false;
388     }
389     catch (const cereal::Exception& e)
390     {
391         error("deserialize exception: {ERROR}", "ERROR", e);
392         fs::remove(path);
393         return false;
394     }
395 }
396 
397 Host::Transition Host::requestedHostTransition(Transition value)
398 {
399     info("Host state transition request of {REQ}", "REQ", value);
400     // If this is not a power off request then we need to
401     // decrement the reboot counter.  This code should
402     // never prevent a power on, it should just decrement
403     // the count to 0.  The quiesce handling is where the
404     // check of this count will occur
405     if (value != server::Host::Transition::Off)
406     {
407         decrementRebootCount();
408     }
409 
410     executeTransition(value);
411 
412     auto retVal = server::Host::requestedHostTransition(value);
413 
414     serialize();
415     return retVal;
416 }
417 
418 Host::ProgressStages Host::bootProgress(ProgressStages value)
419 {
420     auto retVal = bootprogress::Progress::bootProgress(value);
421     serialize();
422     return retVal;
423 }
424 
425 Host::OSStatus Host::operatingSystemState(OSStatus value)
426 {
427     auto retVal = osstatus::Status::operatingSystemState(value);
428     serialize();
429     return retVal;
430 }
431 
432 Host::HostState Host::currentHostState(HostState value)
433 {
434     info("Change to Host State: {STATE}", "STATE", value);
435     return server::Host::currentHostState(value);
436 }
437 
438 } // namespace manager
439 } // namespace state
440 } // namespace phosphor
441