xref: /openbmc/openpower-proc-control/procedures/phal/check_host_running.cpp (revision 25e39c84f7448f39b34e3e28141c3673b36b1c1b)
1 extern "C"
2 {
3 #include "libpdbg.h"
4 }
5 
6 #include "extensions/phal/common_utils.hpp"
7 #include "extensions/phal/create_pel.hpp"
8 #include "p10_cfam.hpp"
9 #include "registration.hpp"
10 
11 #include <phosphor-logging/log.hpp>
12 
13 #include <cstdio>
14 #include <fstream>
15 #include <memory>
16 
17 namespace openpower
18 {
19 namespace phal
20 {
21 
22 using namespace openpower::cfam::p10;
23 using namespace phosphor::logging;
24 
25 /**
26  * This is the backup plan to ensuring the host is not running before the
27  * BMC issues a power off to the system. Prior to this procedure being called,
28  * the BMC has tried all other communication mechanisms to talk with the host
29  * and they have failed. The design is that the host firmware will write the
30  * value 0xA5000001 to Mailbox scratch register 12 when they are up and running
31  * to a point where communication to the BMC is no longer required to function.
32  * On a power off or shutdown this register is cleared by the host and BMC
33  * firmware. If the BMC sees the 0xA5000001 pattern in the scratch register
34  * then it assumes the host is running and will leave power on to the system.
35  */
36 void checkHostRunning()
37 {
38     struct pdbg_target* procTarget;
39 
40     try
41     {
42         phal_init();
43     }
44     catch (std::exception& ex)
45     {
46         // This should "never" happen so just throw the exception and let
47         // our systemd error handling process this
48         log<level::ERR>("Exception raised during init PHAL",
49                         entry("EXCEPTION=%s", ex.what()));
50         throw std::runtime_error("PHAL initialization failed");
51     }
52 
53     pdbg_for_each_class_target("proc", procTarget)
54     {
55         // Only check the primary proc
56         if (!isPrimaryProc(procTarget))
57         {
58             continue;
59         }
60 
61         uint32_t val = 0;
62         constexpr uint32_t HOST_RUNNING_INDICATION = 0xA5000001;
63         auto rc = getCFAM(procTarget, P10_SCRATCH_REG_12, val);
64         if ((rc == 0) && (val != HOST_RUNNING_INDICATION))
65         {
66             log<level::INFO>("CFAM read indicates host is not running",
67                              entry("CFAM=0x%X", val));
68             return;
69         }
70 
71         if (rc != 0)
72         {
73             // On error, we have to assume host is up so just fall through
74             // to code below
75             log<level::ERR>("CFAM read error, assume host is running");
76         }
77         else if (val == HOST_RUNNING_INDICATION)
78         {
79             // This is not good. Normal communication path to host did not work
80             // but CFAM indicates host is running.
81             log<level::ERR>("CFAM read indicates host is running");
82         }
83 
84         // Create an error so user knows system is in a bad state
85         openpower::pel::createHostRunningPEL();
86 
87         // Create file for host instance and create in filesystem to
88         // indicate to services that host is running.
89         // This file is cleared by the phosphor-state-manager once the host
90         // start target completes.
91         constexpr auto HOST_RUNNING_FILE = "/run/openbmc/host@%d-on";
92         auto size = std::snprintf(nullptr, 0, HOST_RUNNING_FILE, 0);
93         size++; // null
94         std::unique_ptr<char[]> buf(new char[size]);
95         std::snprintf(buf.get(), size, HOST_RUNNING_FILE, 0);
96         std::ofstream outfile(buf.get());
97         outfile.close();
98         return;
99     }
100 
101     // We should "never" make it here. If we did it implies no primary processor
102     // was found. Once again, rely on systemd recovery if this happens
103     log<level::ERR>("No primary processor found in checkHostRunning");
104     throw std::runtime_error("No primary processor found in checkHostRunning");
105 }
106 
107 /**
108  * The BMC is to make a best effort to clear the CFAM register used by PHYP
109  * to indicate it is running when the host is stopped. This procedure will do
110  * that.
111  */
112 void clearHostRunning()
113 {
114     struct pdbg_target* procTarget;
115     log<level::INFO>("Entering clearHostRunning");
116 
117     try
118     {
119         phal_init();
120     }
121     catch (std::exception& ex)
122     {
123         // This should "never" happen so just throw the exception and let
124         // our systemd error handling process this
125         log<level::ERR>("Exception raised during init PHAL",
126                         entry("EXCEPTION=%s", ex.what()));
127         throw std::runtime_error("PHAL initialization failed");
128     }
129 
130     pdbg_for_each_class_target("proc", procTarget)
131     {
132         // Only check the primary proc
133         if (!isPrimaryProc(procTarget))
134         {
135             continue;
136         }
137 
138         constexpr uint32_t HOST_NOT_RUNNING_INDICATION = 0;
139         auto rc = putCFAM(procTarget, P10_SCRATCH_REG_12,
140                           HOST_NOT_RUNNING_INDICATION);
141         if (rc != 0)
142         {
143             log<level::ERR>("CFAM write to clear host running status failed");
144         }
145 
146         // It's best effort, so just return either way
147         return;
148     }
149     log<level::ERR>("No primary processor found in clearHostRunning");
150 }
151 
// Register both procedures under their string names.
// NOTE(review): REGISTER_PROCEDURE comes from registration.hpp, which is not
// visible here; presumably this makes each function invocable by name -
// confirm against that header.
152 REGISTER_PROCEDURE("checkHostRunning", checkHostRunning)
153 REGISTER_PROCEDURE("clearHostRunning", clearHostRunning)
154 
155 } // namespace phal
156 } // namespace openpower
157