xref: /openbmc/openpower-debug-collector/watchdog/watchdog_main.cpp (revision 20a9d4375d2eb9d56d5f3e135bceb75884c2cc84)
1 #include <format>
2 extern "C"
3 {
4 #include <libpdbg.h>
5 #include <libpdbg_sbe.h>
6 }
7 
8 #include <libphal.H>
9 
10 #include <phosphor-logging/lg2.hpp>
11 #include <watchdog_common.hpp>
12 #include <watchdog_dbus.hpp>
13 #include <watchdog_handler.hpp>
14 #include <watchdog_logging.hpp>
15 
16 namespace watchdog
17 {
18 namespace dump
19 {
20 
triggerHostbootDump(const uint32_t timeout)21 void triggerHostbootDump(const uint32_t timeout)
22 {
23     constexpr auto HOST_STATE_DIAGNOSTIC_MODE =
24         "obmc-host-diagnostic-mode@0.target";
25     constexpr auto HOST_STATE_QUIESCE_TGT = "obmc-host-quiesce@0.target";
26 
27     // Put system into diagnostic mode
28     transitionHost(HOST_STATE_DIAGNOSTIC_MODE);
29 
30     eventWatchdogTimeout(timeout);
31 
32     // Put system into quiesce state
33     transitionHost(HOST_STATE_QUIESCE_TGT);
34 }
35 
triggerSystemDump()36 void triggerSystemDump()
37 {
38     try
39     {
40         // Create a PEL may be before setting the target
41         constexpr auto eventName =
42             "org.open_power.Host.Boot.Error.WatchdogTimedOut";
43 
44         // CreatePELWithFFDCFiles requires a vector of FFDCTuple.
45         auto emptyFfdc = std::vector<FFDCTuple>{};
46 
47         std::map<std::string, std::string> additionalData;
48 
49         // Create PEL with empty additional data.
50         createPel(eventName, additionalData, emptyFfdc);
51 
52         // We will be transitioning host by starting appropriate dbus target
53         constexpr auto target = "obmc-host-crash@0.target";
54 
55         auto bus = sdbusplus::bus::new_system();
56         auto method = bus.new_method_call(
57             "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
58             "org.freedesktop.systemd1.Manager", "StartUnit");
59 
60         method.append(target);    // target unit to start
61         method.append("replace"); // mode = replace conflicting queued jobs
62 
63         bus.call_noreply(method); // start the service
64     }
65     catch (const sdbusplus::exception::SdBusError& e)
66     {
67         lg2::error("triggerMPIPLDump:: D-Bus call exception, errorMsg({ERROR})",
68                    "ERROR", e.what());
69     }
70 }
71 
72 /**
73  * @brief get SBE special callout information
74  *
75  * @details This function adds the special sbe callout in the user provided
76  * json callout list. includes BMC0002 procedure callout with high priority
77  * and processor callout with medium priority.
78  *
79  * @param[in] procTarget - pdbg processor target
80  * @param[out] jsonCalloutDataList - reference to json callout list
81  */
getSBECallout(struct pdbg_target * procTarget,json & jsonCalloutDataList)82 static void getSBECallout(struct pdbg_target* procTarget,
83                           json& jsonCalloutDataList)
84 {
85     using namespace openpower::phal::pdbg;
86     json jsonProcedCallout;
87 
88     // Add procedure callout
89     jsonProcedCallout["Procedure"] = "BMC0002";
90     jsonProcedCallout["Priority"] = "H";
91     jsonCalloutDataList.emplace_back(jsonProcedCallout);
92     try
93     {
94         ATTR_LOCATION_CODE_Type locationCode;
95         // Initialize with default data.
96         memset(&locationCode, '\0', sizeof(locationCode));
97         // Get location code information
98         openpower::phal::pdbg::getLocationCode(procTarget, locationCode);
99         json jsonProcCallout;
100         jsonProcCallout["LocationCode"] = locationCode;
101         jsonProcCallout["Deconfigured"] = false;
102         jsonProcCallout["Guarded"] = false;
103         jsonProcCallout["Priority"] = "M";
104         jsonCalloutDataList.emplace_back(jsonProcCallout);
105     }
106     catch (const std::exception& e)
107     {
108         lg2::error("getLocationCode({LOCATION}): Exception({ERROR})",
109                    "LOCATION", pdbg_target_path(procTarget), "ERROR", e);
110     }
111 }
112 
handleSbeBootError(struct pdbg_target * procTarget,const uint32_t timeout)113 void handleSbeBootError(struct pdbg_target* procTarget, const uint32_t timeout)
114 {
115     using namespace openpower::phal;
116 
117     sbeError_t sbeError;
118     bool dumpIsRequired = false;
119 
120     try
121     {
122         // Capture FFDC information on primary processor
123         sbeError = sbe::captureFFDC(procTarget);
124     }
125     catch (const std::exception& e)
126     {
127         // Failed to collect FFDC information
128         lg2::error("captureFFDC: Exception{ERROR}", "ERROR", e);
129         dumpIsRequired = true;
130     }
131 
132     // event type
133     std::string event;
134     if ((sbeError.errType() == exception::SBE_FFDC_NO_DATA) ||
135         (sbeError.errType() == exception::SBE_CMD_TIMEOUT) || (dumpIsRequired))
136     {
137         lg2::info("No FFDC data");
138         event = "org.open_power.Processor.Error.SbeBootTimeout";
139         dumpIsRequired = true;
140     }
141     else
142     {
143         lg2::error("SBE Boot failure");
144         event = "org.open_power.Processor.Error.SbeBootFailure";
145     }
146 
147     // Additional data
148     std::map<std::string, std::string> additionalData;
149 
150     // SRC6 : [0:15] chip position
151     uint32_t index = pdbg_target_index(procTarget);
152     additionalData.emplace("SRC6", std::to_string(index << 16));
153     additionalData.emplace("SBE_ERR_MSG", sbeError.what());
154 
155     // FFDC
156     auto ffdc = std::vector<FFDCTuple>{};
157     // get SBE ffdc file descriptor
158     auto fd = sbeError.getFd();
159 
160     // Log error with additional ffdc if fd is valid
161     if (fd > 0)
162     {
163         ffdc.push_back(
164             std::make_tuple(sdbusplus::xyz::openbmc_project::Logging::server::
165                                 Create::FFDCFormat::Custom,
166                             static_cast<uint8_t>(0xCB),
167                             static_cast<uint8_t>(0x01), sbeError.getFd()));
168     }
169 
170     std::unique_ptr<FFDCFile> ffdcFilePtr;
171     try
172     {
173         if (dumpIsRequired)
174         {
175             // Additional callout is required for SBE timeout case
176             // In this case no SBE FFDC information available and
177             // required to add default callouts.
178             json jsonCalloutDataList;
179             jsonCalloutDataList = json::array();
180             getSBECallout(procTarget, jsonCalloutDataList);
181             ffdcFilePtr = std::make_unique<FFDCFile>(jsonCalloutDataList);
182             ffdc.push_back(std::make_tuple(
183                 sdbusplus::xyz::openbmc_project::Logging::server::Create::
184                     FFDCFormat::JSON,
185                 static_cast<uint8_t>(0xCA), static_cast<uint8_t>(0x01),
186                 ffdcFilePtr->getFileDescriptor()));
187         }
188     }
189     catch (const std::exception& e)
190     {
191         lg2::error("Skipping SBE special callout due to Exception({ERROR})",
192                    "ERROR", e);
193     }
194     auto pelId = createPel(event, additionalData, ffdc);
195 
196     if (dumpIsRequired)
197     {
198         try
199         {
200             using namespace openpower::phal;
201 
202             // Check SBE dump collection allowed
203             bool dumpAllowed = sbe::isDumpAllowed(procTarget);
204             if (!dumpAllowed)
205             {
206                 // Possibly another collection in progress, skip dump collection
207                 lg2::error("Another collection is in progress, skipping "
208                            "dump collection");
209                 return;
210             }
211         }
212         catch (const std::exception& e)
213         {
214             lg2::error("Exception {ERROR} occurred", "ERROR", e);
215             return;
216         }
217 
218         DumpParameters dumpParameters;
219         dumpParameters.logId = pelId;
220         dumpParameters.unitId = index;
221         dumpParameters.timeout = timeout;
222         dumpParameters.dumpType = DumpType::SBE;
223 
224         // will not return until dump is complete or timeout
225         requestDump(dumpParameters);
226     }
227 }
228 
229 } // namespace dump
230 } // namespace watchdog
231