1 extern "C"
2 {
3 #include <libpdbg.h>
4 #include <libpdbg_sbe.h>
5 }
6 
7 #include "create_pel.hpp"
8 #include "sbe_consts.hpp"
9 #include "sbe_dump_collector.hpp"
10 #include "sbe_type.hpp"
11 
12 #include <libphal.H>
13 #include <phal_exception.H>
14 
15 #include <phosphor-logging/elog-errors.hpp>
16 #include <phosphor-logging/lg2.hpp>
17 #include <phosphor-logging/log.hpp>
18 #include <sbe_consts.hpp>
19 #include <xyz/openbmc_project/Common/File/error.hpp>
20 #include <xyz/openbmc_project/Common/error.hpp>
21 
22 #include <cstdint>
23 #include <filesystem>
24 #include <format>
25 #include <fstream>
26 #include <stdexcept>
27 
28 namespace openpower::dump::sbe_chipop
29 {
30 
31 using namespace phosphor::logging;
32 using namespace openpower::dump::SBE;
33 
34 void SbeDumpCollector::collectDump(uint8_t type, uint32_t id,
35                                    uint64_t failingUnit,
36                                    const std::filesystem::path& path)
37 {
38     lg2::error("Starting dump collection: type:{TYPE} id:{ID} "
39                "failingUnit:{FAILINGUNIT}, path:{PATH}",
40                "TYPE", type, "ID", id, "FAILINGUNIT", failingUnit, "PATH",
41                path.string());
42 
43     initializePdbg();
44 
45     std::vector<struct pdbg_target*> targets;
46 
47     struct pdbg_target* target = nullptr;
48     pdbg_for_each_class_target("proc", target)
49     {
50         if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
51             !openpower::phal::pdbg::isTgtFunctional(target))
52         {
53             continue;
54         }
55 
56         bool includeTarget = true;
57         // if the dump type is hostboot then call stop instructions
58         if (type == SBE_DUMP_TYPE_HOSTBOOT)
59         {
60             includeTarget = executeThreadStop(target);
61         }
62         if (includeTarget)
63         {
64             targets.push_back(target);
65         }
66     }
67 
68     std::vector<uint8_t> clockStates = {SBE_CLOCK_ON, SBE_CLOCK_OFF};
69     for (auto cstate : clockStates)
70     {
71         auto futures = spawnDumpCollectionProcesses(type, id, path, failingUnit,
72                                                     cstate, targets);
73 
74         // Wait for all asynchronous tasks to complete
75         for (auto& future : futures)
76         {
77             try
78             {
79                 future.wait();
80             }
81             catch (const std::exception& e)
82             {
83                 lg2::error("Failed to collect dump from SBE ErrorMsg({ERROR})",
84                            "ERROR", e);
85             }
86         }
87         lg2::info(
88             "Dump collection completed for clock state({CSTATE}): type({TYPE}) "
89             "id({ID}) failingUnit({FAILINGUNIT}), path({PATH})",
90             "CSTATE", cstate, "TYPE", type, "ID", id, "FAILINGUNIT",
91             failingUnit, "PATH", path.string());
92     }
93     if (std::filesystem::is_empty(path))
94     {
95         lg2::error("Failed to collect the dump");
96         throw std::runtime_error("Failed to collect the dump");
97     }
98     lg2::info("Dump collection completed");
99 }
100 
101 void SbeDumpCollector::initializePdbg()
102 {
103     openpower::phal::pdbg::init();
104 }
105 
106 std::vector<std::future<void>> SbeDumpCollector::spawnDumpCollectionProcesses(
107     uint8_t type, uint32_t id, const std::filesystem::path& path,
108     uint64_t failingUnit, uint8_t cstate,
109     const std::vector<struct pdbg_target*>& targets)
110 {
111     std::vector<std::future<void>> futures;
112 
113     for (auto target : targets)
114     {
115         if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
116             !openpower::phal::pdbg::isTgtFunctional(target))
117         {
118             continue;
119         }
120 
121         auto future =
122             std::async(std::launch::async,
123                        [this, target, path, id, type, cstate, failingUnit]() {
124             try
125             {
126                 this->collectDumpFromSBE(target, path, id, type, cstate,
127                                          failingUnit);
128             }
129             catch (const std::exception& e)
130             {
131                 lg2::error(
132                     "Failed to collect dump from SBE on Proc-({PROCINDEX})",
133                     "PROCINDEX", pdbg_target_index(target));
134             }
135         });
136 
137         futures.push_back(std::move(future));
138     }
139 
140     return futures;
141 }
142 
/**
 * @brief Create a PEL for a failed SBE chip-op and, on timeout, request an
 *        SBE dump.
 *
 * The event name is taken from sbeTypeAttributes for @p sbeType: the
 * chip-op failure event by default, or the chip-op timeout event when the
 * error type is SBE_CMD_TIMEOUT. Only the timeout case requests an SBE dump.
 *
 * Exceptions derived from std::exception are caught and logged here; they
 * are not propagated to the caller.
 *
 * @param[in] sbeError - Error reported by the failed chip-op.
 * @param[in] chipPos - Position of the chip the chip-op was issued on.
 * @param[in] sbeType - SBE type used to look up the event names.
 * @param[in] cmdClass - Command class of the failed chip-op.
 * @param[in] cmdType - Command type of the failed chip-op.
 */
void SbeDumpCollector::logErrorAndCreatePEL(
    const openpower::phal::sbeError_t& sbeError, uint64_t chipPos,
    SBETypes sbeType, uint32_t cmdClass, uint32_t cmdType)
{
    try
    {
        std::string event = sbeTypeAttributes.at(sbeType).chipOpFailure;
        auto dumpIsRequired = false;

        if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
        {
            event = sbeTypeAttributes.at(sbeType).chipOpTimeout;
            dumpIsRequired = true;
        }

        // SRC6 carries the chip position followed by the combined command
        // class/type, both in upper-case hex.
        openpower::dump::pel::FFDCData pelAdditionalData = {
            {"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};

        // Create exactly one PEL per failure; its id is needed below when a
        // dump has to be requested.
        auto logId = openpower::dump::pel::createSbeErrorPEL(event, sbeError,
                                                             pelAdditionalData);

        // Request SBE Dump if required
        if (dumpIsRequired)
        {
            util::requestSBEDump(chipPos, logId, sbeType);
        }
    }
    catch (const std::out_of_range& e)
    {
        // sbeTypeAttributes has no entry for this SBE type.
        lg2::error("Unknown SBE Type({SBETYPE}) ErrorMsg({ERROR})", "SBETYPE",
                   sbeType, "ERROR", e);
    }
    catch (const std::exception& e)
    {
        lg2::error("SBE Dump request failed, chip position({CHIPPOS}), "
                   "Error: {ERROR}",
                   "CHIPPOS", chipPos, "ERROR", e);
    }
}
184 
185 void SbeDumpCollector::collectDumpFromSBE(struct pdbg_target* chip,
186                                           const std::filesystem::path& path,
187                                           uint32_t id, uint8_t type,
188                                           uint8_t clockState,
189                                           uint64_t failingUnit)
190 {
191     auto chipPos = pdbg_target_index(chip);
192     SBETypes sbeType = getSBEType(chip);
193     auto chipName = sbeTypeAttributes.at(sbeType).chipName;
194     lg2::info(
195         "Collecting dump from proc({PROC}): path({PATH}) id({ID}) "
196         "type({TYPE}) clockState({CLOCKSTATE}) failingUnit({FAILINGUNIT})",
197         "PROC", chipPos, "PATH", path.string(), "ID", id, "TYPE", type,
198         "CLOCKSTATE", clockState, "FAILINGUNIT", failingUnit);
199 
200     util::DumpDataPtr dataPtr;
201     uint32_t len = 0;
202     uint8_t collectFastArray =
203         checkFastarrayCollectionNeeded(clockState, type, failingUnit, chipPos);
204 
205     try
206     {
207         openpower::phal::sbe::getDump(chip, type, clockState, collectFastArray,
208                                       dataPtr.getPtr(), &len);
209     }
210     catch (const openpower::phal::sbeError_t& sbeError)
211     {
212         if (sbeError.errType() ==
213             openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
214         {
215             // SBE is not ready to accept chip-ops,
216             // Skip the request, no additional error handling required.
217             lg2::info("Collect dump: Skipping ({ERROR}) dump({TYPE}) "
218                       "on proc({PROC}) clock state({CLOCKSTATE})",
219                       "ERROR", sbeError, "TYPE", type, "PROC", chipPos,
220                       "CLOCKSTATE", clockState);
221             return;
222         }
223 
224         lg2::error("Error in collecting dump dump type({TYPE}), "
225                    "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
226                    "position({POSITION}), "
227                    "collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
228                    "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE", chipName,
229                    "POSITION", chipPos, "COLLECTFASTARRAY", collectFastArray,
230                    "ERROR", sbeError);
231         logErrorAndCreatePEL(sbeError, chipPos, sbeType, SBEFIFO_CMD_CLASS_DUMP,
232                              SBEFIFO_CMD_GET_DUMP);
233         return;
234     }
235     writeDumpFile(path, id, clockState, 0, chipName, chipPos, dataPtr, len);
236 }
237 
238 void SbeDumpCollector::writeDumpFile(
239     const std::filesystem::path& path, const uint32_t id,
240     const uint8_t clockState, const uint8_t nodeNum,
241     const std::string& chipName, const uint8_t chipPos,
242     util::DumpDataPtr& dataPtr, const uint32_t len)
243 {
244     using namespace sdbusplus::xyz::openbmc_project::Common::Error;
245     namespace fileError = sdbusplus::xyz::openbmc_project::Common::File::Error;
246 
247     // Construct the filename
248     std::ostringstream filenameBuilder;
249     filenameBuilder << std::setw(8) << std::setfill('0') << id
250                     << ".SbeDataClocks"
251                     << (clockState == SBE_CLOCK_ON ? "On" : "Off") << ".node"
252                     << static_cast<int>(nodeNum) << "." << chipName
253                     << static_cast<int>(chipPos);
254 
255     auto dumpPath = path / filenameBuilder.str();
256 
257     // Attempt to open the file
258     std::ofstream outfile(dumpPath, std::ios::out | std::ios::binary);
259     if (!outfile)
260     {
261         using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
262         using metadata = xyz::openbmc_project::Common::File::Open;
263         // Unable to open the file for writing
264         auto err = errno;
265         lg2::error("Error opening file to write dump, "
266                    "errno({ERRNO}), filepath({FILEPATH})",
267                    "ERRNO", err, "FILEPATH", dumpPath.string());
268 
269         report<Open>(metadata::ERRNO(err), metadata::PATH(dumpPath.c_str()));
270         // Just return here, so that the dumps collected from other
271         // SBEs can be packaged.
272         return;
273     }
274 
275     // Write to the file
276     try
277     {
278         outfile.write(reinterpret_cast<const char*>(dataPtr.getData()), len);
279 
280         lg2::info("Successfully wrote dump file "
281                   "path=({PATH}) size=({SIZE})",
282                   "PATH", dumpPath.string(), "SIZE", len);
283     }
284     catch (const std::ofstream::failure& oe)
285     {
286         using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
287         using metadata = xyz::openbmc_project::Common::File::Write;
288 
289         lg2::error(
290             "Failed to write to dump file, "
291             "errorMsg({ERROR}), error({ERRORCODE}), filepath({FILEPATH})",
292             "ERROR", oe, "ERRORCODE", oe.code().value(), "FILEPATH",
293             dumpPath.string());
294         report<Write>(metadata::ERRNO(oe.code().value()),
295                       metadata::PATH(dumpPath.c_str()));
296         // Just return here so dumps collected from other SBEs can be
297         // packaged.
298     }
299 }
300 
301 bool SbeDumpCollector::executeThreadStop(struct pdbg_target* target)
302 {
303     try
304     {
305         openpower::phal::sbe::threadStopProc(target);
306         return true;
307     }
308     catch (const openpower::phal::sbeError_t& sbeError)
309     {
310         uint64_t chipPos = pdbg_target_index(target);
311         if (sbeError.errType() ==
312             openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
313         {
314             lg2::info("SBE is not ready to accept chip-op: Skipping "
315                       "stop instruction on proc-({POSITION}) error({ERROR}) ",
316                       "POSITION", chipPos, "ERROR", sbeError);
317             return false; // Do not include the target for dump collection
318         }
319 
320         lg2::error("Stop instructions failed on "
321                    "proc-({POSITION}) error({ERROR}) ",
322                    "POSITION", chipPos, "ERROR", sbeError);
323 
324         logErrorAndCreatePEL(sbeError, chipPos, SBETypes::PROC,
325                              SBEFIFO_CMD_CLASS_INSTRUCTION,
326                              SBEFIFO_CMD_CONTROL_INSN);
327         // For TIMEOUT, log the error and skip adding the processor for dump
328         // collection
329         if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
330         {
331             return false;
332         }
333     }
334     // Include the target for dump collection for SBE_CMD_FAILED or any other
335     // non-critical errors
336     return true;
337 }
338 
339 } // namespace openpower::dump::sbe_chipop
340