xref: /openbmc/openpower-occ-control/occ_ffdc.cpp (revision 3ece5b99026970e1cf9bc7e797aff4c1be81b2f3)
1 #include "occ_ffdc.hpp"
2 
3 #include "utils.hpp"
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <sys/ioctl.h>
9 #include <unistd.h>
10 
11 #include <nlohmann/json.hpp>
12 #include <org/open_power/OCC/Device/error.hpp>
13 #include <phosphor-logging/elog-errors.hpp>
14 #include <phosphor-logging/elog.hpp>
15 #include <phosphor-logging/lg2.hpp>
16 #include <phosphor-logging/log.hpp>
17 #include <xyz/openbmc_project/Common/error.hpp>
18 #include <xyz/openbmc_project/Logging/Create/server.hpp>
19 
20 namespace open_power
21 {
22 namespace occ
23 {
24 
25 static constexpr size_t max_ffdc_size = 8192;
26 static constexpr size_t sbe_status_header_size = 8;
27 
28 static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
29 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
30 
31 using namespace phosphor::logging;
32 using namespace sdbusplus::org::open_power::OCC::Device::Error;
33 using InternalFailure =
34     sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
35 
createPEL(const char * path,uint32_t src6,const char * msg,int fd)36 uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
37                          int fd)
38 {
39     uint32_t plid = 0;
40     std::vector<std::tuple<
41         sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
42         uint8_t, uint8_t, sdbusplus::message::unix_fd>>
43         pelFFDCInfo;
44 
45     lg2::info("Creating PEL for OCC{INST} with SBE FFDC: {PATH} - SRC6: {SRC}",
46               "INST", src6 >> 16, "PATH", path, "SRC", lg2::hex, src6);
47 
48     if (fd > 0)
49     {
50         pelFFDCInfo.push_back(std::make_tuple(
51             sdbusplus::xyz::openbmc_project::Logging::server::Create::
52                 FFDCFormat::Custom,
53             static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd));
54     }
55 
56     // Add journal traces to PEL FFDC
57     auto occJournalFile =
58         addJournalEntries(pelFFDCInfo, "openpower-occ-control", 25);
59 
60     std::map<std::string, std::string> additionalData;
61     additionalData.emplace("SRC6", std::to_string(src6));
62     additionalData.emplace("_PID", std::to_string(getpid()));
63     additionalData.emplace("SBE_ERR_MSG", msg);
64 
65     auto& bus = utils::getBus();
66 
67     try
68     {
69         std::string service =
70             utils::getService(loggingObjectPath, opLoggingInterface);
71         auto method =
72             bus.new_method_call(service.c_str(), loggingObjectPath,
73                                 opLoggingInterface, "CreatePELWithFFDCFiles");
74 
75         // Set level to Notice (Informational). Error should trigger an OCC
76         // reset and if it does not recover, HTMGT/HBRT will create an
77         // unrecoverable error.
78         auto level =
79             sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
80                 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
81                     Notice);
82 
83         method.append(path, level, additionalData, pelFFDCInfo);
84         auto response = bus.call(method);
85         std::tuple<uint32_t, uint32_t> reply = {0, 0};
86 
87         response.read(reply);
88         plid = std::get<1>(reply);
89     }
90     catch (const sdbusplus::exception_t& e)
91     {
92         lg2::error("Failed to create PEL: {ERR}", "ERR", e.what());
93     }
94 
95     return plid;
96 }
97 
createOCCResetPEL(unsigned int instance,const char * path,int err,const char * callout,const bool isInventoryCallout)98 void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
99                              const char* callout, const bool isInventoryCallout)
100 {
101     std::map<std::string, std::string> additionalData;
102 
103     additionalData.emplace("_PID", std::to_string(getpid()));
104 
105     if (err)
106     {
107         additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
108     }
109 
110     lg2::info("Creating OCC Reset PEL for OCC{INST}: {PATH}", "INST", instance,
111               "PATH", path);
112 
113     if (callout)
114     {
115         if (isInventoryCallout)
116         {
117             lg2::info("adding inventory callout path {COPATH}", "COPATH",
118                       std::string(callout));
119             additionalData.emplace("CALLOUT_INVENTORY_PATH",
120                                    std::string(callout));
121         }
122         else
123         {
124             lg2::info("adding device callout path {COPATH}, errno:{ERRNO}",
125                       "COPATH", std::string(callout), "ERRNO", err);
126             additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
127         }
128     }
129 
130     additionalData.emplace("OCC", std::to_string(instance));
131 
132     auto& bus = utils::getBus();
133 
134     try
135     {
136         FFDCFiles ffdc;
137         // Add journal traces to PEL FFDC
138         auto occJournalFile =
139             addJournalEntries(ffdc, "openpower-occ-control", 25);
140 
141         std::string service =
142             utils::getService(loggingObjectPath, opLoggingInterface);
143         auto method =
144             bus.new_method_call(service.c_str(), loggingObjectPath,
145                                 opLoggingInterface, "CreatePELWithFFDCFiles");
146 
147         // Set level to Notice (Informational). Error should trigger an OCC
148         // reset and if it does not recover, HTMGT/HBRT will create an
149         // unrecoverable error.
150         auto level =
151             sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
152                 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
153                     Notice);
154 
155         method.append(path, level, additionalData, ffdc);
156         bus.call(method);
157     }
158     catch (const sdbusplus::exception_t& e)
159     {
160         lg2::error("Failed to create OCC Reset PEL: {ERR}", "ERR", e.what());
161     }
162 }
163 
164 // Reads the SBE FFDC file and create an error log
analyzeEvent()165 void FFDC::analyzeEvent()
166 {
167     int tfd = -1;
168     size_t total = 0;
169     auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
170     while (total < max_ffdc_size)
171     {
172         auto r = read(fd, data.get() + total, max_ffdc_size - total);
173         if (r < 0)
174         {
175             elog<ReadFailure>(
176                 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
177                     CALLOUT_ERRNO(errno),
178                 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
179                     CALLOUT_DEVICE_PATH(file.c_str()));
180             return;
181         }
182         if (!r)
183         {
184             break;
185         }
186         total += r;
187     }
188 
189     lseek(fd, 0, SEEK_SET);
190 
191     if (!total)
192     {
193         // no error
194         return;
195     }
196 
197     uint32_t src6 = instance << 16;
198     src6 |= *(data.get() + 2) << 8;
199     src6 |= *(data.get() + 3);
200 
201     if (total > sbe_status_header_size)
202     {
203         std::string templateString =
204             fs::temp_directory_path() / "OCC_FFDC_XXXXXX";
205         tfd = mkostemp(templateString.data(), O_RDWR);
206         if (tfd < 0)
207         {
208             lg2::error("Couldn't create temporary FFDC file");
209         }
210         else
211         {
212             temporaryFiles.emplace_back(templateString, tfd);
213             size_t written = sbe_status_header_size;
214             while (written < total)
215             {
216                 auto r = write(tfd, data.get() + written, total - written);
217                 if (r < 0)
218                 {
219                     close(temporaryFiles.back().second);
220                     fs::remove(temporaryFiles.back().first);
221                     temporaryFiles.pop_back();
222                     tfd = -1;
223                     lg2::error("Couldn't write temporary FFDC file");
224                     break;
225                 }
226                 if (!r)
227                 {
228                     break;
229                 }
230                 written += r;
231             }
232         }
233     }
234 
235     createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
236               "SBE command reported error", tfd);
237 }
238 
239 // Create file with the latest journal entries for specified executable
addJournalEntries(FFDCFiles & fileList,const std::string & executable,unsigned int lines)240 std::unique_ptr<FFDCFile> FFDC::addJournalEntries(
241     FFDCFiles& fileList, const std::string& executable, unsigned int lines)
242 {
243     auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));
244     if (journalFile && journalFile->fd() != -1)
245     {
246         lg2::debug(
247             "addJournalEntries: Added up to {NUM} journal entries for {APP}",
248             "NUM", lines, "APP", executable);
249         fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());
250     }
251     else
252     {
253         lg2::error("addJournalEntries: Failed to add journal entries for {APP}",
254                    "APP", executable);
255     }
256     return journalFile;
257 }
258 
259 // Write JSON data into FFDC file and return the file
makeJsonFFDCFile(const nlohmann::json & ffdcData)260 std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
261 {
262     std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
263     auto fd = mkostemp(tmpFile.data(), O_RDWR);
264     if (fd != -1)
265     {
266         auto jsonString = ffdcData.dump();
267         auto rc = write(fd, jsonString.data(), jsonString.size());
268         close(fd);
269         if (rc != -1)
270         {
271             fs::path jsonFile{tmpFile};
272             return std::make_unique<FFDCFile>(jsonFile);
273         }
274         else
275         {
276             auto e = errno;
277             lg2::error(
278                 "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={ERR}",
279                 "ERR", e);
280         }
281     }
282     else
283     {
284         auto e = errno;
285         lg2::error("makeJsonFFDCFile: Failed called to mkostemp, errno={ERR}",
286                    "ERR", e);
287     }
288     return nullptr;
289 }
290 
291 // Collect the latest journal entries for a specified executable
getJournalEntries(int numLines,std::string executable)292 nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
293 {
294     // Sleep 100ms; otherwise recent journal entries sometimes not available
295     using namespace std::chrono_literals;
296     std::this_thread::sleep_for(100ms);
297 
298     std::vector<std::string> entries;
299 
300     // Open the journal
301     sd_journal* journal;
302     int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
303     if (rc < 0)
304     {
305         // Build one line string containing field values
306         entries.push_back("[Internal error: sd_journal_open(), rc=" +
307                           std::string(strerror(rc)) + "]");
308         return nlohmann::json(entries);
309     }
310 
311     // Create object to automatically close journal
312     JournalCloser closer{journal};
313 
314     // Add match so we only loop over entries with specified field value
315     std::string field{"SYSLOG_IDENTIFIER"};
316     std::string match{field + '=' + executable};
317     rc = sd_journal_add_match(journal, match.c_str(), 0);
318     if (rc < 0)
319     {
320         // Build one line string containing field values
321         entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
322                           std::string(strerror(rc)) + "]");
323     }
324     else
325     {
326         int count{1};
327         entries.reserve(numLines);
328         std::string syslogID, pid, message, timeStamp;
329 
330         // Loop through journal entries from newest to oldest
331         SD_JOURNAL_FOREACH_BACKWARDS(journal)
332         {
333             // Get relevant journal entry fields
334             timeStamp = getTimeStamp(journal);
335             syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
336             pid = getFieldValue(journal, "_PID");
337             message = getFieldValue(journal, "MESSAGE");
338 
339             // Build one line string containing field values
340             entries.push_back(
341                 timeStamp + " " + syslogID + "[" + pid + "]: " + message);
342 
343             // Stop after number of lines was read
344             if (count++ >= numLines)
345             {
346                 break;
347             }
348         }
349     }
350 
351     // put the journal entries in chronological order
352     std::reverse(entries.begin(), entries.end());
353 
354     return nlohmann::json(entries);
355 }
356 
getTimeStamp(sd_journal * journal)357 std::string FFDC::getTimeStamp(sd_journal* journal)
358 {
359     // Get realtime (wallclock) timestamp of current journal entry.  The
360     // timestamp is in microseconds since the epoch.
361     uint64_t usec{0};
362     int rc = sd_journal_get_realtime_usec(journal, &usec);
363     if (rc < 0)
364     {
365         return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
366                std::string(strerror(rc)) + "]";
367     }
368 
369     // Convert to number of seconds since the epoch
370     time_t secs = usec / 1000000;
371 
372     // Convert seconds to tm struct required by strftime()
373     struct tm* timeStruct = localtime(&secs);
374     if (timeStruct == nullptr)
375     {
376         return "[Internal error: localtime() returned nullptr]";
377     }
378 
379     // Convert tm struct into a date/time string
380     char timeStamp[80];
381     strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct);
382 
383     return timeStamp;
384 }
385 
getFieldValue(sd_journal * journal,const std::string & field)386 std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
387 {
388     std::string value{};
389 
390     // Get field data from current journal entry
391     const void* data{nullptr};
392     size_t length{0};
393     int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
394     if (rc < 0)
395     {
396         if (-rc == ENOENT)
397         {
398             // Current entry does not include this field; return empty value
399             return value;
400         }
401         else
402         {
403             return "[Internal error: sd_journal_get_data() rc=" +
404                    std::string(strerror(rc)) + "]";
405         }
406     }
407 
408     // Get value from field data.  Field data in format "FIELD=value".
409     std::string dataString{static_cast<const char*>(data), length};
410     std::string::size_type pos = dataString.find('=');
411     if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
412     {
413         // Value is substring after the '='
414         value = dataString.substr(pos + 1);
415     }
416 
417     return value;
418 }
419 
420 // Create temporary file that will automatically get removed when destructed
FFDCFile(const fs::path & name)421 FFDCFile::FFDCFile(const fs::path& name) :
422     _fd(open(name.c_str(), O_RDONLY)), _name(name)
423 {
424     if (_fd() == -1)
425     {
426         auto e = errno;
427         lg2::error("FFDCFile: Could not open FFDC file {FILE}. errno {ERR}",
428                    "FILE", _name.string(), "ERR", e);
429     }
430 }
431 
432 } // namespace occ
433 } // namespace open_power
434