1 #include "occ_ffdc.hpp"
2 
3 #include "utils.hpp"
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <sys/ioctl.h>
9 #include <unistd.h>
10 
11 #include <nlohmann/json.hpp>
12 #include <org/open_power/OCC/Device/error.hpp>
13 #include <phosphor-logging/elog-errors.hpp>
14 #include <phosphor-logging/elog.hpp>
15 #include <phosphor-logging/log.hpp>
16 #include <xyz/openbmc_project/Common/error.hpp>
17 #include <xyz/openbmc_project/Logging/Create/server.hpp>
18 
19 #include <format>
20 
21 namespace open_power
22 {
23 namespace occ
24 {
25 
// Upper bound on the number of bytes analyzeEvent() reads from the FFDC
// descriptor in one pass (also the size of its read buffer).
static constexpr size_t max_ffdc_size = 8192;
// Number of leading bytes analyzeEvent() leaves out when it copies the data it
// read into the temporary FFDC file.
static constexpr size_t sbe_status_header_size = 8;

// D-Bus object path and interface used for the CreatePELWithFFDCFiles calls
// made by createPEL() and createOCCResetPEL().
static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
31 
32 using namespace phosphor::logging;
33 using namespace sdbusplus::org::open_power::OCC::Device::Error;
34 using InternalFailure =
35     sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
36 
// Ask the PEL logging service to create an informational log that carries SBE
// FFDC.
//
// path - error name passed as the first argument of CreatePELWithFFDCFiles
// src6 - value stored under the SRC6 key; its upper 16 bits are traced as the
//        OCC number
// msg  - text stored under the SBE_ERR_MSG key
// fd   - descriptor holding extra FFDC data; attached only when it is greater
//        than zero
//
// Returns the second element of the method reply, or 0 when the D-Bus call
// raised an sdbusplus exception (the failure is traced, not propagated).
uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
                         int fd)
{
    namespace LoggingServer = sdbusplus::xyz::openbmc_project::Logging::server;

    log<level::INFO>(
        std::format("Creating PEL for OCC{} with SBE FFDC: {} - SRC6: 0x{:08X}",
                    src6 >> 16, path, src6)
            .c_str());

    // Caller supplied FFDC goes in as a custom section (type 0xCB, version 1)
    FFDCFiles ffdcFiles;
    if (fd > 0)
    {
        ffdcFiles.emplace_back(FFDCFormat::Custom, static_cast<uint8_t>(0xCB),
                               static_cast<uint8_t>(0x01), fd);
    }

    // Attach recent journal traces. The returned object is held until this
    // function returns so it is not destroyed before the D-Bus call is made.
    auto journalFile = addJournalEntries(ffdcFiles, "openpower-occ-control",
                                         25);

    std::map<std::string, std::string> pelData{
        {"SRC6", std::to_string(src6)},
        {"_PID", std::to_string(getpid())},
        {"SBE_ERR_MSG", msg},
    };

    auto& bus = utils::getBus();

    uint32_t plid = 0;
    try
    {
        const std::string service = utils::getService(loggingObjectPath,
                                                      opLoggingInterface);
        auto request = bus.new_method_call(service.c_str(), loggingObjectPath,
                                           opLoggingInterface,
                                           "CreatePELWithFFDCFiles");

        // Logged as Notice (Informational): the error should trigger an OCC
        // reset, and if that does not recover, HTMGT/HBRT will create the
        // unrecoverable error.
        auto severity = LoggingServer::convertForMessage(
            LoggingServer::Entry::Level::Notice);

        request.append(path, severity, pelData, ffdcFiles);
        auto response = bus.call(request);

        std::tuple<uint32_t, uint32_t> reply{0, 0};
        response.read(reply);
        plid = std::get<1>(reply);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create PEL: {}", e.what()).c_str());
    }

    return plid;
}
101 
// Ask the PEL logging service to create an informational log for an OCC
// reset.
//
// instance - OCC number, stored under the OCC key
// path     - error name passed as the first argument of
//            CreatePELWithFFDCFiles
// err      - when non-zero, its negation is stored under CALLOUT_ERRNO
// callout  - when non-null, stored under CALLOUT_DEVICE_PATH
//
// An sdbusplus exception raised while talking to the logging service is
// traced and swallowed.
void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
                             const char* callout)
{
    namespace LoggingServer = sdbusplus::xyz::openbmc_project::Logging::server;

    std::map<std::string, std::string> pelData{
        {"_PID", std::to_string(getpid())},
        {"OCC", std::to_string(instance)},
    };
    if (err != 0)
    {
        pelData.emplace("CALLOUT_ERRNO", std::to_string(-err));
    }
    if (callout != nullptr)
    {
        pelData.emplace("CALLOUT_DEVICE_PATH", callout);
    }

    log<level::INFO>(
        std::format("Creating OCC Reset PEL for OCC{}: {}", instance, path)
            .c_str());

    auto& bus = utils::getBus();

    try
    {
        // Recent journal traces are the only FFDC attached to this log. The
        // returned object stays in scope until the call has been made.
        FFDCFiles ffdcFiles;
        auto journalFile = addJournalEntries(ffdcFiles,
                                             "openpower-occ-control", 25);

        const std::string service = utils::getService(loggingObjectPath,
                                                      opLoggingInterface);
        auto request = bus.new_method_call(service.c_str(), loggingObjectPath,
                                           opLoggingInterface,
                                           "CreatePELWithFFDCFiles");

        // Logged as Notice (Informational): the error should trigger an OCC
        // reset, and if that does not recover, HTMGT/HBRT will create the
        // unrecoverable error.
        auto severity = LoggingServer::convertForMessage(
            LoggingServer::Entry::Level::Notice);

        request.append(path, severity, pelData, ffdcFiles);
        bus.call(request);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create OCC Reset PEL: {}", e.what())
                .c_str());
    }
}
158 
159 // Reads the SBE FFDC file and create an error log
analyzeEvent()160 void FFDC::analyzeEvent()
161 {
162     int tfd = -1;
163     size_t total = 0;
164     auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
165     while (total < max_ffdc_size)
166     {
167         auto r = read(fd, data.get() + total, max_ffdc_size - total);
168         if (r < 0)
169         {
170             elog<ReadFailure>(
171                 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
172                     CALLOUT_ERRNO(errno),
173                 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
174                     CALLOUT_DEVICE_PATH(file.c_str()));
175             return;
176         }
177         if (!r)
178         {
179             break;
180         }
181         total += r;
182     }
183 
184     lseek(fd, 0, SEEK_SET);
185 
186     if (!total)
187     {
188         // no error
189         return;
190     }
191 
192     uint32_t src6 = instance << 16;
193     src6 |= *(data.get() + 2) << 8;
194     src6 |= *(data.get() + 3);
195 
196     if (total > sbe_status_header_size)
197     {
198         std::string templateString = fs::temp_directory_path() /
199                                      "OCC_FFDC_XXXXXX";
200         tfd = mkostemp(templateString.data(), O_RDWR);
201         if (tfd < 0)
202         {
203             log<level::ERR>("Couldn't create temporary FFDC file");
204         }
205         else
206         {
207             temporaryFiles.emplace_back(templateString, tfd);
208             size_t written = sbe_status_header_size;
209             while (written < total)
210             {
211                 auto r = write(tfd, data.get() + written, total - written);
212                 if (r < 0)
213                 {
214                     close(temporaryFiles.back().second);
215                     fs::remove(temporaryFiles.back().first);
216                     temporaryFiles.pop_back();
217                     tfd = -1;
218                     log<level::ERR>("Couldn't write temporary FFDC file");
219                     break;
220                 }
221                 if (!r)
222                 {
223                     break;
224                 }
225                 written += r;
226             }
227         }
228     }
229 
230     createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
231               "SBE command reported error", tfd);
232 }
233 
// Create a file holding the latest journal entries for the specified
// executable and, when that file is usable, register it in fileList as a JSON
// FFDC entry.
//
// fileList   - FFDC list that receives the new entry
// executable - identifier whose journal entries are collected
// lines      - maximum number of journal entries requested
//
// Returns whatever makeJsonFFDCFile() produced (possibly null); the caller
// decides how long that object stays alive.
std::unique_ptr<FFDCFile> FFDC::addJournalEntries(FFDCFiles& fileList,
                                                  const std::string& executable,
                                                  unsigned int lines)
{
    auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));

    const bool usable = journalFile && (journalFile->fd() != -1);
    if (!usable)
    {
        log<level::ERR>(
            std::format(
                "addJournalEntries: Failed to add journal entries for {}",
                executable)
                .c_str());
        return journalFile;
    }

    log<level::DEBUG>(
        std::format("addJournalEntries: Added up to {} journal entries for {}",
                    lines, executable)
            .c_str());
    fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());

    return journalFile;
}
259 
// Write JSON data into a new temporary FFDC file and return the file.
//
// ffdcData - JSON document to serialize into the file
//
// Returns an FFDCFile opened on the temporary file, or nullptr when the file
// could not be created or fully written. On a write failure the temporary
// file is unlinked so nothing is left behind.
std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
{
    std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
    const int tmpFd = mkostemp(tmpFile.data(), O_RDWR);
    if (tmpFd == -1)
    {
        const auto e = errno;
        log<level::ERR>(
            std::format("makeJsonFFDCFile: Failed called to mkostemp, errno={}",
                        e)
                .c_str());
        return nullptr;
    }

    const auto jsonString = ffdcData.dump();

    // write() may transfer fewer bytes than requested, so keep going until
    // the whole document is on disk or a real error occurs.
    size_t written = 0;
    int writeErrno = 0;
    while (written < jsonString.size())
    {
        const auto rc = write(tmpFd, jsonString.data() + written,
                              jsonString.size() - written);
        if (rc < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            // Save errno now; close() below may overwrite it
            writeErrno = errno;
            break;
        }
        if (rc == 0)
        {
            // No progress was made; report it as an I/O error rather than
            // spinning forever
            writeErrno = EIO;
            break;
        }
        written += static_cast<size_t>(rc);
    }
    close(tmpFd);

    if (writeErrno != 0)
    {
        // Do not leave a partial file behind
        unlink(tmpFile.c_str());
        log<level::ERR>(
            std::format(
                "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={}",
                writeErrno)
                .c_str());
        return nullptr;
    }

    return std::make_unique<FFDCFile>(fs::path{tmpFile});
}
295 
296 // Collect the latest journal entries for a specified executable
getJournalEntries(int numLines,std::string executable)297 nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
298 {
299     // Sleep 100ms; otherwise recent journal entries sometimes not available
300     using namespace std::chrono_literals;
301     std::this_thread::sleep_for(100ms);
302 
303     std::vector<std::string> entries;
304 
305     // Open the journal
306     sd_journal* journal;
307     int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
308     if (rc < 0)
309     {
310         // Build one line string containing field values
311         entries.push_back("[Internal error: sd_journal_open(), rc=" +
312                           std::string(strerror(rc)) + "]");
313         return nlohmann::json(entries);
314     }
315 
316     // Create object to automatically close journal
317     JournalCloser closer{journal};
318 
319     // Add match so we only loop over entries with specified field value
320     std::string field{"SYSLOG_IDENTIFIER"};
321     std::string match{field + '=' + executable};
322     rc = sd_journal_add_match(journal, match.c_str(), 0);
323     if (rc < 0)
324     {
325         // Build one line string containing field values
326         entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
327                           std::string(strerror(rc)) + "]");
328     }
329     else
330     {
331         int count{1};
332         entries.reserve(numLines);
333         std::string syslogID, pid, message, timeStamp;
334 
335         // Loop through journal entries from newest to oldest
336         SD_JOURNAL_FOREACH_BACKWARDS(journal)
337         {
338             // Get relevant journal entry fields
339             timeStamp = getTimeStamp(journal);
340             syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
341             pid = getFieldValue(journal, "_PID");
342             message = getFieldValue(journal, "MESSAGE");
343 
344             // Build one line string containing field values
345             entries.push_back(timeStamp + " " + syslogID + "[" + pid +
346                               "]: " + message);
347 
348             // Stop after number of lines was read
349             if (count++ >= numLines)
350             {
351                 break;
352             }
353         }
354     }
355 
356     // put the journal entries in chronological order
357     std::reverse(entries.begin(), entries.end());
358 
359     return nlohmann::json(entries);
360 }
361 
getTimeStamp(sd_journal * journal)362 std::string FFDC::getTimeStamp(sd_journal* journal)
363 {
364     // Get realtime (wallclock) timestamp of current journal entry.  The
365     // timestamp is in microseconds since the epoch.
366     uint64_t usec{0};
367     int rc = sd_journal_get_realtime_usec(journal, &usec);
368     if (rc < 0)
369     {
370         return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
371                std::string(strerror(rc)) + "]";
372     }
373 
374     // Convert to number of seconds since the epoch
375     time_t secs = usec / 1000000;
376 
377     // Convert seconds to tm struct required by strftime()
378     struct tm* timeStruct = localtime(&secs);
379     if (timeStruct == nullptr)
380     {
381         return "[Internal error: localtime() returned nullptr]";
382     }
383 
384     // Convert tm struct into a date/time string
385     char timeStamp[80];
386     strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct);
387 
388     return timeStamp;
389 }
390 
getFieldValue(sd_journal * journal,const std::string & field)391 std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
392 {
393     std::string value{};
394 
395     // Get field data from current journal entry
396     const void* data{nullptr};
397     size_t length{0};
398     int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
399     if (rc < 0)
400     {
401         if (-rc == ENOENT)
402         {
403             // Current entry does not include this field; return empty value
404             return value;
405         }
406         else
407         {
408             return "[Internal error: sd_journal_get_data() rc=" +
409                    std::string(strerror(rc)) + "]";
410         }
411     }
412 
413     // Get value from field data.  Field data in format "FIELD=value".
414     std::string dataString{static_cast<const char*>(data), length};
415     std::string::size_type pos = dataString.find('=');
416     if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
417     {
418         // Value is substring after the '='
419         value = dataString.substr(pos + 1);
420     }
421 
422     return value;
423 }
424 
// Wrap an existing FFDC file: open it read-only and remember its name.
// A failed open is traced with errno; construction still completes, so users
// must check the descriptor before relying on it.
// NOTE(review): the earlier comment says the file is removed automatically on
// destruction - the destructor is not visible here, confirm in the header.
FFDCFile::FFDCFile(const fs::path& name) :
    _fd(open(name.c_str(), O_RDONLY)), _name(name)
{
    if (_fd() == -1)
    {
        const auto openErrno = errno;
        const auto text =
            std::format("FFDCFile: Could not open FFDC file {}. errno {}",
                        _name.string(), openErrno);
        log<level::ERR>(text.c_str());
    }
}
438 
439 } // namespace occ
440 } // namespace open_power
441