1 #include "occ_ffdc.hpp"
2
3 #include "utils.hpp"
4
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <sys/ioctl.h>
9 #include <unistd.h>
10
11 #include <nlohmann/json.hpp>
12 #include <org/open_power/OCC/Device/error.hpp>
13 #include <phosphor-logging/elog-errors.hpp>
14 #include <phosphor-logging/elog.hpp>
15 #include <phosphor-logging/lg2.hpp>
16 #include <phosphor-logging/log.hpp>
17 #include <xyz/openbmc_project/Common/error.hpp>
18 #include <xyz/openbmc_project/Logging/Create/server.hpp>
19
20 namespace open_power
21 {
22 namespace occ
23 {
24
25 static constexpr size_t max_ffdc_size = 8192;
26 static constexpr size_t sbe_status_header_size = 8;
27
28 static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
29 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";
30
31 using namespace phosphor::logging;
32 using namespace sdbusplus::org::open_power::OCC::Device::Error;
33 using InternalFailure =
34 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;
35
createPEL(const char * path,uint32_t src6,const char * msg,int fd)36 uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
37 int fd)
38 {
39 uint32_t plid = 0;
40 std::vector<std::tuple<
41 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
42 uint8_t, uint8_t, sdbusplus::message::unix_fd>>
43 pelFFDCInfo;
44
45 lg2::info("Creating PEL for OCC{INST} with SBE FFDC: {PATH} - SRC6: {SRC}",
46 "INST", src6 >> 16, "PATH", path, "SRC", lg2::hex, src6);
47
48 if (fd > 0)
49 {
50 pelFFDCInfo.push_back(std::make_tuple(
51 sdbusplus::xyz::openbmc_project::Logging::server::Create::
52 FFDCFormat::Custom,
53 static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd));
54 }
55
56 // Add journal traces to PEL FFDC
57 auto occJournalFile =
58 addJournalEntries(pelFFDCInfo, "openpower-occ-control", 25);
59
60 std::map<std::string, std::string> additionalData;
61 additionalData.emplace("SRC6", std::to_string(src6));
62 additionalData.emplace("_PID", std::to_string(getpid()));
63 additionalData.emplace("SBE_ERR_MSG", msg);
64
65 auto& bus = utils::getBus();
66
67 try
68 {
69 std::string service =
70 utils::getService(loggingObjectPath, opLoggingInterface);
71 auto method =
72 bus.new_method_call(service.c_str(), loggingObjectPath,
73 opLoggingInterface, "CreatePELWithFFDCFiles");
74
75 // Set level to Notice (Informational). Error should trigger an OCC
76 // reset and if it does not recover, HTMGT/HBRT will create an
77 // unrecoverable error.
78 auto level =
79 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
80 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
81 Notice);
82
83 method.append(path, level, additionalData, pelFFDCInfo);
84 auto response = bus.call(method);
85 std::tuple<uint32_t, uint32_t> reply = {0, 0};
86
87 response.read(reply);
88 plid = std::get<1>(reply);
89 }
90 catch (const sdbusplus::exception_t& e)
91 {
92 lg2::error("Failed to create PEL: {ERR}", "ERR", e.what());
93 }
94
95 return plid;
96 }
97
createOCCResetPEL(unsigned int instance,const char * path,int err,const char * callout)98 void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
99 const char* callout)
100 {
101 std::map<std::string, std::string> additionalData;
102
103 additionalData.emplace("_PID", std::to_string(getpid()));
104
105 if (err)
106 {
107 additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
108 }
109
110 if (callout)
111 {
112 additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
113 }
114
115 additionalData.emplace("OCC", std::to_string(instance));
116
117 lg2::info("Creating OCC Reset PEL for OCC{INST}: {PATH}", "INST", instance,
118 "PATH", path);
119
120 auto& bus = utils::getBus();
121
122 try
123 {
124 FFDCFiles ffdc;
125 // Add journal traces to PEL FFDC
126 auto occJournalFile =
127 addJournalEntries(ffdc, "openpower-occ-control", 25);
128
129 std::string service =
130 utils::getService(loggingObjectPath, opLoggingInterface);
131 auto method =
132 bus.new_method_call(service.c_str(), loggingObjectPath,
133 opLoggingInterface, "CreatePELWithFFDCFiles");
134
135 // Set level to Notice (Informational). Error should trigger an OCC
136 // reset and if it does not recover, HTMGT/HBRT will create an
137 // unrecoverable error.
138 auto level =
139 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
140 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
141 Notice);
142
143 method.append(path, level, additionalData, ffdc);
144 bus.call(method);
145 }
146 catch (const sdbusplus::exception_t& e)
147 {
148 lg2::error("Failed to create OCC Reset PEL: {ERR}", "ERR", e.what());
149 }
150 }
151
152 // Reads the SBE FFDC file and create an error log
analyzeEvent()153 void FFDC::analyzeEvent()
154 {
155 int tfd = -1;
156 size_t total = 0;
157 auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
158 while (total < max_ffdc_size)
159 {
160 auto r = read(fd, data.get() + total, max_ffdc_size - total);
161 if (r < 0)
162 {
163 elog<ReadFailure>(
164 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
165 CALLOUT_ERRNO(errno),
166 phosphor::logging::org::open_power::OCC::Device::ReadFailure::
167 CALLOUT_DEVICE_PATH(file.c_str()));
168 return;
169 }
170 if (!r)
171 {
172 break;
173 }
174 total += r;
175 }
176
177 lseek(fd, 0, SEEK_SET);
178
179 if (!total)
180 {
181 // no error
182 return;
183 }
184
185 uint32_t src6 = instance << 16;
186 src6 |= *(data.get() + 2) << 8;
187 src6 |= *(data.get() + 3);
188
189 if (total > sbe_status_header_size)
190 {
191 std::string templateString =
192 fs::temp_directory_path() / "OCC_FFDC_XXXXXX";
193 tfd = mkostemp(templateString.data(), O_RDWR);
194 if (tfd < 0)
195 {
196 lg2::error("Couldn't create temporary FFDC file");
197 }
198 else
199 {
200 temporaryFiles.emplace_back(templateString, tfd);
201 size_t written = sbe_status_header_size;
202 while (written < total)
203 {
204 auto r = write(tfd, data.get() + written, total - written);
205 if (r < 0)
206 {
207 close(temporaryFiles.back().second);
208 fs::remove(temporaryFiles.back().first);
209 temporaryFiles.pop_back();
210 tfd = -1;
211 lg2::error("Couldn't write temporary FFDC file");
212 break;
213 }
214 if (!r)
215 {
216 break;
217 }
218 written += r;
219 }
220 }
221 }
222
223 createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
224 "SBE command reported error", tfd);
225 }
226
227 // Create file with the latest journal entries for specified executable
addJournalEntries(FFDCFiles & fileList,const std::string & executable,unsigned int lines)228 std::unique_ptr<FFDCFile> FFDC::addJournalEntries(
229 FFDCFiles& fileList, const std::string& executable, unsigned int lines)
230 {
231 auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));
232 if (journalFile && journalFile->fd() != -1)
233 {
234 lg2::debug(
235 "addJournalEntries: Added up to {NUM} journal entries for {APP}",
236 "NUM", lines, "APP", executable);
237 fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());
238 }
239 else
240 {
241 lg2::error("addJournalEntries: Failed to add journal entries for {APP}",
242 "APP", executable);
243 }
244 return journalFile;
245 }
246
247 // Write JSON data into FFDC file and return the file
makeJsonFFDCFile(const nlohmann::json & ffdcData)248 std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
249 {
250 std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
251 auto fd = mkostemp(tmpFile.data(), O_RDWR);
252 if (fd != -1)
253 {
254 auto jsonString = ffdcData.dump();
255 auto rc = write(fd, jsonString.data(), jsonString.size());
256 close(fd);
257 if (rc != -1)
258 {
259 fs::path jsonFile{tmpFile};
260 return std::make_unique<FFDCFile>(jsonFile);
261 }
262 else
263 {
264 auto e = errno;
265 lg2::error(
266 "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={ERR}",
267 "ERR", e);
268 }
269 }
270 else
271 {
272 auto e = errno;
273 lg2::error("makeJsonFFDCFile: Failed called to mkostemp, errno={ERR}",
274 "ERR", e);
275 }
276 return nullptr;
277 }
278
279 // Collect the latest journal entries for a specified executable
getJournalEntries(int numLines,std::string executable)280 nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
281 {
282 // Sleep 100ms; otherwise recent journal entries sometimes not available
283 using namespace std::chrono_literals;
284 std::this_thread::sleep_for(100ms);
285
286 std::vector<std::string> entries;
287
288 // Open the journal
289 sd_journal* journal;
290 int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
291 if (rc < 0)
292 {
293 // Build one line string containing field values
294 entries.push_back("[Internal error: sd_journal_open(), rc=" +
295 std::string(strerror(rc)) + "]");
296 return nlohmann::json(entries);
297 }
298
299 // Create object to automatically close journal
300 JournalCloser closer{journal};
301
302 // Add match so we only loop over entries with specified field value
303 std::string field{"SYSLOG_IDENTIFIER"};
304 std::string match{field + '=' + executable};
305 rc = sd_journal_add_match(journal, match.c_str(), 0);
306 if (rc < 0)
307 {
308 // Build one line string containing field values
309 entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
310 std::string(strerror(rc)) + "]");
311 }
312 else
313 {
314 int count{1};
315 entries.reserve(numLines);
316 std::string syslogID, pid, message, timeStamp;
317
318 // Loop through journal entries from newest to oldest
319 SD_JOURNAL_FOREACH_BACKWARDS(journal)
320 {
321 // Get relevant journal entry fields
322 timeStamp = getTimeStamp(journal);
323 syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
324 pid = getFieldValue(journal, "_PID");
325 message = getFieldValue(journal, "MESSAGE");
326
327 // Build one line string containing field values
328 entries.push_back(
329 timeStamp + " " + syslogID + "[" + pid + "]: " + message);
330
331 // Stop after number of lines was read
332 if (count++ >= numLines)
333 {
334 break;
335 }
336 }
337 }
338
339 // put the journal entries in chronological order
340 std::reverse(entries.begin(), entries.end());
341
342 return nlohmann::json(entries);
343 }
344
getTimeStamp(sd_journal * journal)345 std::string FFDC::getTimeStamp(sd_journal* journal)
346 {
347 // Get realtime (wallclock) timestamp of current journal entry. The
348 // timestamp is in microseconds since the epoch.
349 uint64_t usec{0};
350 int rc = sd_journal_get_realtime_usec(journal, &usec);
351 if (rc < 0)
352 {
353 return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
354 std::string(strerror(rc)) + "]";
355 }
356
357 // Convert to number of seconds since the epoch
358 time_t secs = usec / 1000000;
359
360 // Convert seconds to tm struct required by strftime()
361 struct tm* timeStruct = localtime(&secs);
362 if (timeStruct == nullptr)
363 {
364 return "[Internal error: localtime() returned nullptr]";
365 }
366
367 // Convert tm struct into a date/time string
368 char timeStamp[80];
369 strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct);
370
371 return timeStamp;
372 }
373
getFieldValue(sd_journal * journal,const std::string & field)374 std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
375 {
376 std::string value{};
377
378 // Get field data from current journal entry
379 const void* data{nullptr};
380 size_t length{0};
381 int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
382 if (rc < 0)
383 {
384 if (-rc == ENOENT)
385 {
386 // Current entry does not include this field; return empty value
387 return value;
388 }
389 else
390 {
391 return "[Internal error: sd_journal_get_data() rc=" +
392 std::string(strerror(rc)) + "]";
393 }
394 }
395
396 // Get value from field data. Field data in format "FIELD=value".
397 std::string dataString{static_cast<const char*>(data), length};
398 std::string::size_type pos = dataString.find('=');
399 if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
400 {
401 // Value is substring after the '='
402 value = dataString.substr(pos + 1);
403 }
404
405 return value;
406 }
407
408 // Create temporary file that will automatically get removed when destructed
FFDCFile(const fs::path & name)409 FFDCFile::FFDCFile(const fs::path& name) :
410 _fd(open(name.c_str(), O_RDONLY)), _name(name)
411 {
412 if (_fd() == -1)
413 {
414 auto e = errno;
415 lg2::error("FFDCFile: Could not open FFDC file {FILE}. errno {ERR}",
416 "FILE", _name.string(), "ERR", e);
417 }
418 }
419
420 } // namespace occ
421 } // namespace open_power
422