// NOTE(review): the angle-bracket header names and several template arguments
// in this file were lost before review. The include list and the template
// arguments below were reconstructed from how the code uses them -- confirm
// them against the repository before merging.
#include "occ_ffdc.hpp"

#include "utils.hpp"

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <systemd/sd-journal.h>
#include <time.h>
#include <unistd.h>

#include <nlohmann/json.hpp>
#include <org/open_power/OCC/Device/error.hpp>
#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/elog.hpp>
#include <phosphor-logging/lg2.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Logging/Create/server.hpp>
#include <xyz/openbmc_project/Logging/Entry/server.hpp>

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <thread>
#include <tuple>
#include <vector>

namespace open_power
{
namespace occ
{

// Maximum number of bytes read from the SBE FFDC source in analyzeEvent()
static constexpr size_t max_ffdc_size = 8192;
// Leading bytes of the SBE data that are not copied into the FFDC file
static constexpr size_t sbe_status_header_size = 8;

static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";

using namespace phosphor::logging;
using namespace sdbusplus::org::open_power::OCC::Device::Error;
using InternalFailure =
    sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

/**
 * @brief Create a PEL (with optional SBE FFDC file and journal traces) by
 *        calling CreatePELWithFFDCFiles on the logging service.
 *
 * @param[in] path - error name passed as the first method argument
 * @param[in] src6 - value stored as "SRC6" in the additional data; the
 *                   upper 16 bits are logged as the OCC instance
 * @param[in] msg  - text stored as "SBE_ERR_MSG" (nullptr is treated as "")
 * @param[in] fd   - descriptor of an FFDC file to attach; only attached when
 *                   greater than zero
 *
 * @return second element of the D-Bus reply tuple, or 0 when the call failed
 */
uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
                         int fd)
{
    uint32_t plid = 0;
    // Must be FFDCFiles: it is passed to addJournalEntries(FFDCFiles&, ...)
    FFDCFiles pelFFDCInfo;

    lg2::info("Creating PEL for OCC{INST} with SBE FFDC: {PATH} - SRC6: {SRC}",
              "INST", src6 >> 16, "PATH", path, "SRC", lg2::hex, src6);

    // NOTE(review): descriptor 0 is technically valid but is skipped here;
    // left unchanged because callers may rely on 0 meaning "no file".
    if (fd > 0)
    {
        pelFFDCInfo.emplace_back(
            sdbusplus::xyz::openbmc_project::Logging::server::Create::
                FFDCFormat::Custom,
            0xCB, 0x01, fd);
    }

    // Add journal traces to PEL FFDC. The returned object is held until the
    // end of this function so its descriptor stays open for the D-Bus call.
    auto occJournalFile =
        addJournalEntries(pelFFDCInfo, "openpower-occ-control", 25);

    std::map<std::string, std::string> additionalData;
    additionalData.emplace("SRC6", std::to_string(src6));
    additionalData.emplace("_PID", std::to_string(getpid()));
    // Constructing std::string from a null pointer is undefined behaviour
    additionalData.emplace("SBE_ERR_MSG", msg ? msg : "");

    auto& bus = utils::getBus();

    try
    {
        std::string service =
            utils::getService(loggingObjectPath, opLoggingInterface);
        auto method =
            bus.new_method_call(service.c_str(), loggingObjectPath,
                                opLoggingInterface, "CreatePELWithFFDCFiles");

        // Set level to Notice (Informational). Error should trigger an OCC
        // reset and if it does not recover, HTMGT/HBRT will create an
        // unrecoverable error.
        auto level =
            sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
                sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
                    Notice);

        method.append(path, level, additionalData, pelFFDCInfo);
        auto response = bus.call(method);
        std::tuple<uint32_t, uint32_t> reply = {0, 0};

        response.read(reply);
        plid = std::get<1>(reply);
    }
    catch (const sdbusplus::exception_t& e)
    {
        lg2::error("Failed to create PEL: {ERR}", "ERR", e.what());
    }

    return plid;
}

/**
 * @brief Create an informational PEL for an OCC reset, with journal traces
 *        attached as FFDC.
 *
 * @param[in] instance - OCC instance, stored as "OCC" in the additional data
 * @param[in] path     - error name passed as the first method argument
 * @param[in] err      - when non-zero, "CALLOUT_ERRNO" is set to -err
 * @param[in] callout  - when non-null, stored as "CALLOUT_DEVICE_PATH"
 */
void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
                             const char* callout)
{
    std::map<std::string, std::string> additionalData;

    additionalData.emplace("_PID", std::to_string(getpid()));

    if (err)
    {
        additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
    }

    if (callout)
    {
        additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
    }

    additionalData.emplace("OCC", std::to_string(instance));

    lg2::info("Creating OCC Reset PEL for OCC{INST}: {PATH}", "INST", instance,
              "PATH", path);

    auto& bus = utils::getBus();

    try
    {
        FFDCFiles ffdc;
        // Add journal traces to PEL FFDC. Held until the end of the try
        // block so the descriptor stays open for the D-Bus call.
        auto occJournalFile =
            addJournalEntries(ffdc, "openpower-occ-control", 25);

        std::string service =
            utils::getService(loggingObjectPath, opLoggingInterface);
        auto method =
            bus.new_method_call(service.c_str(), loggingObjectPath,
                                opLoggingInterface, "CreatePELWithFFDCFiles");

        // Set level to Notice (Informational). Error should trigger an OCC
        // reset and if it does not recover, HTMGT/HBRT will create an
        // unrecoverable error.
        auto level =
            sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
                sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
                    Notice);

        method.append(path, level, additionalData, ffdc);
        bus.call(method);
    }
    catch (const sdbusplus::exception_t& e)
    {
        lg2::error("Failed to create OCC Reset PEL: {ERR}", "ERR", e.what());
    }
}

/**
 * @brief Reads the SBE FFDC file and creates an error log.
 *
 * Reads up to max_ffdc_size bytes from `fd`, rewinds it, and returns without
 * logging when nothing was read. Otherwise SRC6 is built from `instance`
 * (upper 16 bits) and bytes 2 and 3 of the data. Any bytes beyond the first
 * sbe_status_header_size are copied to a temporary file, which is recorded in
 * `temporaryFiles` and passed to createPEL().
 */
void FFDC::analyzeEvent()
{
    int tfd = -1;
    size_t total = 0;
    // Array form of make_unique value-initialises, so unread bytes are zero
    auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
    while (total < max_ffdc_size)
    {
        auto r = read(fd, data.get() + total, max_ffdc_size - total);
        if (r < 0)
        {
            elog<ReadFailure>(
                phosphor::logging::org::open_power::OCC::Device::ReadFailure::
                    CALLOUT_ERRNO(errno),
                phosphor::logging::org::open_power::OCC::Device::ReadFailure::
                    CALLOUT_DEVICE_PATH(file.c_str()));
            return;
        }
        if (!r)
        {
            break;
        }
        total += r;
    }

    lseek(fd, 0, SEEK_SET);

    if (!total)
    {
        // no error
        return;
    }

    uint32_t src6 = instance << 16;
    src6 |= data[2] << 8;
    src6 |= data[3];

    if (total > sbe_status_header_size)
    {
        std::string templateString =
            fs::temp_directory_path() / "OCC_FFDC_XXXXXX";
        tfd = mkostemp(templateString.data(), O_RDWR);
        if (tfd < 0)
        {
            lg2::error("Couldn't create temporary FFDC file");
        }
        else
        {
            temporaryFiles.emplace_back(templateString, tfd);
            // Skip the status header; only the FFDC payload is written
            size_t written = sbe_status_header_size;
            while (written < total)
            {
                auto r = write(tfd, data.get() + written, total - written);
                if (r < 0)
                {
                    // Undo the registration so no half-written file is used
                    close(temporaryFiles.back().second);
                    fs::remove(temporaryFiles.back().first);
                    temporaryFiles.pop_back();
                    tfd = -1;
                    lg2::error("Couldn't write temporary FFDC file");
                    break;
                }
                if (!r)
                {
                    break;
                }
                written += r;
            }
        }
    }

    createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
              "SBE command reported error", tfd);
}

/**
 * @brief Create a file with the latest journal entries for the specified
 *        executable and register it in the FFDC file list.
 *
 * @param[in,out] fileList   - list that receives the JSON FFDC entry
 * @param[in]     executable - SYSLOG_IDENTIFIER to collect entries for
 * @param[in]     lines      - maximum number of journal entries
 *
 * @return the journal file object (nullptr when it could not be created);
 *         the caller keeps it until the descriptor is no longer needed
 */
std::unique_ptr<FFDCFile> FFDC::addJournalEntries(
    FFDCFiles& fileList, const std::string& executable, unsigned int lines)
{
    auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));
    if (journalFile && journalFile->fd() != -1)
    {
        lg2::debug(
            "addJournalEntries: Added up to {NUM} journal entries for {APP}",
            "NUM", lines, "APP", executable);
        fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());
    }
    else
    {
        lg2::error("addJournalEntries: Failed to add journal entries for {APP}",
                   "APP", executable);
    }
    return journalFile;
}

/**
 * @brief Write JSON data into a temporary FFDC file and return the file.
 *
 * @param[in] ffdcData - JSON to serialise into the file
 *
 * @return FFDCFile for the written file, or nullptr when the file could not
 *         be created or fully written (the partial file is removed)
 */
std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
{
    std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
    auto fd = mkostemp(tmpFile.data(), O_RDWR);
    if (fd == -1)
    {
        auto e = errno;
        lg2::error("makeJsonFFDCFile: Failed called to mkostemp, errno={ERR}",
                   "ERR", e);
        return nullptr;
    }

    const auto jsonString = ffdcData.dump();
    size_t written = 0;
    int writeErrno = 0;
    // write() may transfer fewer bytes than requested; loop until done
    while (written < jsonString.size())
    {
        auto rc = write(fd, jsonString.data() + written,
                        jsonString.size() - written);
        if (rc < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            // Capture errno now; close() below may overwrite it
            writeErrno = errno;
            break;
        }
        if (rc == 0)
        {
            // No progress and no errno; report a generic I/O error
            writeErrno = EIO;
            break;
        }
        written += static_cast<size_t>(rc);
    }
    close(fd);

    if (written < jsonString.size())
    {
        lg2::error(
            "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={ERR}",
            "ERR", writeErrno);
        // Do not leave an incomplete temporary file behind
        std::error_code ec;
        fs::remove(tmpFile, ec);
        return nullptr;
    }

    fs::path jsonFile{tmpFile};
    return std::make_unique<FFDCFile>(jsonFile);
}

/**
 * @brief Collect the latest journal entries for a specified executable.
 *
 * @param[in] numLines   - maximum number of entries to return; values <= 0
 *                         yield no entries
 * @param[in] executable - SYSLOG_IDENTIFIER value to match
 *
 * @return JSON array of one-line strings in chronological order; on an
 *         sd-journal failure the array holds a single "[Internal error...]"
 *         string instead
 */
nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
{
    // Sleep 100ms; otherwise recent journal entries sometimes not available
    using namespace std::chrono_literals;
    std::this_thread::sleep_for(100ms);

    std::vector<std::string> entries;

    // Open the journal
    sd_journal* journal;
    int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
    if (rc < 0)
    {
        // sd-journal returns a negative errno value; strerror() needs the
        // positive one
        entries.push_back("[Internal error: sd_journal_open(), rc=" +
                          std::string(strerror(-rc)) + "]");
        return nlohmann::json(entries);
    }

    // Create object to automatically close journal
    JournalCloser closer{journal};

    // Add match so we only loop over entries with specified field value
    std::string field{"SYSLOG_IDENTIFIER"};
    std::string match{field + '=' + executable};
    rc = sd_journal_add_match(journal, match.c_str(), 0);
    if (rc < 0)
    {
        entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
                          std::string(strerror(-rc)) + "]");
    }
    else
    {
        int count{0};
        if (numLines > 0)
        {
            // Guarded: a negative int converted to size_t would make
            // reserve() throw std::length_error
            entries.reserve(static_cast<size_t>(numLines));
        }
        std::string syslogID, pid, message, timeStamp;

        // Loop through journal entries from newest to oldest
        SD_JOURNAL_FOREACH_BACKWARDS(journal)
        {
            // Stop once the requested number of lines was read
            if (count++ >= numLines)
            {
                break;
            }

            // Get relevant journal entry fields
            timeStamp = getTimeStamp(journal);
            syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
            pid = getFieldValue(journal, "_PID");
            message = getFieldValue(journal, "MESSAGE");

            // Build one line string containing field values
            entries.push_back(
                timeStamp + " " + syslogID + "[" + pid + "]: " + message);
        }
    }

    // put the journal entries in chronological order
    std::reverse(entries.begin(), entries.end());

    return nlohmann::json(entries);
}

/**
 * @brief Format the realtime timestamp of the current journal entry.
 *
 * @param[in] journal - journal positioned at the entry of interest
 *
 * @return local time formatted with "%b %d %H:%M:%S", or an
 *         "[Internal error...]" string on failure
 */
std::string FFDC::getTimeStamp(sd_journal* journal)
{
    // Get realtime (wallclock) timestamp of current journal entry. The
    // timestamp is in microseconds since the epoch.
    uint64_t usec{0};
    int rc = sd_journal_get_realtime_usec(journal, &usec);
    if (rc < 0)
    {
        return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
               std::string(strerror(-rc)) + "]";
    }

    // Convert to number of seconds since the epoch
    time_t secs = usec / 1000000;

    // Convert seconds to tm struct required by strftime(). localtime_r() is
    // used because localtime() returns a pointer to shared static storage.
    struct tm timeStruct{};
    if (localtime_r(&secs, &timeStruct) == nullptr)
    {
        return "[Internal error: localtime() returned nullptr]";
    }

    // Convert tm struct into a date/time string. Zero-initialised so the
    // result is an empty string if strftime() writes nothing.
    char timeStamp[80]{};
    strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", &timeStruct);

    return timeStamp;
}

/**
 * @brief Get the value of a field from the current journal entry.
 *
 * @param[in] journal - journal positioned at the entry of interest
 * @param[in] field   - journal field name
 *
 * @return the text after the first '=' of the field data; empty when the
 *         entry lacks the field or has no value; an "[Internal error...]"
 *         string on any other sd-journal failure
 */
std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
{
    std::string value{};

    // Get field data from current journal entry
    const void* data{nullptr};
    size_t length{0};
    int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
    if (rc < 0)
    {
        if (-rc == ENOENT)
        {
            // Current entry does not include this field; return empty value
            return value;
        }
        return "[Internal error: sd_journal_get_data() rc=" +
               std::string(strerror(-rc)) + "]";
    }

    // Get value from field data. Field data in format "FIELD=value".
    std::string dataString{static_cast<const char*>(data), length};
    std::string::size_type pos = dataString.find('=');
    if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
    {
        // Value is substring after the '='
        value = dataString.substr(pos + 1);
    }

    return value;
}

/**
 * @brief Open an existing FFDC file read-only and remember its name.
 *
 * Logs an error when the file cannot be opened. Per the original comment the
 * file is a temporary that is removed when this object is destroyed; the
 * destructor is not part of this file -- confirm in occ_ffdc.hpp.
 *
 * @param[in] name - path of the file to open
 */
FFDCFile::FFDCFile(const fs::path& name) :
    _fd(open(name.c_str(), O_RDONLY)), _name(name)
{
    if (_fd() == -1)
    {
        auto e = errno;
        lg2::error("FFDCFile: Could not open FFDC file {FILE}. errno {ERR}",
                   "FILE", _name.string(), "ERR", e);
    }
}

} // namespace occ
} // namespace open_power