1 #include "occ_ffdc.hpp" 2 3 #include "utils.hpp" 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <stdio.h> 8 #include <sys/ioctl.h> 9 #include <unistd.h> 10 11 #include <nlohmann/json.hpp> 12 #include <org/open_power/OCC/Device/error.hpp> 13 #include <phosphor-logging/elog-errors.hpp> 14 #include <phosphor-logging/elog.hpp> 15 #include <phosphor-logging/lg2.hpp> 16 #include <phosphor-logging/log.hpp> 17 #include <xyz/openbmc_project/Common/error.hpp> 18 #include <xyz/openbmc_project/Logging/Create/server.hpp> 19 20 namespace open_power 21 { 22 namespace occ 23 { 24 25 static constexpr size_t max_ffdc_size = 8192; 26 static constexpr size_t sbe_status_header_size = 8; 27 28 static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging"; 29 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL"; 30 31 using namespace phosphor::logging; 32 using namespace sdbusplus::org::open_power::OCC::Device::Error; 33 using InternalFailure = 34 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 35 36 uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg, 37 int fd) 38 { 39 uint32_t plid = 0; 40 std::vector<std::tuple< 41 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat, 42 uint8_t, uint8_t, sdbusplus::message::unix_fd>> 43 pelFFDCInfo; 44 45 lg2::info("Creating PEL for OCC{INST} with SBE FFDC: {PATH} - SRC6: {SRC}", 46 "INST", src6 >> 16, "PATH", path, "SRC", lg2::hex, src6); 47 48 if (fd > 0) 49 { 50 pelFFDCInfo.push_back(std::make_tuple( 51 sdbusplus::xyz::openbmc_project::Logging::server::Create:: 52 FFDCFormat::Custom, 53 static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd)); 54 } 55 56 // Add journal traces to PEL FFDC 57 auto occJournalFile = 58 addJournalEntries(pelFFDCInfo, "openpower-occ-control", 25); 59 60 std::map<std::string, std::string> additionalData; 61 additionalData.emplace("SRC6", std::to_string(src6)); 62 additionalData.emplace("_PID", std::to_string(getpid())); 63 additionalData.emplace("SBE_ERR_MSG", msg); 64 65 auto& bus = utils::getBus(); 66 67 try 68 { 69 std::string service = 70 utils::getService(loggingObjectPath, opLoggingInterface); 71 auto method = 72 bus.new_method_call(service.c_str(), loggingObjectPath, 73 opLoggingInterface, "CreatePELWithFFDCFiles"); 74 75 // Set level to Notice (Informational). Error should trigger an OCC 76 // reset and if it does not recover, HTMGT/HBRT will create an 77 // unrecoverable error. 78 auto level = 79 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( 80 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: 81 Notice); 82 83 method.append(path, level, additionalData, pelFFDCInfo); 84 auto response = bus.call(method); 85 std::tuple<uint32_t, uint32_t> reply = {0, 0}; 86 87 response.read(reply); 88 plid = std::get<1>(reply); 89 } 90 catch (const sdbusplus::exception_t& e) 91 { 92 lg2::error("Failed to create PEL: {ERR}", "ERR", e.what()); 93 } 94 95 return plid; 96 } 97 98 void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err, 99 const char* callout) 100 { 101 std::map<std::string, std::string> additionalData; 102 103 additionalData.emplace("_PID", std::to_string(getpid())); 104 105 if (err) 106 { 107 additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err)); 108 } 109 110 if (callout) 111 { 112 additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout)); 113 } 114 115 additionalData.emplace("OCC", std::to_string(instance)); 116 117 lg2::info("Creating OCC Reset PEL for OCC{INST}: {PATH}", "INST", instance, 118 "PATH", path); 119 120 auto& bus = utils::getBus(); 121 122 try 123 { 124 FFDCFiles ffdc; 125 // Add journal traces to PEL FFDC 126 auto occJournalFile = 127 addJournalEntries(ffdc, "openpower-occ-control", 25); 128 129 std::string service = 130 utils::getService(loggingObjectPath, opLoggingInterface); 131 auto method = 132 bus.new_method_call(service.c_str(), loggingObjectPath, 133 opLoggingInterface, "CreatePELWithFFDCFiles"); 134 135 // Set level to Notice (Informational). Error should trigger an OCC 136 // reset and if it does not recover, HTMGT/HBRT will create an 137 // unrecoverable error. 138 auto level = 139 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( 140 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: 141 Notice); 142 143 method.append(path, level, additionalData, ffdc); 144 bus.call(method); 145 } 146 catch (const sdbusplus::exception_t& e) 147 { 148 lg2::error("Failed to create OCC Reset PEL: {ERR}", "ERR", e.what()); 149 } 150 } 151 152 // Reads the SBE FFDC file and create an error log 153 void FFDC::analyzeEvent() 154 { 155 int tfd = -1; 156 size_t total = 0; 157 auto data = std::make_unique<unsigned char[]>(max_ffdc_size); 158 while (total < max_ffdc_size) 159 { 160 auto r = read(fd, data.get() + total, max_ffdc_size - total); 161 if (r < 0) 162 { 163 elog<ReadFailure>( 164 phosphor::logging::org::open_power::OCC::Device::ReadFailure:: 165 CALLOUT_ERRNO(errno), 166 phosphor::logging::org::open_power::OCC::Device::ReadFailure:: 167 CALLOUT_DEVICE_PATH(file.c_str())); 168 return; 169 } 170 if (!r) 171 { 172 break; 173 } 174 total += r; 175 } 176 177 lseek(fd, 0, SEEK_SET); 178 179 if (!total) 180 { 181 // no error 182 return; 183 } 184 185 uint32_t src6 = instance << 16; 186 src6 |= *(data.get() + 2) << 8; 187 src6 |= *(data.get() + 3); 188 189 if (total > sbe_status_header_size) 190 { 191 std::string templateString = 192 fs::temp_directory_path() / "OCC_FFDC_XXXXXX"; 193 tfd = mkostemp(templateString.data(), O_RDWR); 194 if (tfd < 0) 195 { 196 lg2::error("Couldn't create temporary FFDC file"); 197 } 198 else 199 { 200 temporaryFiles.emplace_back(templateString, tfd); 201 size_t written = sbe_status_header_size; 202 while (written < total) 203 { 204 auto r = write(tfd, data.get() + written, total - written); 205 if (r < 0) 206 { 207 close(temporaryFiles.back().second); 208 fs::remove(temporaryFiles.back().first); 209 temporaryFiles.pop_back(); 210 tfd = -1; 211 lg2::error("Couldn't write temporary FFDC file"); 212 break; 213 } 214 if (!r) 215 { 216 break; 217 } 218 written += r; 219 } 220 } 221 } 222 223 createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6, 224 "SBE command reported error", tfd); 225 } 226 227 // Create file with the latest journal entries for specified executable 228 std::unique_ptr<FFDCFile> FFDC::addJournalEntries( 229 FFDCFiles& fileList, const std::string& executable, unsigned int lines) 230 { 231 auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable)); 232 if (journalFile && journalFile->fd() != -1) 233 { 234 lg2::debug( 235 "addJournalEntries: Added up to {NUM} journal entries for {APP}", 236 "NUM", lines, "APP", executable); 237 fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd()); 238 } 239 else 240 { 241 lg2::error("addJournalEntries: Failed to add journal entries for {APP}", 242 "APP", executable); 243 } 244 return journalFile; 245 } 246 247 // Write JSON data into FFDC file and return the file 248 std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData) 249 { 250 std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX"; 251 auto fd = mkostemp(tmpFile.data(), O_RDWR); 252 if (fd != -1) 253 { 254 auto jsonString = ffdcData.dump(); 255 auto rc = write(fd, jsonString.data(), jsonString.size()); 256 close(fd); 257 if (rc != -1) 258 { 259 fs::path jsonFile{tmpFile}; 260 return std::make_unique<FFDCFile>(jsonFile); 261 } 262 else 263 { 264 auto e = errno; 265 lg2::error( 266 "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={ERR}", 267 "ERR", e); 268 } 269 } 270 else 271 { 272 auto e = errno; 273 lg2::error("makeJsonFFDCFile: Failed called to mkostemp, errno={ERR}", 274 "ERR", e); 275 } 276 return nullptr; 277 } 278 279 // Collect the latest journal entries for a specified executable 280 nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable) 281 { 282 // Sleep 100ms; otherwise recent journal entries sometimes not available 283 using namespace std::chrono_literals; 284 std::this_thread::sleep_for(100ms); 285 286 std::vector<std::string> entries; 287 288 // Open the journal 289 sd_journal* journal; 290 int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY); 291 if (rc < 0) 292 { 293 // Build one line string containing field values 294 entries.push_back("[Internal error: sd_journal_open(), rc=" + 295 std::string(strerror(rc)) + "]"); 296 return nlohmann::json(entries); 297 } 298 299 // Create object to automatically close journal 300 JournalCloser closer{journal}; 301 302 // Add match so we only loop over entries with specified field value 303 std::string field{"SYSLOG_IDENTIFIER"}; 304 std::string match{field + '=' + executable}; 305 rc = sd_journal_add_match(journal, match.c_str(), 0); 306 if (rc < 0) 307 { 308 // Build one line string containing field values 309 entries.push_back("[Internal error: sd_journal_add_match(), rc=" + 310 std::string(strerror(rc)) + "]"); 311 } 312 else 313 { 314 int count{1}; 315 entries.reserve(numLines); 316 std::string syslogID, pid, message, timeStamp; 317 318 // Loop through journal entries from newest to oldest 319 SD_JOURNAL_FOREACH_BACKWARDS(journal) 320 { 321 // Get relevant journal entry fields 322 timeStamp = getTimeStamp(journal); 323 syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER"); 324 pid = getFieldValue(journal, "_PID"); 325 message = getFieldValue(journal, "MESSAGE"); 326 327 // Build one line string containing field values 328 entries.push_back( 329 timeStamp + " " + syslogID + "[" + pid + "]: " + message); 330 331 // Stop after number of lines was read 332 if (count++ >= numLines) 333 { 334 break; 335 } 336 } 337 } 338 339 // put the journal entries in chronological order 340 std::reverse(entries.begin(), entries.end()); 341 342 return nlohmann::json(entries); 343 } 344 345 std::string FFDC::getTimeStamp(sd_journal* journal) 346 { 347 // Get realtime (wallclock) timestamp of current journal entry. The 348 // timestamp is in microseconds since the epoch. 349 uint64_t usec{0}; 350 int rc = sd_journal_get_realtime_usec(journal, &usec); 351 if (rc < 0) 352 { 353 return "[Internal error: sd_journal_get_realtime_usec(), rc=" + 354 std::string(strerror(rc)) + "]"; 355 } 356 357 // Convert to number of seconds since the epoch 358 time_t secs = usec / 1000000; 359 360 // Convert seconds to tm struct required by strftime() 361 struct tm* timeStruct = localtime(&secs); 362 if (timeStruct == nullptr) 363 { 364 return "[Internal error: localtime() returned nullptr]"; 365 } 366 367 // Convert tm struct into a date/time string 368 char timeStamp[80]; 369 strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct); 370 371 return timeStamp; 372 } 373 374 std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field) 375 { 376 std::string value{}; 377 378 // Get field data from current journal entry 379 const void* data{nullptr}; 380 size_t length{0}; 381 int rc = sd_journal_get_data(journal, field.c_str(), &data, &length); 382 if (rc < 0) 383 { 384 if (-rc == ENOENT) 385 { 386 // Current entry does not include this field; return empty value 387 return value; 388 } 389 else 390 { 391 return "[Internal error: sd_journal_get_data() rc=" + 392 std::string(strerror(rc)) + "]"; 393 } 394 } 395 396 // Get value from field data. Field data in format "FIELD=value". 397 std::string dataString{static_cast<const char*>(data), length}; 398 std::string::size_type pos = dataString.find('='); 399 if ((pos != std::string::npos) && ((pos + 1) < dataString.size())) 400 { 401 // Value is substring after the '=' 402 value = dataString.substr(pos + 1); 403 } 404 405 return value; 406 } 407 408 // Create temporary file that will automatically get removed when destructed 409 FFDCFile::FFDCFile(const fs::path& name) : 410 _fd(open(name.c_str(), O_RDONLY)), _name(name) 411 { 412 if (_fd() == -1) 413 { 414 auto e = errno; 415 lg2::error("FFDCFile: Could not open FFDC file {FILE}. errno {ERR}", 416 "FILE", _name.string(), "ERR", e); 417 } 418 } 419 420 } // namespace occ 421 } // namespace open_power 422