#include "occ_ffdc.hpp"

#include "utils.hpp"

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <nlohmann/json.hpp>
#include <org/open_power/OCC/Device/error.hpp>
#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/elog.hpp>
#include <phosphor-logging/log.hpp>
#include <xyz/openbmc_project/Common/error.hpp>
#include <xyz/openbmc_project/Logging/Create/server.hpp>

#include <algorithm>
#include <chrono>
#include <format>
#include <system_error>
#include <thread>

namespace open_power
{
namespace occ
{

// Upper bound on the number of bytes read from the SBE FFDC device file
static constexpr size_t max_ffdc_size = 8192;
// Leading bytes of the FFDC data that are not copied into the temporary file
static constexpr size_t sbe_status_header_size = 8;

static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging";
static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL";

using namespace phosphor::logging;
using namespace sdbusplus::org::open_power::OCC::Device::Error;
using InternalFailure =
    sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure;

// Create a PEL (severity Notice) through the CreatePELWithFFDCFiles D-Bus
// method, attaching the optional SBE FFDC file plus recent journal entries.
//
// path - error name passed as the first argument of the D-Bus method
// src6 - value stored in the "SRC6" additional data; the upper 16 bits are
//        logged as the OCC instance
// msg  - text stored in the "SBE_ERR_MSG" additional data
// fd   - descriptor of the SBE FFDC file; only attached when greater than 0
//
// Returns the second value of the D-Bus reply (used here as the PLID), or 0
// when the D-Bus call fails.
uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg,
                         int fd)
{
    uint32_t plid = 0;
    std::vector<std::tuple<
        sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
        uint8_t, uint8_t, sdbusplus::message::unix_fd>>
        pelFFDCInfo;

    log<level::INFO>(
        std::format("Creating PEL for OCC{} with SBE FFDC: {} - SRC6: 0x{:08X}",
                    src6 >> 16, path, src6)
            .c_str());

    // NOTE(review): descriptor 0 is technically valid but is skipped here;
    // confirm that callers only use non-positive values to mean "no FFDC".
    if (fd > 0)
    {
        pelFFDCInfo.push_back(std::make_tuple(
            sdbusplus::xyz::openbmc_project::Logging::server::Create::
                FFDCFormat::Custom,
            static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd));
    }

    // Add journal traces to PEL FFDC. The returned file object is held until
    // the end of this function because pelFFDCInfo refers to its descriptor.
    auto occJournalFile = addJournalEntries(pelFFDCInfo,
                                            "openpower-occ-control", 25);

    std::map<std::string, std::string> additionalData;
    additionalData.emplace("SRC6", std::to_string(src6));
    additionalData.emplace("_PID", std::to_string(getpid()));
    additionalData.emplace("SBE_ERR_MSG", msg);

    auto& bus = utils::getBus();

    try
    {
        std::string service = utils::getService(loggingObjectPath,
                                                opLoggingInterface);
        auto method = bus.new_method_call(service.c_str(), loggingObjectPath,
                                          opLoggingInterface,
                                          "CreatePELWithFFDCFiles");

        // Set level to Notice (Informational). Error should trigger an OCC
        // reset and if it does not recover, HTMGT/HBRT will create an
        // unrecoverable error.
        auto level =
            sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
                sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
                    Notice);

        method.append(path, level, additionalData, pelFFDCInfo);
        auto response = bus.call(method);
        std::tuple<uint32_t, uint32_t> reply = {0, 0};

        response.read(reply);
        plid = std::get<1>(reply);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create PEL: {}", e.what()).c_str());
    }

    return plid;
}

// Create a PEL (severity Notice) describing an OCC reset, with recent journal
// entries attached as FFDC.
//
// instance - OCC instance number, stored in the "OCC" additional data
// path     - error name passed as the first argument of the D-Bus method
// err      - when non-zero, its negation is stored as "CALLOUT_ERRNO"
// callout  - when non-null, stored as "CALLOUT_DEVICE_PATH"
//
// D-Bus failures are logged and otherwise ignored.
void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err,
                             const char* callout)
{
    std::map<std::string, std::string> additionalData;

    additionalData.emplace("_PID", std::to_string(getpid()));

    if (err)
    {
        additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err));
    }

    if (callout)
    {
        additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout));
    }

    additionalData.emplace("OCC", std::to_string(instance));

    log<level::INFO>(
        std::format("Creating OCC Reset PEL for OCC{}: {}", instance, path)
            .c_str());

    auto& bus = utils::getBus();

    try
    {
        FFDCFiles ffdc;
        // Add journal traces to PEL FFDC. The returned file object is held
        // until the call below completes because ffdc refers to its
        // descriptor.
        auto occJournalFile = addJournalEntries(ffdc, "openpower-occ-control",
                                                25);

        std::string service = utils::getService(loggingObjectPath,
                                                opLoggingInterface);
        auto method = bus.new_method_call(service.c_str(), loggingObjectPath,
                                          opLoggingInterface,
                                          "CreatePELWithFFDCFiles");

        // Set level to Notice (Informational). Error should trigger an OCC
        // reset and if it does not recover, HTMGT/HBRT will create an
        // unrecoverable error.
        auto level =
            sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage(
                sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level::
                    Notice);

        method.append(path, level, additionalData, ffdc);
        bus.call(method);
    }
    catch (const sdbusplus::exception_t& e)
    {
        log<level::ERR>(
            std::format("Failed to create OCC Reset PEL: {}", e.what())
                .c_str());
    }
}

// Reads the SBE FFDC file and create an error log
//
// Up to max_ffdc_size bytes are read from the member descriptor `fd`. When
// nothing is read there is no error to report. Otherwise SRC6 is built from
// the instance number and bytes 2 and 3 of the data, everything after the
// first sbe_status_header_size bytes is copied to a temporary file, and
// createPEL() is called with that file's descriptor (or -1 if no file could
// be produced).
void FFDC::analyzeEvent()
{
    int tfd = -1;
    size_t total = 0;
    // make_unique value-initializes the array, so bytes beyond `total` are 0
    auto data = std::make_unique<unsigned char[]>(max_ffdc_size);
    while (total < max_ffdc_size)
    {
        auto r = read(fd, data.get() + total, max_ffdc_size - total);
        if (r < 0)
        {
            elog<ReadFailure>(
                phosphor::logging::org::open_power::OCC::Device::ReadFailure::
                    CALLOUT_ERRNO(errno),
                phosphor::logging::org::open_power::OCC::Device::ReadFailure::
                    CALLOUT_DEVICE_PATH(file.c_str()));
            return;
        }
        if (!r)
        {
            break;
        }
        total += r;
    }

    // Rewind so the next event reads the file from its start
    lseek(fd, 0, SEEK_SET);

    if (!total)
    {
        // no error
        return;
    }

    uint32_t src6 = instance << 16;
    src6 |= *(data.get() + 2) << 8;
    src6 |= *(data.get() + 3);

    if (total > sbe_status_header_size)
    {
        std::string templateString = fs::temp_directory_path() /
                                     "OCC_FFDC_XXXXXX";
        tfd = mkostemp(templateString.data(), O_RDWR);
        if (tfd < 0)
        {
            log<level::ERR>("Couldn't create temporary FFDC file");
        }
        else
        {
            temporaryFiles.emplace_back(templateString, tfd);
            // Skip the status header; only the payload goes into the file
            size_t written = sbe_status_header_size;
            while (written < total)
            {
                auto r = write(tfd, data.get() + written, total - written);
                if (r < 0)
                {
                    // Undo the bookkeeping for the file that could not be
                    // filled and fall back to a PEL without SBE FFDC
                    close(temporaryFiles.back().second);
                    fs::remove(temporaryFiles.back().first);
                    temporaryFiles.pop_back();
                    tfd = -1;
                    log<level::ERR>("Couldn't write temporary FFDC file");
                    break;
                }
                if (!r)
                {
                    break;
                }
                written += r;
            }
        }
    }

    createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6,
              "SBE command reported error", tfd);
}

// Create file with the latest journal entries for specified executable
//
// On success a JSON FFDC entry referring to the file's descriptor is appended
// to fileList. The file object is returned in either case (it may be null);
// the caller must keep it until fileList has been consumed.
std::unique_ptr<FFDCFile> FFDC::addJournalEntries(FFDCFiles& fileList,
                                                  const std::string& executable,
                                                  unsigned int lines)
{
    auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable));
    if (journalFile && journalFile->fd() != -1)
    {
        log<level::DEBUG>(
            std::format(
                "addJournalEntries: Added up to {} journal entries for {}",
                lines, executable)
                .c_str());
        fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd());
    }
    else
    {
        log<level::ERR>(
            std::format(
                "addJournalEntries: Failed to add journal entries for {}",
                executable)
                .c_str());
    }
    return journalFile;
}

// Write JSON data into FFDC file and return the file
//
// Returns nullptr when the temporary file cannot be created or the JSON text
// cannot be written completely; in the latter case the partially written
// file is removed again.
std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData)
{
    std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX";
    const int fd = mkostemp(tmpFile.data(), O_RDWR);
    if (fd == -1)
    {
        const auto e = errno;
        log<level::ERR>(
            std::format("makeJsonFFDCFile: Failed called to mkostemp, errno={}",
                        e)
                .c_str());
        return nullptr;
    }

    const auto jsonString = ffdcData.dump();
    const char* cursor = jsonString.data();
    size_t remaining = jsonString.size();
    int writeErrno = 0;

    // write() may transfer fewer bytes than requested, so loop until done
    while (remaining > 0)
    {
        const auto rc = write(fd, cursor, remaining);
        if (rc < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            // Capture errno before close() gets a chance to overwrite it
            writeErrno = errno;
            break;
        }
        if (rc == 0)
        {
            // No progress; report as an I/O error instead of spinning
            writeErrno = EIO;
            break;
        }
        cursor += rc;
        remaining -= static_cast<size_t>(rc);
    }
    close(fd);

    if (remaining != 0)
    {
        // Do not leave a truncated temporary file behind
        std::error_code ec;
        fs::remove(tmpFile, ec);
        log<level::ERR>(
            std::format(
                "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={}",
                writeErrno)
                .c_str());
        return nullptr;
    }

    fs::path jsonFile{tmpFile};
    return std::make_unique<FFDCFile>(jsonFile);
}

// Collect the latest journal entries for a specified executable
//
// Returns a JSON array of strings in chronological order, one per journal
// entry whose SYSLOG_IDENTIFIER equals `executable`. At most numLines entries
// are returned. If the journal cannot be opened or filtered, the array holds
// a single "[Internal error: ...]" string instead.
nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable)
{
    // Sleep 100ms; otherwise recent journal entries sometimes not available
    using namespace std::chrono_literals;
    std::this_thread::sleep_for(100ms);

    std::vector<std::string> entries;

    // Open the journal
    sd_journal* journal;
    int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY);
    if (rc < 0)
    {
        // Build one line string containing field values.
        // sd-journal reports failures as negative errno values, so negate
        // before handing the code to strerror().
        entries.push_back("[Internal error: sd_journal_open(), rc=" +
                          std::string(strerror(-rc)) + "]");
        return nlohmann::json(entries);
    }

    // Create object to automatically close journal
    JournalCloser closer{journal};

    // Add match so we only loop over entries with specified field value
    std::string field{"SYSLOG_IDENTIFIER"};
    std::string match{field + '=' + executable};
    rc = sd_journal_add_match(journal, match.c_str(), 0);
    if (rc < 0)
    {
        // Build one line string containing field values
        entries.push_back("[Internal error: sd_journal_add_match(), rc=" +
                          std::string(strerror(-rc)) + "]");
    }
    else
    {
        int count{1};
        // A negative count would convert to a huge size_t and make reserve()
        // throw, so only reserve for positive requests
        if (numLines > 0)
        {
            entries.reserve(static_cast<size_t>(numLines));
        }
        std::string syslogID, pid, message, timeStamp;

        // Loop through journal entries from newest to oldest
        SD_JOURNAL_FOREACH_BACKWARDS(journal)
        {
            // Get relevant journal entry fields
            timeStamp = getTimeStamp(journal);
            syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER");
            pid = getFieldValue(journal, "_PID");
            message = getFieldValue(journal, "MESSAGE");

            // Build one line string containing field values
            entries.push_back(timeStamp + " " + syslogID + "[" + pid +
                              "]: " + message);

            // Stop after number of lines was read
            if (count++ >= numLines)
            {
                break;
            }
        }
    }

    // put the journal entries in chronological order
    std::reverse(entries.begin(), entries.end());

    return nlohmann::json(entries);
}

// Format the realtime timestamp of the current journal entry as
// "Mon DD HH:MM:SS" in local time. On failure an "[Internal error: ...]"
// string is returned instead.
std::string FFDC::getTimeStamp(sd_journal* journal)
{
    // Get realtime (wallclock) timestamp of current journal entry. The
    // timestamp is in microseconds since the epoch.
    uint64_t usec{0};
    int rc = sd_journal_get_realtime_usec(journal, &usec);
    if (rc < 0)
    {
        // rc is a negative errno value; strerror() expects the positive one
        return "[Internal error: sd_journal_get_realtime_usec(), rc=" +
               std::string(strerror(-rc)) + "]";
    }

    // Convert to number of seconds since the epoch
    time_t secs = usec / 1000000;

    // Convert seconds to tm struct required by strftime()
    struct tm* timeStruct = localtime(&secs);
    if (timeStruct == nullptr)
    {
        return "[Internal error: localtime() returned nullptr]";
    }

    // Convert tm struct into a date/time string. The buffer is zeroed so the
    // result is an empty string should strftime() write nothing.
    char timeStamp[80]{};
    strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct);

    return timeStamp;
}

// Return the value of `field` in the current journal entry. The result is
// empty when the entry has no such field or the field has no value, and an
// "[Internal error: ...]" string when the journal lookup fails otherwise.
std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field)
{
    std::string value{};

    // Get field data from current journal entry
    const void* data{nullptr};
    size_t length{0};
    int rc = sd_journal_get_data(journal, field.c_str(), &data, &length);
    if (rc < 0)
    {
        if (-rc == ENOENT)
        {
            // Current entry does not include this field; return empty value
            return value;
        }

        // rc is a negative errno value; strerror() expects the positive one
        return "[Internal error: sd_journal_get_data() rc=" +
               std::string(strerror(-rc)) + "]";
    }

    // Get value from field data. Field data in format "FIELD=value".
    std::string dataString{static_cast<const char*>(data), length};
    std::string::size_type pos = dataString.find('=');
    if ((pos != std::string::npos) && ((pos + 1) < dataString.size()))
    {
        // Value is substring after the '='
        value = dataString.substr(pos + 1);
    }

    return value;
}

// Create temporary file that will automatically get removed when destructed
//
// Opens `name` read-only. A failure to open is logged; the object is still
// constructed, so callers must check fd() against -1.
FFDCFile::FFDCFile(const fs::path& name) :
    _fd(open(name.c_str(), O_RDONLY)), _name(name)
{
    if (_fd() == -1)
    {
        auto e = errno;
        log<level::ERR>(
            std::format("FFDCFile: Could not open FFDC file {}. errno {}",
                        _name.string(), e)
                .c_str());
    }
}

} // namespace occ
} // namespace open_power