1 #include "occ_ffdc.hpp" 2 3 #include "utils.hpp" 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <fmt/core.h> 8 #include <stdio.h> 9 #include <sys/ioctl.h> 10 #include <unistd.h> 11 12 #include <nlohmann/json.hpp> 13 #include <org/open_power/OCC/Device/error.hpp> 14 #include <phosphor-logging/elog-errors.hpp> 15 #include <phosphor-logging/elog.hpp> 16 #include <phosphor-logging/log.hpp> 17 #include <xyz/openbmc_project/Common/error.hpp> 18 #include <xyz/openbmc_project/Logging/Create/server.hpp> 19 20 namespace open_power 21 { 22 namespace occ 23 { 24 25 static constexpr size_t max_ffdc_size = 8192; 26 static constexpr size_t sbe_status_header_size = 8; 27 28 static constexpr auto loggingObjectPath = "/xyz/openbmc_project/logging"; 29 static constexpr auto opLoggingInterface = "org.open_power.Logging.PEL"; 30 31 using namespace phosphor::logging; 32 using namespace sdbusplus::org::open_power::OCC::Device::Error; 33 using InternalFailure = 34 sdbusplus::xyz::openbmc_project::Common::Error::InternalFailure; 35 36 uint32_t FFDC::createPEL(const char* path, uint32_t src6, const char* msg, 37 int fd) 38 { 39 uint32_t plid = 0; 40 std::vector<std::tuple< 41 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat, 42 uint8_t, uint8_t, sdbusplus::message::unix_fd>> 43 pelFFDCInfo; 44 45 log<level::INFO>( 46 fmt::format("Creating PEL for OCC{} with SBE FFDC: {} - SRC6: 0x{:08X}", 47 src6 >> 16, path, src6) 48 .c_str()); 49 50 if (fd > 0) 51 { 52 pelFFDCInfo.push_back(std::make_tuple( 53 sdbusplus::xyz::openbmc_project::Logging::server::Create:: 54 FFDCFormat::Custom, 55 static_cast<uint8_t>(0xCB), static_cast<uint8_t>(0x01), fd)); 56 } 57 58 // Add journal traces to PEL FFDC 59 auto occJournalFile = addJournalEntries(pelFFDCInfo, 60 "openpower-occ-control", 25); 61 62 std::map<std::string, std::string> additionalData; 63 additionalData.emplace("SRC6", std::to_string(src6)); 64 additionalData.emplace("_PID", std::to_string(getpid())); 65 additionalData.emplace("SBE_ERR_MSG", msg); 66 67 auto& bus = utils::getBus(); 68 69 try 70 { 71 std::string service = utils::getService(loggingObjectPath, 72 opLoggingInterface); 73 auto method = bus.new_method_call(service.c_str(), loggingObjectPath, 74 opLoggingInterface, 75 "CreatePELWithFFDCFiles"); 76 77 // Set level to Notice (Informational). Error should trigger an OCC 78 // reset and if it does not recover, HTMGT/HBRT will create an 79 // unrecoverable error. 80 auto level = 81 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( 82 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: 83 Notice); 84 85 method.append(path, level, additionalData, pelFFDCInfo); 86 auto response = bus.call(method); 87 std::tuple<uint32_t, uint32_t> reply = {0, 0}; 88 89 response.read(reply); 90 plid = std::get<1>(reply); 91 } 92 catch (const sdbusplus::exception_t& e) 93 { 94 log<level::ERR>( 95 fmt::format("Failed to create PEL: {}", e.what()).c_str()); 96 } 97 98 return plid; 99 } 100 101 void FFDC::createOCCResetPEL(unsigned int instance, const char* path, int err, 102 const char* callout) 103 { 104 std::map<std::string, std::string> additionalData; 105 106 additionalData.emplace("_PID", std::to_string(getpid())); 107 108 if (err) 109 { 110 additionalData.emplace("CALLOUT_ERRNO", std::to_string(-err)); 111 } 112 113 if (callout) 114 { 115 additionalData.emplace("CALLOUT_DEVICE_PATH", std::string(callout)); 116 } 117 118 additionalData.emplace("OCC", std::to_string(instance)); 119 120 log<level::INFO>( 121 fmt::format("Creating OCC Reset PEL for OCC{}: {}", instance, path) 122 .c_str()); 123 124 auto& bus = utils::getBus(); 125 126 try 127 { 128 FFDCFiles ffdc; 129 // Add journal traces to PEL FFDC 130 auto occJournalFile = addJournalEntries(ffdc, "openpower-occ-control", 131 25); 132 133 std::string service = utils::getService(loggingObjectPath, 134 opLoggingInterface); 135 auto method = bus.new_method_call(service.c_str(), loggingObjectPath, 136 opLoggingInterface, 137 "CreatePELWithFFDCFiles"); 138 139 // Set level to Notice (Informational). Error should trigger an OCC 140 // reset and if it does not recover, HTMGT/HBRT will create an 141 // unrecoverable error. 142 auto level = 143 sdbusplus::xyz::openbmc_project::Logging::server::convertForMessage( 144 sdbusplus::xyz::openbmc_project::Logging::server::Entry::Level:: 145 Notice); 146 147 method.append(path, level, additionalData, ffdc); 148 bus.call(method); 149 } 150 catch (const sdbusplus::exception_t& e) 151 { 152 log<level::ERR>( 153 fmt::format("Failed to create OCC Reset PEL: {}", e.what()) 154 .c_str()); 155 } 156 } 157 158 // Reads the SBE FFDC file and create an error log 159 void FFDC::analyzeEvent() 160 { 161 int tfd = -1; 162 size_t total = 0; 163 auto data = std::make_unique<unsigned char[]>(max_ffdc_size); 164 while (total < max_ffdc_size) 165 { 166 auto r = read(fd, data.get() + total, max_ffdc_size - total); 167 if (r < 0) 168 { 169 elog<ReadFailure>( 170 phosphor::logging::org::open_power::OCC::Device::ReadFailure:: 171 CALLOUT_ERRNO(errno), 172 phosphor::logging::org::open_power::OCC::Device::ReadFailure:: 173 CALLOUT_DEVICE_PATH(file.c_str())); 174 return; 175 } 176 if (!r) 177 { 178 break; 179 } 180 total += r; 181 } 182 183 lseek(fd, 0, SEEK_SET); 184 185 if (!total) 186 { 187 // no error 188 return; 189 } 190 191 uint32_t src6 = instance << 16; 192 src6 |= *(data.get() + 2) << 8; 193 src6 |= *(data.get() + 3); 194 195 if (total > sbe_status_header_size) 196 { 197 std::string templateString = fs::temp_directory_path() / 198 "OCC_FFDC_XXXXXX"; 199 tfd = mkostemp(templateString.data(), O_RDWR); 200 if (tfd < 0) 201 { 202 log<level::ERR>("Couldn't create temporary FFDC file"); 203 } 204 else 205 { 206 temporaryFiles.emplace_back(templateString, tfd); 207 size_t written = sbe_status_header_size; 208 while (written < total) 209 { 210 auto r = write(tfd, data.get() + written, total - written); 211 if (r < 0) 212 { 213 close(temporaryFiles.back().second); 214 fs::remove(temporaryFiles.back().first); 215 temporaryFiles.pop_back(); 216 tfd = -1; 217 log<level::ERR>("Couldn't write temporary FFDC file"); 218 break; 219 } 220 if (!r) 221 { 222 break; 223 } 224 written += r; 225 } 226 } 227 } 228 229 createPEL("org.open_power.Processor.Error.SbeChipOpFailure", src6, 230 "SBE command reported error", tfd); 231 } 232 233 // Create file with the latest journal entries for specified executable 234 std::unique_ptr<FFDCFile> FFDC::addJournalEntries(FFDCFiles& fileList, 235 const std::string& executable, 236 unsigned int lines) 237 { 238 auto journalFile = makeJsonFFDCFile(getJournalEntries(lines, executable)); 239 if (journalFile && journalFile->fd() != -1) 240 { 241 log<level::DEBUG>( 242 fmt::format( 243 "addJournalEntries: Added up to {} journal entries for {}", 244 lines, executable) 245 .c_str()); 246 fileList.emplace_back(FFDCFormat::JSON, 0x01, 0x01, journalFile->fd()); 247 } 248 else 249 { 250 log<level::ERR>( 251 fmt::format( 252 "addJournalEntries: Failed to add journal entries for {}", 253 executable) 254 .c_str()); 255 } 256 return journalFile; 257 } 258 259 // Write JSON data into FFDC file and return the file 260 std::unique_ptr<FFDCFile> FFDC::makeJsonFFDCFile(const nlohmann::json& ffdcData) 261 { 262 std::string tmpFile = fs::temp_directory_path() / "OCC_JOURNAL_XXXXXX"; 263 auto fd = mkostemp(tmpFile.data(), O_RDWR); 264 if (fd != -1) 265 { 266 auto jsonString = ffdcData.dump(); 267 auto rc = write(fd, jsonString.data(), jsonString.size()); 268 close(fd); 269 if (rc != -1) 270 { 271 fs::path jsonFile{tmpFile}; 272 return std::make_unique<FFDCFile>(jsonFile); 273 } 274 else 275 { 276 auto e = errno; 277 log<level::ERR>( 278 fmt::format( 279 "makeJsonFFDCFile: Failed call to write JSON FFDC file, errno={}", 280 e) 281 .c_str()); 282 } 283 } 284 else 285 { 286 auto e = errno; 287 log<level::ERR>( 288 fmt::format("makeJsonFFDCFile: Failed called to mkostemp, errno={}", 289 e) 290 .c_str()); 291 } 292 return nullptr; 293 } 294 295 // Collect the latest journal entries for a specified executable 296 nlohmann::json FFDC::getJournalEntries(int numLines, std::string executable) 297 { 298 // Sleep 100ms; otherwise recent journal entries sometimes not available 299 using namespace std::chrono_literals; 300 std::this_thread::sleep_for(100ms); 301 302 std::vector<std::string> entries; 303 304 // Open the journal 305 sd_journal* journal; 306 int rc = sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY); 307 if (rc < 0) 308 { 309 // Build one line string containing field values 310 entries.push_back("[Internal error: sd_journal_open(), rc=" + 311 std::string(strerror(rc)) + "]"); 312 return nlohmann::json(entries); 313 } 314 315 // Create object to automatically close journal 316 JournalCloser closer{journal}; 317 318 // Add match so we only loop over entries with specified field value 319 std::string field{"SYSLOG_IDENTIFIER"}; 320 std::string match{field + '=' + executable}; 321 rc = sd_journal_add_match(journal, match.c_str(), 0); 322 if (rc < 0) 323 { 324 // Build one line string containing field values 325 entries.push_back("[Internal error: sd_journal_add_match(), rc=" + 326 std::string(strerror(rc)) + "]"); 327 } 328 else 329 { 330 int count{1}; 331 entries.reserve(numLines); 332 std::string syslogID, pid, message, timeStamp; 333 334 // Loop through journal entries from newest to oldest 335 SD_JOURNAL_FOREACH_BACKWARDS(journal) 336 { 337 // Get relevant journal entry fields 338 timeStamp = getTimeStamp(journal); 339 syslogID = getFieldValue(journal, "SYSLOG_IDENTIFIER"); 340 pid = getFieldValue(journal, "_PID"); 341 message = getFieldValue(journal, "MESSAGE"); 342 343 // Build one line string containing field values 344 entries.push_back(timeStamp + " " + syslogID + "[" + pid + 345 "]: " + message); 346 347 // Stop after number of lines was read 348 if (count++ >= numLines) 349 { 350 break; 351 } 352 } 353 } 354 355 // put the journal entries in chronological order 356 std::reverse(entries.begin(), entries.end()); 357 358 return nlohmann::json(entries); 359 } 360 361 std::string FFDC::getTimeStamp(sd_journal* journal) 362 { 363 // Get realtime (wallclock) timestamp of current journal entry. The 364 // timestamp is in microseconds since the epoch. 365 uint64_t usec{0}; 366 int rc = sd_journal_get_realtime_usec(journal, &usec); 367 if (rc < 0) 368 { 369 return "[Internal error: sd_journal_get_realtime_usec(), rc=" + 370 std::string(strerror(rc)) + "]"; 371 } 372 373 // Convert to number of seconds since the epoch 374 time_t secs = usec / 1000000; 375 376 // Convert seconds to tm struct required by strftime() 377 struct tm* timeStruct = localtime(&secs); 378 if (timeStruct == nullptr) 379 { 380 return "[Internal error: localtime() returned nullptr]"; 381 } 382 383 // Convert tm struct into a date/time string 384 char timeStamp[80]; 385 strftime(timeStamp, sizeof(timeStamp), "%b %d %H:%M:%S", timeStruct); 386 387 return timeStamp; 388 } 389 390 std::string FFDC::getFieldValue(sd_journal* journal, const std::string& field) 391 { 392 std::string value{}; 393 394 // Get field data from current journal entry 395 const void* data{nullptr}; 396 size_t length{0}; 397 int rc = sd_journal_get_data(journal, field.c_str(), &data, &length); 398 if (rc < 0) 399 { 400 if (-rc == ENOENT) 401 { 402 // Current entry does not include this field; return empty value 403 return value; 404 } 405 else 406 { 407 return "[Internal error: sd_journal_get_data() rc=" + 408 std::string(strerror(rc)) + "]"; 409 } 410 } 411 412 // Get value from field data. Field data in format "FIELD=value". 413 std::string dataString{static_cast<const char*>(data), length}; 414 std::string::size_type pos = dataString.find('='); 415 if ((pos != std::string::npos) && ((pos + 1) < dataString.size())) 416 { 417 // Value is substring after the '=' 418 value = dataString.substr(pos + 1); 419 } 420 421 return value; 422 } 423 424 // Create temporary file that will automatically get removed when destructed 425 FFDCFile::FFDCFile(const fs::path& name) : 426 _fd(open(name.c_str(), O_RDONLY)), _name(name) 427 { 428 if (_fd() == -1) 429 { 430 auto e = errno; 431 log<level::ERR>( 432 fmt::format("FFDCFile: Could not open FFDC file {}. errno {}", 433 _name.string(), e) 434 .c_str()); 435 } 436 } 437 438 } // namespace occ 439 } // namespace open_power 440