1 #include <unistd.h> 2 3 #include <analyzer/analyzer_main.hpp> 4 #include <attn/attn_common.hpp> 5 #include <attn/attn_dbus.hpp> 6 #include <attn/attn_dump.hpp> 7 #include <attn/attn_logging.hpp> 8 #include <attn/pel/pel_minimal.hpp> 9 #include <phosphor-logging/log.hpp> 10 11 namespace attn 12 { 13 14 /** @brief Journal entry of type INFO using phosphor logging */ 15 template <> 16 void trace<INFO>(const char* i_message) 17 { 18 phosphor::logging::log<phosphor::logging::level::INFO>(i_message); 19 } 20 21 template <> 22 void trace<ERROR>(const char* i_message) 23 { 24 phosphor::logging::log<phosphor::logging::level::ERR>(i_message); 25 } 26 27 /** @brief Tuple containing information about ffdc files */ 28 using FFDCTuple = 29 std::tuple<util::FFDCFormat, uint8_t, uint8_t, sdbusplus::message::unix_fd>; 30 31 /** @brief Gather messages from the journal */ 32 std::vector<std::string> sdjGetMessages(const std::string& field, 33 const std::string& fieldValue, 34 unsigned int max); 35 36 /** 37 * Create FFDCTuple objects corresponding to the specified FFDC files. 38 * 39 * The D-Bus method to create an error log requires a vector of tuples to 40 * pass in the FFDC file information. 41 * 42 * @param files - FFDC files 43 * @return vector of FFDCTuple objects 44 */ 45 std::vector<FFDCTuple> 46 createFFDCTuples(const std::vector<util::FFDCFile>& files) 47 { 48 std::vector<FFDCTuple> ffdcTuples{}; 49 util::transformFFDC(files, ffdcTuples); 50 51 return ffdcTuples; 52 } 53 54 /** 55 * @brief Create an FFDCFile object containing raw data 56 * 57 * Throws an exception if an error occurs. 58 * 59 * @param i_buffer - raw data to add to ffdc faw data file 60 * @param i_size - size of the raw data 61 * @return FFDCFile object 62 */ 63 util::FFDCFile createFFDCRawFile(void* i_buffer, size_t i_size) 64 { 65 util::FFDCFile file{util::FFDCFormat::Custom}; 66 67 // Write buffer to file and then reset file description file offset 68 int fd = file.getFileDescriptor(); 69 size_t numBytes = write(fd, static_cast<char*>(i_buffer), i_size); 70 if (i_size != numBytes) 71 { 72 std::stringstream traceMsg; 73 traceMsg << file.getPath().c_str() << " only " << (int)numBytes 74 << " of " << (int)i_size << " bytes written"; 75 auto strobj = traceMsg.str(); 76 trace<level::ERROR>(strobj.c_str()); 77 } 78 79 lseek(fd, 0, SEEK_SET); 80 81 return file; 82 } 83 84 /** 85 * @brief Create an FFDCFile object containing the specified lines of text data 86 * 87 * Throws an exception if an error occurs. 88 * 89 * @param lines - lines of text data to write to file 90 * @return FFDCFile object 91 */ 92 util::FFDCFile createFFDCTraceFile(const std::vector<std::string>& lines) 93 { 94 // Create FFDC file of type Text 95 util::FFDCFile file{util::FFDCFormat::Text}; 96 int fd = file.getFileDescriptor(); 97 98 // Write FFDC lines to file 99 std::string buffer; 100 for (const std::string& line : lines) 101 { 102 // Copy line to buffer. Add newline if necessary. 103 buffer = line; 104 if (line.empty() || (line.back() != '\n')) 105 { 106 buffer += '\n'; 107 } 108 109 // write buffer to file 110 size_t numBytes = write(fd, buffer.c_str(), buffer.size()); 111 if (buffer.size() != numBytes) 112 { 113 std::stringstream traceMsg; 114 traceMsg << file.getPath().c_str() << " only " << (int)numBytes 115 << " of " << (int)buffer.size() << " bytes written"; 116 auto strobj = traceMsg.str(); 117 trace<level::ERROR>(strobj.c_str()); 118 } 119 } 120 121 // Seek to beginning of file so error logging system can read data 122 lseek(fd, 0, SEEK_SET); 123 124 return file; 125 } 126 127 /** 128 * Create FDDC files from journal messages of relevant executables 129 * 130 * Parse the system journal looking for log entries created by the executables 131 * of interest for logging. For each of these entries create a ffdc trace file 132 * that will be used to create ffdc log entries. These files will be pushed 133 * onto the stack of ffdc files. 134 * 135 * @param i_files - vector of ffdc files that will become log entries 136 */ 137 void createFFDCTraceFiles(std::vector<util::FFDCFile>& i_files) 138 { 139 // Executables of interest 140 std::vector<std::string> executables{"openpower-hw-diags"}; 141 142 for (const std::string& executable : executables) 143 { 144 try 145 { 146 // get journal messages 147 std::vector<std::string> messages = 148 sdjGetMessages("SYSLOG_IDENTIFIER", executable, 30); 149 150 // Create FFDC file containing the journal messages 151 if (!messages.empty()) 152 { 153 i_files.emplace_back(createFFDCTraceFile(messages)); 154 } 155 } 156 catch (const std::exception& e) 157 { 158 trace<level::INFO>("createFFDCTraceFiles exception"); 159 std::string traceMsg = std::string(e.what(), maxTraceLen); 160 trace<level::INFO>(traceMsg.c_str()); 161 } 162 } 163 } 164 165 /** 166 * Create FFDCFile objects containing debug data to store in the error log. 167 * 168 * If an error occurs, the error is written to the journal but an exception 169 * is not thrown. 170 * 171 * @param i_buffer - raw data (if creating raw dump ffdc entry in log) 172 * @return vector of FFDCFile objects 173 */ 174 std::vector<util::FFDCFile> createFFDCFiles(char* i_buffer = nullptr, 175 size_t i_size = 0) 176 { 177 std::vector<util::FFDCFile> files{}; 178 179 // Create raw dump file 180 if ((nullptr != i_buffer) && (0 != i_size)) 181 { 182 files.emplace_back(createFFDCRawFile(i_buffer, i_size)); 183 } 184 185 // Create trace dump file 186 createFFDCTraceFiles(files); 187 188 return files; 189 } 190 191 /** 192 * Create a PEL from an existing PEL 193 * 194 * Create a new PEL based on the specified raw PEL and submit the new PEL 195 * to the backend logging code as a raw PEL. Note that additional data map 196 * here contains data to be committed to the PEL and it can also be used to 197 * create the PEL as it contains needed information. 198 * 199 * @param i_rawPel - buffer containing a raw PEL 200 * @param i_additional - additional data to be added to the new PEL 201 */ 202 void createPelCustom(std::vector<uint8_t>& i_rawPel, 203 std::map<std::string, std::string> i_additional) 204 { 205 // create PEL object from buffer 206 auto tiPel = std::make_unique<pel::PelMinimal>(i_rawPel); 207 208 // The additional data contains the TI info as well as the value for the 209 // subystem that provided the TI info. Get the subystem from additional 210 // data and then populate the prmary SRC and SRC words for the custom PEL 211 // based on the sybsystem's TI info. 212 uint8_t subsystem = std::stoi(i_additional["Subsystem"]); 213 tiPel->setSubsystem(subsystem); 214 215 // If recoverable attentions are active we will call the analyzer and 216 // then link the custom pel to analyzer pel. 217 std::map<std::string, std::string>::iterator it; 218 it = i_additional.find("recoverables"); 219 if (it != i_additional.end() && "true" == it->second) 220 { 221 DumpParameters dumpParameters; 222 auto plid = analyzer::analyzeHardware( 223 analyzer::AnalysisType::TERMINATE_IMMEDIATE, dumpParameters); 224 if (0 != plid) 225 { 226 // Link the PLID if an attention was found and a PEL was generated. 227 tiPel->setPlid(plid); 228 } 229 } 230 231 if (static_cast<uint8_t>(pel::SubsystemID::hypervisor) == subsystem) 232 { 233 // populate hypervisor SRC words 234 tiPel->setSrcWords(std::array<uint32_t, pel::numSrcWords>{ 235 (uint32_t)std::stoul(i_additional["0x10 SRC Word 12"], 0, 16), 236 (uint32_t)std::stoul(i_additional["0x14 SRC Word 13"], 0, 16), 237 (uint32_t)std::stoul(i_additional["0x18 SRC Word 14"], 0, 16), 238 (uint32_t)std::stoul(i_additional["0x1c SRC Word 15"], 0, 16), 239 (uint32_t)std::stoul(i_additional["0x20 SRC Word 16"], 0, 16), 240 (uint32_t)std::stoul(i_additional["0x24 SRC Word 17"], 0, 16), 241 (uint32_t)std::stoul(i_additional["0x28 SRC Word 18"], 0, 16), 242 (uint32_t)std::stoul(i_additional["0x2c SRC Word 19"], 0, 16)}); 243 244 // Populate phyp primary SRC 245 246 // char array for raw pel src 247 std::array<char, pel::asciiStringSize> srcChars{'0'}; 248 249 // src from TI info 250 std::string srcString = i_additional["SrcAscii"]; 251 252 // copy from string to char array 253 srcString.copy(srcChars.data(), 254 std::min(srcString.size(), pel::asciiStringSize), 0); 255 256 tiPel->setAsciiString(srcChars); // pel object src is char array 257 258 // set symptom-id 259 auto symptomId = (i_additional["SrcAscii"].substr(0, 8) + '_'); 260 261 symptomId += (i_additional["0x10 SRC Word 12"]); 262 symptomId += (i_additional["0x14 SRC Word 13"] + '_'); 263 symptomId += (i_additional["0x18 SRC Word 14"]); 264 symptomId += (i_additional["0x1c SRC Word 15"] + '_'); 265 symptomId += (i_additional["0x20 SRC Word 16"]); 266 symptomId += (i_additional["0x24 SRC Word 17"] + '_'); 267 symptomId += (i_additional["0x28 SRC Word 18"]); 268 symptomId += (i_additional["0x2c SRC Word 19"]); 269 270 // setSymptomId will take care of required null-terminate and padding 271 tiPel->setSymptomId(symptomId); 272 } 273 else 274 { 275 // Populate hostboot SRC words - note HB word 0 from the shared info 276 // data (additional data "0x10 HB Word") is reflected in the PEL as 277 // "reason code" so we zero it here. Also note that the first word 278 // in this group of words starts at word 0 and word 1 does not exits. 279 tiPel->setSrcWords(std::array<uint32_t, pel::numSrcWords>{ 280 (uint32_t)0x00000000, 281 (uint32_t)std::stoul(i_additional["0x14 HB Word 2"], 0, 16), 282 (uint32_t)std::stoul(i_additional["0x18 HB Word 3"], 0, 16), 283 (uint32_t)std::stoul(i_additional["0x1c HB Word 4"], 0, 16), 284 (uint32_t)std::stoul(i_additional["0x20 HB Word 5"], 0, 16), 285 (uint32_t)std::stoul(i_additional["0x24 HB Word 6"], 0, 16), 286 (uint32_t)std::stoul(i_additional["0x28 HB Word 7"], 0, 16), 287 (uint32_t)std::stoul(i_additional["0x2c HB Word 8"], 0, 16)}); 288 289 // Populate hostboot primary SRC 290 291 // char array for raw pel src 292 std::array<char, pel::asciiStringSize> srcChars{'0'}; 293 294 // src from TI info 295 std::string srcString = i_additional["SrcAscii"]; 296 297 // copy from string to char array 298 srcString.copy(srcChars.data(), 299 std::min(srcString.size(), pel::asciiStringSize), 0); 300 301 tiPel->setAsciiString(srcChars); // pel object src is char array 302 303 // set symptom-id 304 auto symptomId = (i_additional["SrcAscii"].substr(0, 8) + '_'); 305 306 symptomId += (i_additional["0x10 HB Word 0"]); // note: word 1 307 symptomId += (i_additional["0x14 HB Word 2"] + '_'); // does not exist 308 symptomId += (i_additional["0x18 HB Word 3"]); 309 symptomId += (i_additional["0x1c HB Word 4"] + '_'); 310 symptomId += (i_additional["0x20 HB Word 5"]); 311 symptomId += (i_additional["0x24 HB Word 6"] + '_'); 312 symptomId += (i_additional["0x28 HB Word 7"]); 313 symptomId += (i_additional["0x2c HB Word 8"]); 314 315 // setSymptomId will take care of required null-terminate and padding 316 tiPel->setSymptomId(symptomId); 317 } 318 319 // set severity, event type and action flags 320 tiPel->setSeverity(static_cast<uint8_t>(pel::Severity::termination)); 321 tiPel->setType(static_cast<uint8_t>(pel::EventType::na)); 322 tiPel->setAction(static_cast<uint16_t>(pel::ActionFlags::service | 323 pel::ActionFlags::report | 324 pel::ActionFlags::call)); 325 326 // The raw PEL that we used as the basis for this custom PEL contains the 327 // attention handler trace data and does not needed to be in this PEL so 328 // we remove it here. 329 tiPel->setSectionCount(tiPel->getSectionCount() - 1); 330 331 // Update the raw PEL with the new custom PEL data 332 tiPel->raw(i_rawPel); 333 334 // create PEL from raw data 335 createPelRaw(i_rawPel); 336 } 337 338 /** 339 * Log an event handled by the attention handler 340 * 341 * Basic (non TI) events will generate a standard message-registry based PEL 342 * 343 * TI events will create two PEL's. One PEL will be informational and will 344 * contain trace information relevent to attention handler. The second PEL 345 * will be specific to the TI type (including the primary SRC) and will be 346 * based off of the TI information provided to the attention handler through 347 * shared TI info data area. 348 * 349 * @param i_event - The event type 350 * @param i_additional - Additional PEL data 351 * @param i_ffdc - FFDC PEL data 352 * @return Event log Id (0 if no event log generated) 353 */ 354 uint32_t event(EventType i_event, 355 std::map<std::string, std::string>& i_additional, 356 const std::vector<util::FFDCFile>& i_ffdc) 357 { 358 uint32_t pelId = 0; // assume no event log generated 359 360 bool eventValid = false; // assume no event created 361 bool tiEvent = false; // assume not a terminate event 362 363 std::string eventName; 364 365 switch (i_event) 366 { 367 case EventType::Checkstop: 368 eventName = "org.open_power.HwDiags.Error.Checkstop"; 369 eventValid = true; 370 break; 371 case EventType::Terminate: 372 eventName = "org.open_power.Attn.Error.Terminate"; 373 eventValid = true; 374 tiEvent = true; 375 break; 376 case EventType::Vital: 377 eventName = "org.open_power.Attn.Error.Vital"; 378 eventValid = true; 379 break; 380 case EventType::HwDiagsFail: 381 case EventType::AttentionFail: 382 eventName = "org.open_power.Attn.Error.Fail"; 383 eventValid = true; 384 break; 385 case EventType::PhalSbeChipop: 386 eventName = "org.open_power.Processor.Error.SbeChipOpFailure"; 387 eventValid = true; 388 break; 389 default: 390 eventValid = false; 391 break; 392 } 393 394 if (true == eventValid) 395 { 396 // Create PEL with additional data and FFDC data. The newly created 397 // PEL's platform log-id will be returned. 398 pelId = createPel(eventName, i_additional, createFFDCTuples(i_ffdc)); 399 400 // If this is a TI event we will create an additional PEL that is 401 // specific to the subsystem that generated the TI. 402 if ((0 != pelId) && (true == tiEvent)) 403 { 404 // get file descriptor and size of information PEL 405 int pelFd = getPel(pelId); 406 407 // if PEL found, read into buffer 408 if (-1 != pelFd) 409 { 410 auto pelSize = lseek(pelFd, 0, SEEK_END); 411 lseek(pelFd, 0, SEEK_SET); 412 413 // read information PEL into buffer 414 std::vector<uint8_t> buffer(pelSize); 415 size_t numBytes = read(pelFd, buffer.data(), buffer.size()); 416 if (buffer.size() != numBytes) 417 { 418 std::stringstream traceMsg; 419 traceMsg << "Error reading event log: " << (int)numBytes 420 << " of " << (int)buffer.size() << " bytes read"; 421 auto strobj = traceMsg.str(); 422 trace<level::ERROR>(strobj.c_str()); 423 } 424 else 425 { 426 // create PEL from buffer 427 createPelCustom(buffer, i_additional); 428 } 429 430 close(pelFd); 431 } 432 433 uint8_t subsystem = std::stoi(i_additional["Subsystem"]); 434 435 // If not hypervisor TI 436 if (static_cast<uint8_t>(pel::SubsystemID::hypervisor) != subsystem) 437 { 438 // Request a dump and transition the host 439 if ("true" == i_additional["Dump"]) 440 { 441 // will not return until dump is complete 442 requestDump(pelId, DumpParameters{0, DumpType::Hostboot}); 443 } 444 } 445 } 446 } 447 return pelId; 448 } 449 450 /** 451 * Commit special attention TI event to log 452 * 453 * Create a event log with provided additional information and standard 454 * FFDC data plus TI FFDC data 455 * 456 * @param i_additional - Additional log data 457 * @param i_ti_InfoData - TI FFDC data 458 */ 459 void eventTerminate(std::map<std::string, std::string> i_additionalData, 460 char* i_tiInfoData) 461 { 462 463 uint32_t tiInfoSize = 0; // assume TI info was not available 464 465 if (nullptr != i_tiInfoData) 466 { 467 tiInfoSize = 56; // assume not hypervisor TI 468 469 uint8_t subsystem = std::stoi(i_additionalData["Subsystem"]); 470 471 // If hypervisor 472 if (static_cast<uint8_t>(pel::SubsystemID::hypervisor) == subsystem) 473 { 474 tiInfoSize = 1024; // assume hypervisor max 475 476 // hypervisor may just want some of the data 477 if (0 == (*(i_tiInfoData + 0x09) & 0x01)) 478 { 479 uint32_t* additionalLength = (uint32_t*)(i_tiInfoData + 0x50); 480 uint32_t tiAdditional = be32toh(*additionalLength); 481 tiInfoSize = std::min(tiInfoSize, (84 + tiAdditional)); 482 } 483 } 484 } 485 486 std::string traceMsg = "TI info size = " + std::to_string(tiInfoSize); 487 trace<level::INFO>(traceMsg.c_str()); 488 489 event(EventType::Terminate, i_additionalData, 490 createFFDCFiles(i_tiInfoData, tiInfoSize)); 491 } 492 493 /** @brief Commit SBE vital event to log, returns event log ID */ 494 uint32_t eventVital() 495 { 496 // Additional data for log 497 std::map<std::string, std::string> additionalData; 498 499 // Create log event with additional data and FFDC data 500 return event(EventType::Vital, additionalData, createFFDCFiles(nullptr, 0)); 501 } 502 503 /** 504 * Commit attention handler failure event to log 505 * 506 * Create an event log containing the specified error code. 507 * 508 * @param i_error - Error code 509 */ 510 void eventAttentionFail(int i_error) 511 { 512 // Additional data for log 513 std::map<std::string, std::string> additionalData; 514 additionalData["ERROR_CODE"] = std::to_string(i_error); 515 516 // Create log event with additional data and FFDC data 517 event(EventType::AttentionFail, additionalData, 518 createFFDCFiles(nullptr, 0)); 519 } 520 521 /** 522 * Commit SBE timeout event to log 523 * 524 * Create an event log indicating an SBE operation timed out. 525 * 526 * @param proc - processor that encountered the error 527 */ 528 void eventPhalSbeChipop(uint32_t proc) 529 { 530 trace<level::ERROR>("SBE error while getting TI info"); 531 532 // report proc number in event log entry 533 std::map<std::string, std::string> additionalData; 534 additionalData.emplace("SRC6", std::to_string(proc << 16)); 535 536 // create event with additional data and no ffdc 537 event(EventType::PhalSbeChipop, additionalData, 538 createFFDCFiles(nullptr, 0)); 539 } 540 541 /** 542 * Parse systemd journal message field 543 * 544 * Parse the journal looking for the specified field and return the journal 545 * data for that field. 546 * 547 * @param journal - The journal to parse 548 * @param field - Field containing the data to retrieve 549 * @return Data for the speciefied field 550 */ 551 std::string sdjGetFieldValue(sd_journal* journal, const char* field) 552 { 553 const char* data{nullptr}; 554 size_t length{0}; 555 556 // get field value 557 if (0 == sd_journal_get_data(journal, field, (const void**)&data, &length)) 558 { 559 size_t prefix{0}; 560 561 // The data returned by sd_journal_get_data will be prefixed with the 562 // field name and "=" 563 const void* eq = memchr(data, '=', length); 564 if (nullptr != eq) 565 { 566 // get just data following the "=" 567 prefix = (const char*)eq - data + 1; 568 } 569 else 570 { 571 // all the data (should not happen) 572 prefix = 0; 573 std::string value{}; // empty string 574 } 575 576 return std::string{data + prefix, length - prefix}; 577 } 578 else 579 { 580 return std::string{}; // empty string 581 } 582 } 583 584 /** 585 * Gather messages from the journal 586 * 587 * Fetch journal entry data for all entries with the specified field equal to 588 * the specified value. 589 * 590 * @param field - Field to search on 591 * @param fieldValue - Value to search for 592 * @param max - Maximum number of messages fetch 593 * @return Vector of journal entry data 594 */ 595 std::vector<std::string> sdjGetMessages(const std::string& field, 596 const std::string& fieldValue, 597 unsigned int max) 598 { 599 sd_journal* journal; 600 std::vector<std::string> messages; 601 602 if (0 == sd_journal_open(&journal, SD_JOURNAL_LOCAL_ONLY)) 603 { 604 SD_JOURNAL_FOREACH_BACKWARDS(journal) 605 { 606 // Get input field 607 std::string value = sdjGetFieldValue(journal, field.c_str()); 608 609 // Compare field value and read data 610 if (value == fieldValue) 611 { 612 // Get SYSLOG_IDENTIFIER field (process that logged message) 613 std::string syslog = 614 sdjGetFieldValue(journal, "SYSLOG_IDENTIFIER"); 615 616 // Get _PID field 617 std::string pid = sdjGetFieldValue(journal, "_PID"); 618 619 // Get MESSAGE field 620 std::string message = sdjGetFieldValue(journal, "MESSAGE"); 621 622 // Get timestamp 623 uint64_t usec{0}; 624 if (0 == sd_journal_get_realtime_usec(journal, &usec)) 625 { 626 627 // Convert realtime microseconds to date format 628 char dateBuffer[80]; 629 std::string date; 630 std::time_t timeInSecs = usec / 1000000; 631 strftime(dateBuffer, sizeof(dateBuffer), "%b %d %H:%M:%S", 632 std::localtime(&timeInSecs)); 633 date = dateBuffer; 634 635 // Store value to messages 636 value = date + " " + syslog + "[" + pid + "]: " + message; 637 messages.insert(messages.begin(), value); 638 } 639 } 640 641 // limit maximum number of messages 642 if (messages.size() >= max) 643 { 644 break; 645 } 646 } 647 648 sd_journal_close(journal); // close journal when done 649 } 650 651 return messages; 652 } 653 654 } // namespace attn 655