1 #include "config.h" 2 3 #include "log_manager.hpp" 4 5 #include "elog_entry.hpp" 6 #include "elog_meta.hpp" 7 #include "elog_serialize.hpp" 8 #include "extensions.hpp" 9 #include "lib/lg2_commit.hpp" 10 #include "paths.hpp" 11 #include "util.hpp" 12 13 #include <systemd/sd-bus.h> 14 #include <systemd/sd-journal.h> 15 #include <unistd.h> 16 17 #include <phosphor-logging/lg2.hpp> 18 #include <sdbusplus/vtable.hpp> 19 #include <xyz/openbmc_project/State/Host/server.hpp> 20 21 #include <cassert> 22 #include <chrono> 23 #include <cstdio> 24 #include <cstring> 25 #include <fstream> 26 #include <functional> 27 #include <future> 28 #include <iostream> 29 #include <map> 30 #include <ranges> 31 #include <set> 32 #include <string> 33 #include <string_view> 34 #include <vector> 35 36 using namespace std::chrono; 37 extern const std::map< 38 phosphor::logging::metadata::Metadata, 39 std::function<phosphor::logging::metadata::associations::Type>> 40 meta; 41 42 namespace phosphor 43 { 44 namespace logging 45 { 46 namespace internal 47 { 48 inline auto getLevel(const std::string& errMsg) 49 { 50 auto reqLevel = Entry::Level::Error; // Default to Error 51 52 auto levelmap = g_errLevelMap.find(errMsg); 53 if (levelmap != g_errLevelMap.end()) 54 { 55 reqLevel = static_cast<Entry::Level>(levelmap->second); 56 } 57 58 return reqLevel; 59 } 60 61 int Manager::getRealErrSize() 62 { 63 return realErrors.size(); 64 } 65 66 int Manager::getInfoErrSize() 67 { 68 return infoErrors.size(); 69 } 70 71 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg) 72 { 73 auto level = getLevel(errMsg); 74 _commit(transactionId, std::move(errMsg), level); 75 return entryId; 76 } 77 78 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg, 79 uint32_t errLvl) 80 { 81 _commit(transactionId, std::move(errMsg), 82 static_cast<Entry::Level>(errLvl)); 83 return entryId; 84 } 85 86 void Manager::_commit(uint64_t transactionId [[maybe_unused]], 87 std::string&& errMsg, Entry::Level errLvl) 88 { 89 std::map<std::string, std::string> additionalData{}; 90 91 // When running as a test-case, the system may have a LOT of journal 92 // data and we may not have permissions to do some of the journal sync 93 // operations. Just skip over them. 94 if (!IS_UNIT_TEST) 95 { 96 static constexpr auto transactionIdVar = 97 std::string_view{"TRANSACTION_ID"}; 98 // Length of 'TRANSACTION_ID' string. 99 static constexpr auto transactionIdVarSize = transactionIdVar.size(); 100 // Length of 'TRANSACTION_ID=' string. 101 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1; 102 103 // Flush all the pending log messages into the journal 104 util::journalSync(); 105 106 sd_journal* j = nullptr; 107 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); 108 if (rc < 0) 109 { 110 lg2::error("Failed to open journal: {ERROR}", "ERROR", 111 strerror(-rc)); 112 return; 113 } 114 115 std::string transactionIdStr = std::to_string(transactionId); 116 std::set<std::string> metalist; 117 auto metamap = g_errMetaMap.find(errMsg); 118 if (metamap != g_errMetaMap.end()) 119 { 120 metalist.insert(metamap->second.begin(), metamap->second.end()); 121 } 122 123 // Add _PID field information in AdditionalData. 124 metalist.insert("_PID"); 125 126 // Read the journal from the end to get the most recent entry first. 127 // The result from the sd_journal_get_data() is of the form 128 // VARIABLE=value. 129 SD_JOURNAL_FOREACH_BACKWARDS(j) 130 { 131 const char* data = nullptr; 132 size_t length = 0; 133 134 // Look for the transaction id metadata variable 135 rc = sd_journal_get_data(j, transactionIdVar.data(), 136 (const void**)&data, &length); 137 if (rc < 0) 138 { 139 // This journal entry does not have the TRANSACTION_ID 140 // metadata variable. 141 continue; 142 } 143 144 // journald does not guarantee that sd_journal_get_data() returns 145 // NULL terminated strings, so need to specify the size to use to 146 // compare, use the returned length instead of anything that relies 147 // on NULL terminators like strlen(). The data variable is in the 148 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID 149 // characters plus the (=) sign to do the comparison. 'data + 150 // transactionIdVarOffset' will be in the form of '1234'. 'length - 151 // transactionIdVarOffset' will be the length of '1234'. 152 if ((length <= (transactionIdVarOffset)) || 153 (transactionIdStr.compare( 154 0, transactionIdStr.size(), data + transactionIdVarOffset, 155 length - transactionIdVarOffset) != 0)) 156 { 157 // The value of the TRANSACTION_ID metadata is not the requested 158 // transaction id number. 159 continue; 160 } 161 162 // Search for all metadata variables in the current journal entry. 163 for (auto i = metalist.cbegin(); i != metalist.cend();) 164 { 165 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data, 166 &length); 167 if (rc < 0) 168 { 169 // Metadata variable not found, check next metadata 170 // variable. 171 i++; 172 continue; 173 } 174 175 // Metadata variable found, save it and remove it from the set. 176 std::string metadata(data, length); 177 if (auto pos = metadata.find('='); pos != std::string::npos) 178 { 179 auto key = metadata.substr(0, pos); 180 auto value = metadata.substr(pos + 1); 181 additionalData.emplace(std::move(key), std::move(value)); 182 } 183 i = metalist.erase(i); 184 } 185 if (metalist.empty()) 186 { 187 // All metadata variables found, break out of journal loop. 188 break; 189 } 190 } 191 if (!metalist.empty()) 192 { 193 // Not all the metadata variables were found in the journal. 194 for (auto& metaVarStr : metalist) 195 { 196 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD", 197 metaVarStr); 198 } 199 } 200 201 sd_journal_close(j); 202 } 203 createEntry(errMsg, errLvl, additionalData); 204 } 205 206 auto Manager::createEntry(std::string errMsg, Entry::Level errLvl, 207 std::map<std::string, std::string> additionalData, 208 const FFDCEntries& ffdc) 209 -> sdbusplus::message::object_path 210 { 211 if (!Extensions::disableDefaultLogCaps()) 212 { 213 if (errLvl < Entry::sevLowerLimit) 214 { 215 if (realErrors.size() >= ERROR_CAP) 216 { 217 erase(realErrors.front()); 218 } 219 } 220 else 221 { 222 if (infoErrors.size() >= ERROR_INFO_CAP) 223 { 224 erase(infoErrors.front()); 225 } 226 } 227 } 228 229 entryId++; 230 if (errLvl >= Entry::sevLowerLimit) 231 { 232 infoErrors.push_back(entryId); 233 } 234 else 235 { 236 realErrors.push_back(entryId); 237 } 238 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>( 239 std::chrono::system_clock::now().time_since_epoch()) 240 .count(); 241 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 242 243 AssociationList objects{}; 244 auto additionalDataVec = util::additional_data::combine(additionalData); 245 processMetadata(errMsg, additionalDataVec, objects); 246 247 auto e = std::make_unique<Entry>( 248 busLog, objPath, entryId, 249 ms, // Milliseconds since 1970 250 errLvl, std::move(errMsg), std::move(additionalData), 251 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this); 252 253 serialize(*e); 254 255 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) && 256 isCalloutPresent(*e)) 257 { 258 quiesceOnError(entryId); 259 } 260 261 // Add entry before calling the extensions so that they have access to it 262 entries.insert(std::make_pair(entryId, std::move(e))); 263 264 doExtensionLogCreate(*entries.find(entryId)->second, ffdc); 265 266 // Note: No need to close the file descriptors in the FFDC. 267 268 return objPath; 269 } 270 271 auto 272 Manager::createFromEvent(sdbusplus::exception::generated_event_base&& event) 273 -> sdbusplus::message::object_path 274 { 275 auto [msg, level, data] = lg2::details::extractEvent(std::move(event)); 276 return this->createEntry(msg, level, std::move(data)); 277 } 278 279 bool Manager::isQuiesceOnErrorEnabled() 280 { 281 // When running under tests, the Logging.Settings service will not be 282 // present. Assume false. 283 if (IS_UNIT_TEST) 284 { 285 return false; 286 } 287 288 std::variant<bool> property; 289 290 auto method = this->busLog.new_method_call( 291 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings", 292 "org.freedesktop.DBus.Properties", "Get"); 293 294 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError"); 295 296 try 297 { 298 auto reply = this->busLog.call(method); 299 reply.read(property); 300 } 301 catch (const sdbusplus::exception_t& e) 302 { 303 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 304 e); 305 return false; 306 } 307 308 return std::get<bool>(property); 309 } 310 311 bool Manager::isCalloutPresent(const Entry& entry) 312 { 313 for (const auto& c : std::views::keys(entry.additionalData())) 314 { 315 if (c.find("CALLOUT_") != std::string::npos) 316 { 317 return true; 318 } 319 } 320 321 return false; 322 } 323 324 void Manager::findAndRemoveResolvedBlocks() 325 { 326 for (auto& entry : entries) 327 { 328 if (entry.second->resolved()) 329 { 330 checkAndRemoveBlockingError(entry.first); 331 } 332 } 333 } 334 335 void Manager::onEntryResolve(sdbusplus::message_t& msg) 336 { 337 using Interface = std::string; 338 using Property = std::string; 339 using Value = std::string; 340 using Properties = std::map<Property, std::variant<Value>>; 341 342 Interface interface; 343 Properties properties; 344 345 msg.read(interface, properties); 346 347 for (const auto& p : properties) 348 { 349 if (p.first == "Resolved") 350 { 351 findAndRemoveResolvedBlocks(); 352 return; 353 } 354 } 355 } 356 357 void Manager::checkAndQuiesceHost() 358 { 359 using Host = sdbusplus::server::xyz::openbmc_project::state::Host; 360 361 // First check host state 362 std::variant<Host::HostState> property; 363 364 auto method = this->busLog.new_method_call( 365 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0", 366 "org.freedesktop.DBus.Properties", "Get"); 367 368 method.append("xyz.openbmc_project.State.Host", "CurrentHostState"); 369 370 try 371 { 372 auto reply = this->busLog.call(method); 373 reply.read(property); 374 } 375 catch (const sdbusplus::exception_t& e) 376 { 377 // Quiescing the host is a "best effort" type function. If unable to 378 // read the host state or it comes back empty, just return. 379 // The boot block object will still be created and the associations to 380 // find the log will be present. Don't want a dependency with 381 // phosphor-state-manager service 382 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 383 e); 384 return; 385 } 386 387 auto hostState = std::get<Host::HostState>(property); 388 if (hostState != Host::HostState::Running) 389 { 390 return; 391 } 392 393 auto quiesce = this->busLog.new_method_call( 394 "org.freedesktop.systemd1", "/org/freedesktop/systemd1", 395 "org.freedesktop.systemd1.Manager", "StartUnit"); 396 397 quiesce.append("obmc-host-graceful-quiesce@0.target"); 398 quiesce.append("replace"); 399 400 this->busLog.call_noreply(quiesce); 401 } 402 403 void Manager::quiesceOnError(const uint32_t entryId) 404 { 405 // Verify we don't already have this entry blocking 406 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(), 407 [&](const std::unique_ptr<Block>& obj) { 408 return obj->entryId == entryId; 409 }); 410 if (it != this->blockingErrors.end()) 411 { 412 // Already recorded so just return 413 lg2::debug( 414 "QuiesceOnError set and callout present but entry already logged"); 415 return; 416 } 417 418 lg2::info("QuiesceOnError set and callout present"); 419 420 auto blockPath = 421 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId); 422 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId); 423 this->blockingErrors.push_back(std::move(blockObj)); 424 425 // Register call back if log is resolved 426 using namespace sdbusplus::bus::match::rules; 427 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 428 auto callback = std::make_unique<sdbusplus::bus::match_t>( 429 this->busLog, 430 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"), 431 std::bind(std::mem_fn(&Manager::onEntryResolve), this, 432 std::placeholders::_1)); 433 434 propChangedEntryCallback.insert( 435 std::make_pair(entryId, std::move(callback))); 436 437 checkAndQuiesceHost(); 438 } 439 440 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc) 441 { 442 // Make the association <endpointpath>/<endpointtype> paths 443 std::vector<std::string> assocs; 444 for (const auto& [forwardType, reverseType, endpoint] : 445 entry.associations()) 446 { 447 std::string e{endpoint}; 448 e += '/' + reverseType; 449 assocs.push_back(e); 450 } 451 452 for (auto& create : Extensions::getCreateFunctions()) 453 { 454 try 455 { 456 create(entry.message(), entry.id(), entry.timestamp(), 457 entry.severity(), entry.additionalData2(), assocs, ffdc); 458 } 459 catch (const std::exception& e) 460 { 461 lg2::error( 462 "An extension's create function threw an exception: {ERROR}", 463 "ERROR", e); 464 } 465 } 466 } 467 468 void Manager::processMetadata(const std::string& /*errorName*/, 469 const std::vector<std::string>& additionalData, 470 AssociationList& objects) const 471 { 472 // additionalData is a list of "metadata=value" 473 constexpr auto separator = '='; 474 for (const auto& entryItem : additionalData) 475 { 476 auto found = entryItem.find(separator); 477 if (std::string::npos != found) 478 { 479 auto metadata = entryItem.substr(0, found); 480 auto iter = meta.find(metadata); 481 if (meta.end() != iter) 482 { 483 (iter->second)(metadata, additionalData, objects); 484 } 485 } 486 } 487 } 488 489 void Manager::checkAndRemoveBlockingError(uint32_t entryId) 490 { 491 // First look for blocking object and remove 492 auto it = find_if(blockingErrors.begin(), blockingErrors.end(), 493 [&](const std::unique_ptr<Block>& obj) { 494 return obj->entryId == entryId; 495 }); 496 if (it != blockingErrors.end()) 497 { 498 blockingErrors.erase(it); 499 } 500 501 // Now remove the callback looking for the error to be resolved 502 auto resolveFind = propChangedEntryCallback.find(entryId); 503 if (resolveFind != propChangedEntryCallback.end()) 504 { 505 propChangedEntryCallback.erase(resolveFind); 506 } 507 508 return; 509 } 510 511 size_t Manager::eraseAll() 512 { 513 std::vector<uint32_t> logIDWithHwIsolation; 514 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions()) 515 { 516 try 517 { 518 func(logIDWithHwIsolation); 519 } 520 catch (const std::exception& e) 521 { 522 lg2::error("An extension's LogIDWithHwIsolation function threw an " 523 "exception: {ERROR}", 524 "ERROR", e); 525 } 526 } 527 size_t entriesSize = entries.size(); 528 auto iter = entries.begin(); 529 if (logIDWithHwIsolation.empty()) 530 { 531 while (iter != entries.end()) 532 { 533 auto e = iter->first; 534 ++iter; 535 erase(e); 536 } 537 entryId = 0; 538 } 539 else 540 { 541 while (iter != entries.end()) 542 { 543 auto e = iter->first; 544 ++iter; 545 try 546 { 547 if (!std::ranges::contains(logIDWithHwIsolation, e)) 548 { 549 erase(e); 550 } 551 else 552 { 553 entriesSize--; 554 } 555 } 556 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 557 Unavailable& e) 558 { 559 entriesSize--; 560 } 561 } 562 if (!entries.empty()) 563 { 564 entryId = std::ranges::max_element(entries, [](const auto& a, 565 const auto& b) { 566 return a.first < b.first; 567 })->first; 568 } 569 else 570 { 571 entryId = 0; 572 } 573 } 574 return entriesSize; 575 } 576 577 void Manager::erase(uint32_t entryId) 578 { 579 auto entryFound = entries.find(entryId); 580 if (entries.end() != entryFound) 581 { 582 for (auto& func : Extensions::getDeleteProhibitedFunctions()) 583 { 584 try 585 { 586 bool prohibited = false; 587 func(entryId, prohibited); 588 if (prohibited) 589 { 590 throw sdbusplus::xyz::openbmc_project::Common::Error:: 591 Unavailable(); 592 } 593 } 594 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 595 Unavailable& e) 596 { 597 throw; 598 } 599 catch (const std::exception& e) 600 { 601 lg2::error("An extension's deleteProhibited function threw an " 602 "exception: {ERROR}", 603 "ERROR", e); 604 } 605 } 606 607 // Delete the persistent representation of this error. 608 fs::path errorPath(paths::error()); 609 errorPath /= std::to_string(entryId); 610 fs::remove(errorPath); 611 612 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) { 613 auto it = std::find(ids.begin(), ids.end(), id); 614 if (it != ids.end()) 615 { 616 ids.erase(it); 617 } 618 }; 619 if (entryFound->second->severity() >= Entry::sevLowerLimit) 620 { 621 removeId(infoErrors, entryId); 622 } 623 else 624 { 625 removeId(realErrors, entryId); 626 } 627 entries.erase(entryFound); 628 629 checkAndRemoveBlockingError(entryId); 630 631 for (auto& remove : Extensions::getDeleteFunctions()) 632 { 633 try 634 { 635 remove(entryId); 636 } 637 catch (const std::exception& e) 638 { 639 lg2::error("An extension's delete function threw an exception: " 640 "{ERROR}", 641 "ERROR", e); 642 } 643 } 644 } 645 else 646 { 647 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId); 648 } 649 } 650 651 void Manager::restore() 652 { 653 auto sanity = [](const auto& id, const auto& restoredId) { 654 return id == restoredId; 655 }; 656 657 fs::path dir(paths::error()); 658 if (!fs::exists(dir) || fs::is_empty(dir)) 659 { 660 return; 661 } 662 663 for (auto& file : fs::directory_iterator(dir)) 664 { 665 auto id = file.path().filename().c_str(); 666 auto idNum = std::stol(id); 667 auto e = std::make_unique<Entry>( 668 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this); 669 if (deserialize(file.path(), *e)) 670 { 671 // validate the restored error entry id 672 if (sanity(static_cast<uint32_t>(idNum), e->id())) 673 { 674 e->path(file.path(), true); 675 if (e->severity() >= Entry::sevLowerLimit) 676 { 677 infoErrors.push_back(idNum); 678 } 679 else 680 { 681 realErrors.push_back(idNum); 682 } 683 684 entries.insert(std::make_pair(idNum, std::move(e))); 685 } 686 else 687 { 688 lg2::error( 689 "Failed in sanity check while restoring error entry. " 690 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.", 691 "ID_NUM", idNum, "ENTRY_ID", e->id()); 692 } 693 } 694 } 695 696 if (!entries.empty()) 697 { 698 entryId = entries.rbegin()->first; 699 } 700 } 701 702 std::string Manager::readFWVersion() 703 { 704 auto version = util::getOSReleaseValue("VERSION_ID"); 705 706 if (!version) 707 { 708 lg2::error("Unable to read BMC firmware version"); 709 } 710 711 return version.value_or(""); 712 } 713 714 auto Manager::create(const std::string& message, Entry::Level severity, 715 const std::map<std::string, std::string>& additionalData, 716 const FFDCEntries& ffdc) -> sdbusplus::message::object_path 717 { 718 return createEntry(message, severity, additionalData, ffdc); 719 } 720 721 } // namespace internal 722 } // namespace logging 723 } // namespace phosphor 724