1 #include "config.h" 2 3 #include "log_manager.hpp" 4 5 #include "elog_entry.hpp" 6 #include "elog_meta.hpp" 7 #include "elog_serialize.hpp" 8 #include "extensions.hpp" 9 #include "lib/lg2_commit.hpp" 10 #include "paths.hpp" 11 #include "util.hpp" 12 13 #include <systemd/sd-bus.h> 14 #include <systemd/sd-journal.h> 15 #include <unistd.h> 16 17 #include <phosphor-logging/lg2.hpp> 18 #include <sdbusplus/vtable.hpp> 19 #include <xyz/openbmc_project/State/Host/server.hpp> 20 21 #include <cassert> 22 #include <chrono> 23 #include <cstdio> 24 #include <cstring> 25 #include <functional> 26 #include <map> 27 #include <ranges> 28 #include <set> 29 #include <string> 30 #include <string_view> 31 #include <vector> 32 33 using namespace std::chrono; 34 extern const std::map< 35 phosphor::logging::metadata::Metadata, 36 std::function<phosphor::logging::metadata::associations::Type>> 37 meta; 38 39 namespace phosphor 40 { 41 namespace logging 42 { 43 namespace internal 44 { 45 inline auto getLevel(const std::string& errMsg) 46 { 47 auto reqLevel = Entry::Level::Error; // Default to Error 48 49 auto levelmap = g_errLevelMap.find(errMsg); 50 if (levelmap != g_errLevelMap.end()) 51 { 52 reqLevel = static_cast<Entry::Level>(levelmap->second); 53 } 54 55 return reqLevel; 56 } 57 58 int Manager::getRealErrSize() 59 { 60 return realErrors.size(); 61 } 62 63 int Manager::getInfoErrSize() 64 { 65 return infoErrors.size(); 66 } 67 68 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg) 69 { 70 auto level = getLevel(errMsg); 71 _commit(transactionId, std::move(errMsg), level); 72 return entryId; 73 } 74 75 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg, 76 uint32_t errLvl) 77 { 78 _commit(transactionId, std::move(errMsg), 79 static_cast<Entry::Level>(errLvl)); 80 return entryId; 81 } 82 83 void Manager::_commit(uint64_t transactionId [[maybe_unused]], 84 std::string&& errMsg, Entry::Level errLvl) 85 { 86 std::map<std::string, std::string> additionalData{}; 87 88 // When running as a test-case, the system may have a LOT of journal 89 // data and we may not have permissions to do some of the journal sync 90 // operations. Just skip over them. 91 if (!IS_UNIT_TEST) 92 { 93 static constexpr auto transactionIdVar = 94 std::string_view{"TRANSACTION_ID"}; 95 // Length of 'TRANSACTION_ID' string. 96 static constexpr auto transactionIdVarSize = transactionIdVar.size(); 97 // Length of 'TRANSACTION_ID=' string. 98 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1; 99 100 // Flush all the pending log messages into the journal 101 util::journalSync(); 102 103 sd_journal* j = nullptr; 104 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); 105 if (rc < 0) 106 { 107 lg2::error("Failed to open journal: {ERROR}", "ERROR", 108 strerror(-rc)); 109 return; 110 } 111 112 std::string transactionIdStr = std::to_string(transactionId); 113 std::set<std::string> metalist; 114 auto metamap = g_errMetaMap.find(errMsg); 115 if (metamap != g_errMetaMap.end()) 116 { 117 metalist.insert(metamap->second.begin(), metamap->second.end()); 118 } 119 120 // Add _PID field information in AdditionalData. 121 metalist.insert("_PID"); 122 123 // Read the journal from the end to get the most recent entry first. 124 // The result from the sd_journal_get_data() is of the form 125 // VARIABLE=value. 126 SD_JOURNAL_FOREACH_BACKWARDS(j) 127 { 128 const char* data = nullptr; 129 size_t length = 0; 130 131 // Look for the transaction id metadata variable 132 rc = sd_journal_get_data(j, transactionIdVar.data(), 133 (const void**)&data, &length); 134 if (rc < 0) 135 { 136 // This journal entry does not have the TRANSACTION_ID 137 // metadata variable. 138 continue; 139 } 140 141 // journald does not guarantee that sd_journal_get_data() returns 142 // NULL terminated strings, so need to specify the size to use to 143 // compare, use the returned length instead of anything that relies 144 // on NULL terminators like strlen(). The data variable is in the 145 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID 146 // characters plus the (=) sign to do the comparison. 'data + 147 // transactionIdVarOffset' will be in the form of '1234'. 'length - 148 // transactionIdVarOffset' will be the length of '1234'. 149 if ((length <= (transactionIdVarOffset)) || 150 (transactionIdStr.compare( 151 0, transactionIdStr.size(), data + transactionIdVarOffset, 152 length - transactionIdVarOffset) != 0)) 153 { 154 // The value of the TRANSACTION_ID metadata is not the requested 155 // transaction id number. 156 continue; 157 } 158 159 // Search for all metadata variables in the current journal entry. 160 for (auto i = metalist.cbegin(); i != metalist.cend();) 161 { 162 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data, 163 &length); 164 if (rc < 0) 165 { 166 // Metadata variable not found, check next metadata 167 // variable. 168 i++; 169 continue; 170 } 171 172 // Metadata variable found, save it and remove it from the set. 173 std::string metadata(data, length); 174 if (auto pos = metadata.find('='); pos != std::string::npos) 175 { 176 auto key = metadata.substr(0, pos); 177 auto value = metadata.substr(pos + 1); 178 additionalData.emplace(std::move(key), std::move(value)); 179 } 180 i = metalist.erase(i); 181 } 182 if (metalist.empty()) 183 { 184 // All metadata variables found, break out of journal loop. 185 break; 186 } 187 } 188 if (!metalist.empty()) 189 { 190 // Not all the metadata variables were found in the journal. 191 for (auto& metaVarStr : metalist) 192 { 193 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD", 194 metaVarStr); 195 } 196 } 197 198 sd_journal_close(j); 199 } 200 createEntry(errMsg, errLvl, additionalData); 201 } 202 203 auto Manager::createEntry(std::string errMsg, Entry::Level errLvl, 204 std::map<std::string, std::string> additionalData, 205 const FFDCEntries& ffdc) 206 -> sdbusplus::message::object_path 207 { 208 if (!Extensions::disableDefaultLogCaps()) 209 { 210 if (errLvl < Entry::sevLowerLimit) 211 { 212 if (realErrors.size() >= ERROR_CAP) 213 { 214 erase(realErrors.front()); 215 } 216 } 217 else 218 { 219 if (infoErrors.size() >= ERROR_INFO_CAP) 220 { 221 erase(infoErrors.front()); 222 } 223 } 224 } 225 226 entryId++; 227 if (errLvl >= Entry::sevLowerLimit) 228 { 229 infoErrors.push_back(entryId); 230 } 231 else 232 { 233 realErrors.push_back(entryId); 234 } 235 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>( 236 std::chrono::system_clock::now().time_since_epoch()) 237 .count(); 238 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 239 240 AssociationList objects{}; 241 auto additionalDataVec = util::additional_data::combine(additionalData); 242 processMetadata(errMsg, additionalDataVec, objects); 243 244 auto e = std::make_unique<Entry>( 245 busLog, objPath, entryId, 246 ms, // Milliseconds since 1970 247 errLvl, std::move(errMsg), std::move(additionalData), 248 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this); 249 250 serialize(*e); 251 252 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) && 253 isCalloutPresent(*e)) 254 { 255 quiesceOnError(entryId); 256 } 257 258 // Add entry before calling the extensions so that they have access to it 259 entries.insert(std::make_pair(entryId, std::move(e))); 260 261 doExtensionLogCreate(*entries.find(entryId)->second, ffdc); 262 263 // Note: No need to close the file descriptors in the FFDC. 264 265 return objPath; 266 } 267 268 auto Manager::createFromEvent( 269 sdbusplus::exception::generated_event_base&& event) 270 -> sdbusplus::message::object_path 271 { 272 auto [msg, level, data] = lg2::details::extractEvent(std::move(event)); 273 return this->createEntry(msg, level, std::move(data)); 274 } 275 276 bool Manager::isQuiesceOnErrorEnabled() 277 { 278 // When running under tests, the Logging.Settings service will not be 279 // present. Assume false. 280 if (IS_UNIT_TEST) 281 { 282 return false; 283 } 284 285 std::variant<bool> property; 286 287 auto method = this->busLog.new_method_call( 288 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings", 289 "org.freedesktop.DBus.Properties", "Get"); 290 291 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError"); 292 293 try 294 { 295 auto reply = this->busLog.call(method); 296 reply.read(property); 297 } 298 catch (const sdbusplus::exception_t& e) 299 { 300 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 301 e); 302 return false; 303 } 304 305 return std::get<bool>(property); 306 } 307 308 bool Manager::isCalloutPresent(const Entry& entry) 309 { 310 for (const auto& c : std::views::keys(entry.additionalData())) 311 { 312 if (c.find("CALLOUT_") != std::string::npos) 313 { 314 return true; 315 } 316 } 317 318 return false; 319 } 320 321 void Manager::findAndRemoveResolvedBlocks() 322 { 323 for (auto& entry : entries) 324 { 325 if (entry.second->resolved()) 326 { 327 checkAndRemoveBlockingError(entry.first); 328 } 329 } 330 } 331 332 void Manager::onEntryResolve(sdbusplus::message_t& msg) 333 { 334 using Interface = std::string; 335 using Property = std::string; 336 using Value = std::string; 337 using Properties = std::map<Property, std::variant<Value>>; 338 339 Interface interface; 340 Properties properties; 341 342 msg.read(interface, properties); 343 344 for (const auto& p : properties) 345 { 346 if (p.first == "Resolved") 347 { 348 findAndRemoveResolvedBlocks(); 349 return; 350 } 351 } 352 } 353 354 void Manager::checkAndQuiesceHost() 355 { 356 using Host = sdbusplus::server::xyz::openbmc_project::state::Host; 357 358 // First check host state 359 std::variant<Host::HostState> property; 360 361 auto method = this->busLog.new_method_call( 362 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0", 363 "org.freedesktop.DBus.Properties", "Get"); 364 365 method.append("xyz.openbmc_project.State.Host", "CurrentHostState"); 366 367 try 368 { 369 auto reply = this->busLog.call(method); 370 reply.read(property); 371 } 372 catch (const sdbusplus::exception_t& e) 373 { 374 // Quiescing the host is a "best effort" type function. If unable to 375 // read the host state or it comes back empty, just return. 376 // The boot block object will still be created and the associations to 377 // find the log will be present. Don't want a dependency with 378 // phosphor-state-manager service 379 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 380 e); 381 return; 382 } 383 384 auto hostState = std::get<Host::HostState>(property); 385 if (hostState != Host::HostState::Running) 386 { 387 return; 388 } 389 390 auto quiesce = this->busLog.new_method_call( 391 "org.freedesktop.systemd1", "/org/freedesktop/systemd1", 392 "org.freedesktop.systemd1.Manager", "StartUnit"); 393 394 quiesce.append("obmc-host-graceful-quiesce@0.target"); 395 quiesce.append("replace"); 396 397 this->busLog.call_noreply(quiesce); 398 } 399 400 void Manager::quiesceOnError(const uint32_t entryId) 401 { 402 // Verify we don't already have this entry blocking 403 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(), 404 [&](const std::unique_ptr<Block>& obj) { 405 return obj->entryId == entryId; 406 }); 407 if (it != this->blockingErrors.end()) 408 { 409 // Already recorded so just return 410 lg2::debug( 411 "QuiesceOnError set and callout present but entry already logged"); 412 return; 413 } 414 415 lg2::info("QuiesceOnError set and callout present"); 416 417 auto blockPath = 418 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId); 419 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId); 420 this->blockingErrors.push_back(std::move(blockObj)); 421 422 // Register call back if log is resolved 423 using namespace sdbusplus::bus::match::rules; 424 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 425 auto callback = std::make_unique<sdbusplus::bus::match_t>( 426 this->busLog, 427 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"), 428 std::bind(std::mem_fn(&Manager::onEntryResolve), this, 429 std::placeholders::_1)); 430 431 propChangedEntryCallback.insert( 432 std::make_pair(entryId, std::move(callback))); 433 434 checkAndQuiesceHost(); 435 } 436 437 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc) 438 { 439 // Make the association <endpointpath>/<endpointtype> paths 440 std::vector<std::string> assocs; 441 for (const auto& [forwardType, reverseType, endpoint] : 442 entry.associations()) 443 { 444 std::string e{endpoint}; 445 e += '/' + reverseType; 446 assocs.push_back(e); 447 } 448 449 for (auto& create : Extensions::getCreateFunctions()) 450 { 451 try 452 { 453 create(entry.message(), entry.id(), entry.timestamp(), 454 entry.severity(), entry.additionalData(), assocs, ffdc); 455 } 456 catch (const std::exception& e) 457 { 458 lg2::error( 459 "An extension's create function threw an exception: {ERROR}", 460 "ERROR", e); 461 } 462 } 463 } 464 465 void Manager::processMetadata(const std::string& /*errorName*/, 466 const std::vector<std::string>& additionalData, 467 AssociationList& objects) const 468 { 469 // additionalData is a list of "metadata=value" 470 constexpr auto separator = '='; 471 for (const auto& entryItem : additionalData) 472 { 473 auto found = entryItem.find(separator); 474 if (std::string::npos != found) 475 { 476 auto metadata = entryItem.substr(0, found); 477 auto iter = meta.find(metadata); 478 if (meta.end() != iter) 479 { 480 (iter->second)(metadata, additionalData, objects); 481 } 482 } 483 } 484 } 485 486 void Manager::checkAndRemoveBlockingError(uint32_t entryId) 487 { 488 // First look for blocking object and remove 489 auto it = find_if(blockingErrors.begin(), blockingErrors.end(), 490 [&](const std::unique_ptr<Block>& obj) { 491 return obj->entryId == entryId; 492 }); 493 if (it != blockingErrors.end()) 494 { 495 blockingErrors.erase(it); 496 } 497 498 // Now remove the callback looking for the error to be resolved 499 auto resolveFind = propChangedEntryCallback.find(entryId); 500 if (resolveFind != propChangedEntryCallback.end()) 501 { 502 propChangedEntryCallback.erase(resolveFind); 503 } 504 505 return; 506 } 507 508 size_t Manager::eraseAll() 509 { 510 std::vector<uint32_t> logIDWithHwIsolation; 511 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions()) 512 { 513 try 514 { 515 func(logIDWithHwIsolation); 516 } 517 catch (const std::exception& e) 518 { 519 lg2::error("An extension's LogIDWithHwIsolation function threw an " 520 "exception: {ERROR}", 521 "ERROR", e); 522 } 523 } 524 size_t entriesSize = entries.size(); 525 auto iter = entries.begin(); 526 if (logIDWithHwIsolation.empty()) 527 { 528 while (iter != entries.end()) 529 { 530 auto e = iter->first; 531 ++iter; 532 erase(e); 533 } 534 entryId = 0; 535 } 536 else 537 { 538 while (iter != entries.end()) 539 { 540 auto e = iter->first; 541 ++iter; 542 try 543 { 544 if (!std::ranges::contains(logIDWithHwIsolation, e)) 545 { 546 erase(e); 547 } 548 else 549 { 550 entriesSize--; 551 } 552 } 553 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 554 Unavailable& e) 555 { 556 entriesSize--; 557 } 558 } 559 if (!entries.empty()) 560 { 561 entryId = std::ranges::max_element(entries, [](const auto& a, 562 const auto& b) { 563 return a.first < b.first; 564 })->first; 565 } 566 else 567 { 568 entryId = 0; 569 } 570 } 571 return entriesSize; 572 } 573 574 void Manager::erase(uint32_t entryId) 575 { 576 auto entryFound = entries.find(entryId); 577 if (entries.end() != entryFound) 578 { 579 for (auto& func : Extensions::getDeleteProhibitedFunctions()) 580 { 581 try 582 { 583 bool prohibited = false; 584 func(entryId, prohibited); 585 if (prohibited) 586 { 587 throw sdbusplus::xyz::openbmc_project::Common::Error:: 588 Unavailable(); 589 } 590 } 591 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 592 Unavailable& e) 593 { 594 throw; 595 } 596 catch (const std::exception& e) 597 { 598 lg2::error("An extension's deleteProhibited function threw an " 599 "exception: {ERROR}", 600 "ERROR", e); 601 } 602 } 603 604 // Delete the persistent representation of this error. 605 fs::path errorPath(paths::error()); 606 errorPath /= std::to_string(entryId); 607 fs::remove(errorPath); 608 609 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) { 610 auto it = std::find(ids.begin(), ids.end(), id); 611 if (it != ids.end()) 612 { 613 ids.erase(it); 614 } 615 }; 616 if (entryFound->second->severity() >= Entry::sevLowerLimit) 617 { 618 removeId(infoErrors, entryId); 619 } 620 else 621 { 622 removeId(realErrors, entryId); 623 } 624 entries.erase(entryFound); 625 626 checkAndRemoveBlockingError(entryId); 627 628 for (auto& remove : Extensions::getDeleteFunctions()) 629 { 630 try 631 { 632 remove(entryId); 633 } 634 catch (const std::exception& e) 635 { 636 lg2::error("An extension's delete function threw an exception: " 637 "{ERROR}", 638 "ERROR", e); 639 } 640 } 641 } 642 else 643 { 644 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId); 645 } 646 } 647 648 void Manager::restore() 649 { 650 auto sanity = [](const auto& id, const auto& restoredId) { 651 return id == restoredId; 652 }; 653 654 fs::path dir(paths::error()); 655 if (!fs::exists(dir) || fs::is_empty(dir)) 656 { 657 return; 658 } 659 660 for (auto& file : fs::directory_iterator(dir)) 661 { 662 auto id = file.path().filename().c_str(); 663 auto idNum = std::stol(id); 664 auto e = std::make_unique<Entry>( 665 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this); 666 if (deserialize(file.path(), *e)) 667 { 668 // validate the restored error entry id 669 if (sanity(static_cast<uint32_t>(idNum), e->id())) 670 { 671 e->path(file.path(), true); 672 if (e->severity() >= Entry::sevLowerLimit) 673 { 674 infoErrors.push_back(idNum); 675 } 676 else 677 { 678 realErrors.push_back(idNum); 679 } 680 681 entries.insert(std::make_pair(idNum, std::move(e))); 682 } 683 else 684 { 685 lg2::error( 686 "Failed in sanity check while restoring error entry. " 687 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.", 688 "ID_NUM", idNum, "ENTRY_ID", e->id()); 689 } 690 } 691 } 692 693 if (!entries.empty()) 694 { 695 entryId = entries.rbegin()->first; 696 } 697 } 698 699 std::string Manager::readFWVersion() 700 { 701 auto version = util::getOSReleaseValue("VERSION_ID"); 702 703 if (!version) 704 { 705 lg2::error("Unable to read BMC firmware version"); 706 } 707 708 return version.value_or(""); 709 } 710 711 auto Manager::create(const std::string& message, Entry::Level severity, 712 const std::map<std::string, std::string>& additionalData, 713 const FFDCEntries& ffdc) -> sdbusplus::message::object_path 714 { 715 return createEntry(message, severity, additionalData, ffdc); 716 } 717 718 } // namespace internal 719 } // namespace logging 720 } // namespace phosphor 721