1 #include "config.h" 2 3 #include "log_manager.hpp" 4 5 #include "elog_entry.hpp" 6 #include "elog_meta.hpp" 7 #include "elog_serialize.hpp" 8 #include "extensions.hpp" 9 #include "lib/lg2_commit.hpp" 10 #include "paths.hpp" 11 #include "util.hpp" 12 13 #include <systemd/sd-bus.h> 14 #include <systemd/sd-journal.h> 15 #include <unistd.h> 16 17 #include <phosphor-logging/lg2.hpp> 18 #include <sdbusplus/vtable.hpp> 19 #include <xyz/openbmc_project/State/Host/server.hpp> 20 21 #include <cassert> 22 #include <chrono> 23 #include <cstdio> 24 #include <cstring> 25 #include <fstream> 26 #include <functional> 27 #include <future> 28 #include <iostream> 29 #include <map> 30 #include <set> 31 #include <string> 32 #include <string_view> 33 #include <vector> 34 35 using namespace std::chrono; 36 extern const std::map< 37 phosphor::logging::metadata::Metadata, 38 std::function<phosphor::logging::metadata::associations::Type>> 39 meta; 40 41 namespace phosphor 42 { 43 namespace logging 44 { 45 namespace internal 46 { 47 inline auto getLevel(const std::string& errMsg) 48 { 49 auto reqLevel = Entry::Level::Error; // Default to Error 50 51 auto levelmap = g_errLevelMap.find(errMsg); 52 if (levelmap != g_errLevelMap.end()) 53 { 54 reqLevel = static_cast<Entry::Level>(levelmap->second); 55 } 56 57 return reqLevel; 58 } 59 60 int Manager::getRealErrSize() 61 { 62 return realErrors.size(); 63 } 64 65 int Manager::getInfoErrSize() 66 { 67 return infoErrors.size(); 68 } 69 70 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg) 71 { 72 auto level = getLevel(errMsg); 73 _commit(transactionId, std::move(errMsg), level); 74 return entryId; 75 } 76 77 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg, 78 uint32_t errLvl) 79 { 80 _commit(transactionId, std::move(errMsg), 81 static_cast<Entry::Level>(errLvl)); 82 return entryId; 83 } 84 85 void Manager::_commit(uint64_t transactionId [[maybe_unused]], 86 std::string&& errMsg, Entry::Level errLvl) 87 { 88 std::vector<std::string> additionalData{}; 89 90 // When running as a test-case, the system may have a LOT of journal 91 // data and we may not have permissions to do some of the journal sync 92 // operations. Just skip over them. 93 if (!IS_UNIT_TEST) 94 { 95 static constexpr auto transactionIdVar = 96 std::string_view{"TRANSACTION_ID"}; 97 // Length of 'TRANSACTION_ID' string. 98 static constexpr auto transactionIdVarSize = transactionIdVar.size(); 99 // Length of 'TRANSACTION_ID=' string. 100 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1; 101 102 // Flush all the pending log messages into the journal 103 util::journalSync(); 104 105 sd_journal* j = nullptr; 106 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); 107 if (rc < 0) 108 { 109 lg2::error("Failed to open journal: {ERROR}", "ERROR", 110 strerror(-rc)); 111 return; 112 } 113 114 std::string transactionIdStr = std::to_string(transactionId); 115 std::set<std::string> metalist; 116 auto metamap = g_errMetaMap.find(errMsg); 117 if (metamap != g_errMetaMap.end()) 118 { 119 metalist.insert(metamap->second.begin(), metamap->second.end()); 120 } 121 122 // Add _PID field information in AdditionalData. 123 metalist.insert("_PID"); 124 125 // Read the journal from the end to get the most recent entry first. 126 // The result from the sd_journal_get_data() is of the form 127 // VARIABLE=value. 128 SD_JOURNAL_FOREACH_BACKWARDS(j) 129 { 130 const char* data = nullptr; 131 size_t length = 0; 132 133 // Look for the transaction id metadata variable 134 rc = sd_journal_get_data(j, transactionIdVar.data(), 135 (const void**)&data, &length); 136 if (rc < 0) 137 { 138 // This journal entry does not have the TRANSACTION_ID 139 // metadata variable. 140 continue; 141 } 142 143 // journald does not guarantee that sd_journal_get_data() returns 144 // NULL terminated strings, so need to specify the size to use to 145 // compare, use the returned length instead of anything that relies 146 // on NULL terminators like strlen(). The data variable is in the 147 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID 148 // characters plus the (=) sign to do the comparison. 'data + 149 // transactionIdVarOffset' will be in the form of '1234'. 'length - 150 // transactionIdVarOffset' will be the length of '1234'. 151 if ((length <= (transactionIdVarOffset)) || 152 (transactionIdStr.compare( 153 0, transactionIdStr.size(), data + transactionIdVarOffset, 154 length - transactionIdVarOffset) != 0)) 155 { 156 // The value of the TRANSACTION_ID metadata is not the requested 157 // transaction id number. 158 continue; 159 } 160 161 // Search for all metadata variables in the current journal entry. 162 for (auto i = metalist.cbegin(); i != metalist.cend();) 163 { 164 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data, 165 &length); 166 if (rc < 0) 167 { 168 // Metadata variable not found, check next metadata 169 // variable. 170 i++; 171 continue; 172 } 173 174 // Metadata variable found, save it and remove it from the set. 175 additionalData.emplace_back(data, length); 176 i = metalist.erase(i); 177 } 178 if (metalist.empty()) 179 { 180 // All metadata variables found, break out of journal loop. 181 break; 182 } 183 } 184 if (!metalist.empty()) 185 { 186 // Not all the metadata variables were found in the journal. 187 for (auto& metaVarStr : metalist) 188 { 189 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD", 190 metaVarStr); 191 } 192 } 193 194 sd_journal_close(j); 195 } 196 createEntry(errMsg, errLvl, additionalData); 197 } 198 199 auto Manager::createEntry( 200 std::string errMsg, Entry::Level errLvl, 201 std::vector<std::string> additionalData, 202 const FFDCEntries& ffdc) -> sdbusplus::message::object_path 203 { 204 if (!Extensions::disableDefaultLogCaps()) 205 { 206 if (errLvl < Entry::sevLowerLimit) 207 { 208 if (realErrors.size() >= ERROR_CAP) 209 { 210 erase(realErrors.front()); 211 } 212 } 213 else 214 { 215 if (infoErrors.size() >= ERROR_INFO_CAP) 216 { 217 erase(infoErrors.front()); 218 } 219 } 220 } 221 222 entryId++; 223 if (errLvl >= Entry::sevLowerLimit) 224 { 225 infoErrors.push_back(entryId); 226 } 227 else 228 { 229 realErrors.push_back(entryId); 230 } 231 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>( 232 std::chrono::system_clock::now().time_since_epoch()) 233 .count(); 234 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 235 236 AssociationList objects{}; 237 processMetadata(errMsg, additionalData, objects); 238 239 auto e = std::make_unique<Entry>( 240 busLog, objPath, entryId, 241 ms, // Milliseconds since 1970 242 errLvl, std::move(errMsg), std::move(additionalData), 243 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this); 244 245 serialize(*e); 246 247 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) && 248 isCalloutPresent(*e)) 249 { 250 quiesceOnError(entryId); 251 } 252 253 // Add entry before calling the extensions so that they have access to it 254 entries.insert(std::make_pair(entryId, std::move(e))); 255 256 doExtensionLogCreate(*entries.find(entryId)->second, ffdc); 257 258 // Note: No need to close the file descriptors in the FFDC. 259 260 return objPath; 261 } 262 263 auto Manager::createFromEvent(sdbusplus::exception::generated_event_base&& 264 event) -> sdbusplus::message::object_path 265 { 266 auto [msg, level, data] = lg2::details::extractEvent(std::move(event)); 267 return this->createEntry(msg, level, std::move(data)); 268 } 269 270 bool Manager::isQuiesceOnErrorEnabled() 271 { 272 // When running under tests, the Logging.Settings service will not be 273 // present. Assume false. 274 if (IS_UNIT_TEST) 275 { 276 return false; 277 } 278 279 std::variant<bool> property; 280 281 auto method = this->busLog.new_method_call( 282 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings", 283 "org.freedesktop.DBus.Properties", "Get"); 284 285 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError"); 286 287 try 288 { 289 auto reply = this->busLog.call(method); 290 reply.read(property); 291 } 292 catch (const sdbusplus::exception_t& e) 293 { 294 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 295 e); 296 return false; 297 } 298 299 return std::get<bool>(property); 300 } 301 302 bool Manager::isCalloutPresent(const Entry& entry) 303 { 304 for (const auto& c : entry.additionalData()) 305 { 306 if (c.find("CALLOUT_") != std::string::npos) 307 { 308 return true; 309 } 310 } 311 312 return false; 313 } 314 315 void Manager::findAndRemoveResolvedBlocks() 316 { 317 for (auto& entry : entries) 318 { 319 if (entry.second->resolved()) 320 { 321 checkAndRemoveBlockingError(entry.first); 322 } 323 } 324 } 325 326 void Manager::onEntryResolve(sdbusplus::message_t& msg) 327 { 328 using Interface = std::string; 329 using Property = std::string; 330 using Value = std::string; 331 using Properties = std::map<Property, std::variant<Value>>; 332 333 Interface interface; 334 Properties properties; 335 336 msg.read(interface, properties); 337 338 for (const auto& p : properties) 339 { 340 if (p.first == "Resolved") 341 { 342 findAndRemoveResolvedBlocks(); 343 return; 344 } 345 } 346 } 347 348 void Manager::checkAndQuiesceHost() 349 { 350 using Host = sdbusplus::server::xyz::openbmc_project::state::Host; 351 352 // First check host state 353 std::variant<Host::HostState> property; 354 355 auto method = this->busLog.new_method_call( 356 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0", 357 "org.freedesktop.DBus.Properties", "Get"); 358 359 method.append("xyz.openbmc_project.State.Host", "CurrentHostState"); 360 361 try 362 { 363 auto reply = this->busLog.call(method); 364 reply.read(property); 365 } 366 catch (const sdbusplus::exception_t& e) 367 { 368 // Quiescing the host is a "best effort" type function. If unable to 369 // read the host state or it comes back empty, just return. 370 // The boot block object will still be created and the associations to 371 // find the log will be present. Don't want a dependency with 372 // phosphor-state-manager service 373 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 374 e); 375 return; 376 } 377 378 auto hostState = std::get<Host::HostState>(property); 379 if (hostState != Host::HostState::Running) 380 { 381 return; 382 } 383 384 auto quiesce = this->busLog.new_method_call( 385 "org.freedesktop.systemd1", "/org/freedesktop/systemd1", 386 "org.freedesktop.systemd1.Manager", "StartUnit"); 387 388 quiesce.append("obmc-host-graceful-quiesce@0.target"); 389 quiesce.append("replace"); 390 391 this->busLog.call_noreply(quiesce); 392 } 393 394 void Manager::quiesceOnError(const uint32_t entryId) 395 { 396 // Verify we don't already have this entry blocking 397 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(), 398 [&](const std::unique_ptr<Block>& obj) { 399 return obj->entryId == entryId; 400 }); 401 if (it != this->blockingErrors.end()) 402 { 403 // Already recorded so just return 404 lg2::debug( 405 "QuiesceOnError set and callout present but entry already logged"); 406 return; 407 } 408 409 lg2::info("QuiesceOnError set and callout present"); 410 411 auto blockPath = 412 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId); 413 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId); 414 this->blockingErrors.push_back(std::move(blockObj)); 415 416 // Register call back if log is resolved 417 using namespace sdbusplus::bus::match::rules; 418 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 419 auto callback = std::make_unique<sdbusplus::bus::match_t>( 420 this->busLog, 421 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"), 422 std::bind(std::mem_fn(&Manager::onEntryResolve), this, 423 std::placeholders::_1)); 424 425 propChangedEntryCallback.insert( 426 std::make_pair(entryId, std::move(callback))); 427 428 checkAndQuiesceHost(); 429 } 430 431 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc) 432 { 433 // Make the association <endpointpath>/<endpointtype> paths 434 std::vector<std::string> assocs; 435 for (const auto& [forwardType, reverseType, endpoint] : 436 entry.associations()) 437 { 438 std::string e{endpoint}; 439 e += '/' + reverseType; 440 assocs.push_back(e); 441 } 442 443 for (auto& create : Extensions::getCreateFunctions()) 444 { 445 try 446 { 447 create(entry.message(), entry.id(), entry.timestamp(), 448 entry.severity(), entry.additionalData(), assocs, ffdc); 449 } 450 catch (const std::exception& e) 451 { 452 lg2::error( 453 "An extension's create function threw an exception: {ERROR}", 454 "ERROR", e); 455 } 456 } 457 } 458 459 void Manager::processMetadata(const std::string& /*errorName*/, 460 const std::vector<std::string>& additionalData, 461 AssociationList& objects) const 462 { 463 // additionalData is a list of "metadata=value" 464 constexpr auto separator = '='; 465 for (const auto& entryItem : additionalData) 466 { 467 auto found = entryItem.find(separator); 468 if (std::string::npos != found) 469 { 470 auto metadata = entryItem.substr(0, found); 471 auto iter = meta.find(metadata); 472 if (meta.end() != iter) 473 { 474 (iter->second)(metadata, additionalData, objects); 475 } 476 } 477 } 478 } 479 480 void Manager::checkAndRemoveBlockingError(uint32_t entryId) 481 { 482 // First look for blocking object and remove 483 auto it = find_if(blockingErrors.begin(), blockingErrors.end(), 484 [&](const std::unique_ptr<Block>& obj) { 485 return obj->entryId == entryId; 486 }); 487 if (it != blockingErrors.end()) 488 { 489 blockingErrors.erase(it); 490 } 491 492 // Now remove the callback looking for the error to be resolved 493 auto resolveFind = propChangedEntryCallback.find(entryId); 494 if (resolveFind != propChangedEntryCallback.end()) 495 { 496 propChangedEntryCallback.erase(resolveFind); 497 } 498 499 return; 500 } 501 502 size_t Manager::eraseAll() 503 { 504 std::vector<uint32_t> logIDWithHwIsolation; 505 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions()) 506 { 507 try 508 { 509 func(logIDWithHwIsolation); 510 } 511 catch (const std::exception& e) 512 { 513 lg2::error("An extension's LogIDWithHwIsolation function threw an " 514 "exception: {ERROR}", 515 "ERROR", e); 516 } 517 } 518 size_t entriesSize = entries.size(); 519 auto iter = entries.begin(); 520 if (logIDWithHwIsolation.empty()) 521 { 522 while (iter != entries.end()) 523 { 524 auto e = iter->first; 525 ++iter; 526 erase(e); 527 } 528 entryId = 0; 529 } 530 else 531 { 532 while (iter != entries.end()) 533 { 534 auto e = iter->first; 535 ++iter; 536 try 537 { 538 if (!std::ranges::contains(logIDWithHwIsolation, e)) 539 { 540 erase(e); 541 } 542 else 543 { 544 entriesSize--; 545 } 546 } 547 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 548 Unavailable& e) 549 { 550 entriesSize--; 551 } 552 } 553 if (!entries.empty()) 554 { 555 entryId = std::ranges::max_element(entries, [](const auto& a, 556 const auto& b) { 557 return a.first < b.first; 558 })->first; 559 } 560 else 561 { 562 entryId = 0; 563 } 564 } 565 return entriesSize; 566 } 567 568 void Manager::erase(uint32_t entryId) 569 { 570 auto entryFound = entries.find(entryId); 571 if (entries.end() != entryFound) 572 { 573 for (auto& func : Extensions::getDeleteProhibitedFunctions()) 574 { 575 try 576 { 577 bool prohibited = false; 578 func(entryId, prohibited); 579 if (prohibited) 580 { 581 throw sdbusplus::xyz::openbmc_project::Common::Error:: 582 Unavailable(); 583 } 584 } 585 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 586 Unavailable& e) 587 { 588 throw; 589 } 590 catch (const std::exception& e) 591 { 592 lg2::error("An extension's deleteProhibited function threw an " 593 "exception: {ERROR}", 594 "ERROR", e); 595 } 596 } 597 598 // Delete the persistent representation of this error. 599 fs::path errorPath(paths::error()); 600 errorPath /= std::to_string(entryId); 601 fs::remove(errorPath); 602 603 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) { 604 auto it = std::find(ids.begin(), ids.end(), id); 605 if (it != ids.end()) 606 { 607 ids.erase(it); 608 } 609 }; 610 if (entryFound->second->severity() >= Entry::sevLowerLimit) 611 { 612 removeId(infoErrors, entryId); 613 } 614 else 615 { 616 removeId(realErrors, entryId); 617 } 618 entries.erase(entryFound); 619 620 checkAndRemoveBlockingError(entryId); 621 622 for (auto& remove : Extensions::getDeleteFunctions()) 623 { 624 try 625 { 626 remove(entryId); 627 } 628 catch (const std::exception& e) 629 { 630 lg2::error("An extension's delete function threw an exception: " 631 "{ERROR}", 632 "ERROR", e); 633 } 634 } 635 } 636 else 637 { 638 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId); 639 } 640 } 641 642 void Manager::restore() 643 { 644 auto sanity = [](const auto& id, const auto& restoredId) { 645 return id == restoredId; 646 }; 647 648 fs::path dir(paths::error()); 649 if (!fs::exists(dir) || fs::is_empty(dir)) 650 { 651 return; 652 } 653 654 for (auto& file : fs::directory_iterator(dir)) 655 { 656 auto id = file.path().filename().c_str(); 657 auto idNum = std::stol(id); 658 auto e = std::make_unique<Entry>( 659 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this); 660 if (deserialize(file.path(), *e)) 661 { 662 // validate the restored error entry id 663 if (sanity(static_cast<uint32_t>(idNum), e->id())) 664 { 665 e->path(file.path(), true); 666 if (e->severity() >= Entry::sevLowerLimit) 667 { 668 infoErrors.push_back(idNum); 669 } 670 else 671 { 672 realErrors.push_back(idNum); 673 } 674 675 entries.insert(std::make_pair(idNum, std::move(e))); 676 } 677 else 678 { 679 lg2::error( 680 "Failed in sanity check while restoring error entry. " 681 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.", 682 "ID_NUM", idNum, "ENTRY_ID", e->id()); 683 } 684 } 685 } 686 687 if (!entries.empty()) 688 { 689 entryId = entries.rbegin()->first; 690 } 691 } 692 693 std::string Manager::readFWVersion() 694 { 695 auto version = util::getOSReleaseValue("VERSION_ID"); 696 697 if (!version) 698 { 699 lg2::error("Unable to read BMC firmware version"); 700 } 701 702 return version.value_or(""); 703 } 704 705 auto Manager::create(const std::string& message, Entry::Level severity, 706 const std::map<std::string, std::string>& additionalData, 707 const FFDCEntries& ffdc) -> sdbusplus::message::object_path 708 { 709 // Convert the map into a vector of "key=value" strings 710 auto ad = util::additional_data::combine(additionalData); 711 712 return createEntry(message, severity, ad, ffdc); 713 } 714 715 } // namespace internal 716 } // namespace logging 717 } // namespace phosphor 718