1 #include "config.h" 2 3 #include "log_manager.hpp" 4 5 #include "elog_entry.hpp" 6 #include "elog_meta.hpp" 7 #include "elog_serialize.hpp" 8 #include "extensions.hpp" 9 #include "paths.hpp" 10 #include "util.hpp" 11 12 #include <systemd/sd-bus.h> 13 #include <systemd/sd-journal.h> 14 #include <unistd.h> 15 16 #include <phosphor-logging/lg2.hpp> 17 #include <sdbusplus/vtable.hpp> 18 #include <xyz/openbmc_project/State/Host/server.hpp> 19 20 #include <cassert> 21 #include <chrono> 22 #include <cstdio> 23 #include <cstring> 24 #include <fstream> 25 #include <functional> 26 #include <future> 27 #include <iostream> 28 #include <map> 29 #include <set> 30 #include <string> 31 #include <string_view> 32 #include <vector> 33 34 using namespace std::chrono; 35 extern const std::map< 36 phosphor::logging::metadata::Metadata, 37 std::function<phosphor::logging::metadata::associations::Type>> 38 meta; 39 40 namespace phosphor 41 { 42 namespace logging 43 { 44 namespace internal 45 { 46 inline auto getLevel(const std::string& errMsg) 47 { 48 auto reqLevel = Entry::Level::Error; // Default to Error 49 50 auto levelmap = g_errLevelMap.find(errMsg); 51 if (levelmap != g_errLevelMap.end()) 52 { 53 reqLevel = static_cast<Entry::Level>(levelmap->second); 54 } 55 56 return reqLevel; 57 } 58 59 int Manager::getRealErrSize() 60 { 61 return realErrors.size(); 62 } 63 64 int Manager::getInfoErrSize() 65 { 66 return infoErrors.size(); 67 } 68 69 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg) 70 { 71 auto level = getLevel(errMsg); 72 _commit(transactionId, std::move(errMsg), level); 73 return entryId; 74 } 75 76 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg, 77 uint32_t errLvl) 78 { 79 _commit(transactionId, std::move(errMsg), 80 static_cast<Entry::Level>(errLvl)); 81 return entryId; 82 } 83 84 void Manager::_commit(uint64_t transactionId [[maybe_unused]], 85 std::string&& errMsg, Entry::Level errLvl) 86 { 87 std::vector<std::string> additionalData{}; 88 89 // When running as a test-case, the system may have a LOT of journal 90 // data and we may not have permissions to do some of the journal sync 91 // operations. Just skip over them. 92 if (!IS_UNIT_TEST) 93 { 94 static constexpr auto transactionIdVar = 95 std::string_view{"TRANSACTION_ID"}; 96 // Length of 'TRANSACTION_ID' string. 97 static constexpr auto transactionIdVarSize = transactionIdVar.size(); 98 // Length of 'TRANSACTION_ID=' string. 99 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1; 100 101 // Flush all the pending log messages into the journal 102 util::journalSync(); 103 104 sd_journal* j = nullptr; 105 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); 106 if (rc < 0) 107 { 108 lg2::error("Failed to open journal: {ERROR}", "ERROR", 109 strerror(-rc)); 110 return; 111 } 112 113 std::string transactionIdStr = std::to_string(transactionId); 114 std::set<std::string> metalist; 115 auto metamap = g_errMetaMap.find(errMsg); 116 if (metamap != g_errMetaMap.end()) 117 { 118 metalist.insert(metamap->second.begin(), metamap->second.end()); 119 } 120 121 // Add _PID field information in AdditionalData. 122 metalist.insert("_PID"); 123 124 // Read the journal from the end to get the most recent entry first. 125 // The result from the sd_journal_get_data() is of the form 126 // VARIABLE=value. 127 SD_JOURNAL_FOREACH_BACKWARDS(j) 128 { 129 const char* data = nullptr; 130 size_t length = 0; 131 132 // Look for the transaction id metadata variable 133 rc = sd_journal_get_data(j, transactionIdVar.data(), 134 (const void**)&data, &length); 135 if (rc < 0) 136 { 137 // This journal entry does not have the TRANSACTION_ID 138 // metadata variable. 139 continue; 140 } 141 142 // journald does not guarantee that sd_journal_get_data() returns 143 // NULL terminated strings, so need to specify the size to use to 144 // compare, use the returned length instead of anything that relies 145 // on NULL terminators like strlen(). The data variable is in the 146 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID 147 // characters plus the (=) sign to do the comparison. 'data + 148 // transactionIdVarOffset' will be in the form of '1234'. 'length - 149 // transactionIdVarOffset' will be the length of '1234'. 150 if ((length <= (transactionIdVarOffset)) || 151 (transactionIdStr.compare( 152 0, transactionIdStr.size(), data + transactionIdVarOffset, 153 length - transactionIdVarOffset) != 0)) 154 { 155 // The value of the TRANSACTION_ID metadata is not the requested 156 // transaction id number. 157 continue; 158 } 159 160 // Search for all metadata variables in the current journal entry. 161 for (auto i = metalist.cbegin(); i != metalist.cend();) 162 { 163 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data, 164 &length); 165 if (rc < 0) 166 { 167 // Metadata variable not found, check next metadata 168 // variable. 169 i++; 170 continue; 171 } 172 173 // Metadata variable found, save it and remove it from the set. 174 additionalData.emplace_back(data, length); 175 i = metalist.erase(i); 176 } 177 if (metalist.empty()) 178 { 179 // All metadata variables found, break out of journal loop. 180 break; 181 } 182 } 183 if (!metalist.empty()) 184 { 185 // Not all the metadata variables were found in the journal. 186 for (auto& metaVarStr : metalist) 187 { 188 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD", 189 metaVarStr); 190 } 191 } 192 193 sd_journal_close(j); 194 } 195 createEntry(errMsg, errLvl, additionalData); 196 } 197 198 auto Manager::createEntry( 199 std::string errMsg, Entry::Level errLvl, 200 std::vector<std::string> additionalData, 201 const FFDCEntries& ffdc) -> sdbusplus::message::object_path 202 { 203 if (!Extensions::disableDefaultLogCaps()) 204 { 205 if (errLvl < Entry::sevLowerLimit) 206 { 207 if (realErrors.size() >= ERROR_CAP) 208 { 209 erase(realErrors.front()); 210 } 211 } 212 else 213 { 214 if (infoErrors.size() >= ERROR_INFO_CAP) 215 { 216 erase(infoErrors.front()); 217 } 218 } 219 } 220 221 entryId++; 222 if (errLvl >= Entry::sevLowerLimit) 223 { 224 infoErrors.push_back(entryId); 225 } 226 else 227 { 228 realErrors.push_back(entryId); 229 } 230 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>( 231 std::chrono::system_clock::now().time_since_epoch()) 232 .count(); 233 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 234 235 AssociationList objects{}; 236 processMetadata(errMsg, additionalData, objects); 237 238 auto e = std::make_unique<Entry>( 239 busLog, objPath, entryId, 240 ms, // Milliseconds since 1970 241 errLvl, std::move(errMsg), std::move(additionalData), 242 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this); 243 244 serialize(*e); 245 246 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) && 247 isCalloutPresent(*e)) 248 { 249 quiesceOnError(entryId); 250 } 251 252 // Add entry before calling the extensions so that they have access to it 253 entries.insert(std::make_pair(entryId, std::move(e))); 254 255 doExtensionLogCreate(*entries.find(entryId)->second, ffdc); 256 257 // Note: No need to close the file descriptors in the FFDC. 258 259 return objPath; 260 } 261 262 bool Manager::isQuiesceOnErrorEnabled() 263 { 264 // When running under tests, the Logging.Settings service will not be 265 // present. Assume false. 266 if (IS_UNIT_TEST) 267 { 268 return false; 269 } 270 271 std::variant<bool> property; 272 273 auto method = this->busLog.new_method_call( 274 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings", 275 "org.freedesktop.DBus.Properties", "Get"); 276 277 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError"); 278 279 try 280 { 281 auto reply = this->busLog.call(method); 282 reply.read(property); 283 } 284 catch (const sdbusplus::exception_t& e) 285 { 286 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 287 e); 288 return false; 289 } 290 291 return std::get<bool>(property); 292 } 293 294 bool Manager::isCalloutPresent(const Entry& entry) 295 { 296 for (const auto& c : entry.additionalData()) 297 { 298 if (c.find("CALLOUT_") != std::string::npos) 299 { 300 return true; 301 } 302 } 303 304 return false; 305 } 306 307 void Manager::findAndRemoveResolvedBlocks() 308 { 309 for (auto& entry : entries) 310 { 311 if (entry.second->resolved()) 312 { 313 checkAndRemoveBlockingError(entry.first); 314 } 315 } 316 } 317 318 void Manager::onEntryResolve(sdbusplus::message_t& msg) 319 { 320 using Interface = std::string; 321 using Property = std::string; 322 using Value = std::string; 323 using Properties = std::map<Property, std::variant<Value>>; 324 325 Interface interface; 326 Properties properties; 327 328 msg.read(interface, properties); 329 330 for (const auto& p : properties) 331 { 332 if (p.first == "Resolved") 333 { 334 findAndRemoveResolvedBlocks(); 335 return; 336 } 337 } 338 } 339 340 void Manager::checkAndQuiesceHost() 341 { 342 using Host = sdbusplus::server::xyz::openbmc_project::state::Host; 343 344 // First check host state 345 std::variant<Host::HostState> property; 346 347 auto method = this->busLog.new_method_call( 348 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0", 349 "org.freedesktop.DBus.Properties", "Get"); 350 351 method.append("xyz.openbmc_project.State.Host", "CurrentHostState"); 352 353 try 354 { 355 auto reply = this->busLog.call(method); 356 reply.read(property); 357 } 358 catch (const sdbusplus::exception_t& e) 359 { 360 // Quiescing the host is a "best effort" type function. If unable to 361 // read the host state or it comes back empty, just return. 362 // The boot block object will still be created and the associations to 363 // find the log will be present. Don't want a dependency with 364 // phosphor-state-manager service 365 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 366 e); 367 return; 368 } 369 370 auto hostState = std::get<Host::HostState>(property); 371 if (hostState != Host::HostState::Running) 372 { 373 return; 374 } 375 376 auto quiesce = this->busLog.new_method_call( 377 "org.freedesktop.systemd1", "/org/freedesktop/systemd1", 378 "org.freedesktop.systemd1.Manager", "StartUnit"); 379 380 quiesce.append("obmc-host-graceful-quiesce@0.target"); 381 quiesce.append("replace"); 382 383 this->busLog.call_noreply(quiesce); 384 } 385 386 void Manager::quiesceOnError(const uint32_t entryId) 387 { 388 // Verify we don't already have this entry blocking 389 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(), 390 [&](const std::unique_ptr<Block>& obj) { 391 return obj->entryId == entryId; 392 }); 393 if (it != this->blockingErrors.end()) 394 { 395 // Already recorded so just return 396 lg2::debug( 397 "QuiesceOnError set and callout present but entry already logged"); 398 return; 399 } 400 401 lg2::info("QuiesceOnError set and callout present"); 402 403 auto blockPath = 404 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId); 405 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId); 406 this->blockingErrors.push_back(std::move(blockObj)); 407 408 // Register call back if log is resolved 409 using namespace sdbusplus::bus::match::rules; 410 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 411 auto callback = std::make_unique<sdbusplus::bus::match_t>( 412 this->busLog, 413 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"), 414 std::bind(std::mem_fn(&Manager::onEntryResolve), this, 415 std::placeholders::_1)); 416 417 propChangedEntryCallback.insert( 418 std::make_pair(entryId, std::move(callback))); 419 420 checkAndQuiesceHost(); 421 } 422 423 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc) 424 { 425 // Make the association <endpointpath>/<endpointtype> paths 426 std::vector<std::string> assocs; 427 for (const auto& [forwardType, reverseType, endpoint] : 428 entry.associations()) 429 { 430 std::string e{endpoint}; 431 e += '/' + reverseType; 432 assocs.push_back(e); 433 } 434 435 for (auto& create : Extensions::getCreateFunctions()) 436 { 437 try 438 { 439 create(entry.message(), entry.id(), entry.timestamp(), 440 entry.severity(), entry.additionalData(), assocs, ffdc); 441 } 442 catch (const std::exception& e) 443 { 444 lg2::error( 445 "An extension's create function threw an exception: {ERROR}", 446 "ERROR", e); 447 } 448 } 449 } 450 451 void Manager::processMetadata(const std::string& /*errorName*/, 452 const std::vector<std::string>& additionalData, 453 AssociationList& objects) const 454 { 455 // additionalData is a list of "metadata=value" 456 constexpr auto separator = '='; 457 for (const auto& entryItem : additionalData) 458 { 459 auto found = entryItem.find(separator); 460 if (std::string::npos != found) 461 { 462 auto metadata = entryItem.substr(0, found); 463 auto iter = meta.find(metadata); 464 if (meta.end() != iter) 465 { 466 (iter->second)(metadata, additionalData, objects); 467 } 468 } 469 } 470 } 471 472 void Manager::checkAndRemoveBlockingError(uint32_t entryId) 473 { 474 // First look for blocking object and remove 475 auto it = find_if(blockingErrors.begin(), blockingErrors.end(), 476 [&](const std::unique_ptr<Block>& obj) { 477 return obj->entryId == entryId; 478 }); 479 if (it != blockingErrors.end()) 480 { 481 blockingErrors.erase(it); 482 } 483 484 // Now remove the callback looking for the error to be resolved 485 auto resolveFind = propChangedEntryCallback.find(entryId); 486 if (resolveFind != propChangedEntryCallback.end()) 487 { 488 propChangedEntryCallback.erase(resolveFind); 489 } 490 491 return; 492 } 493 494 size_t Manager::eraseAll() 495 { 496 std::vector<uint32_t> logIDWithHwIsolation; 497 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions()) 498 { 499 try 500 { 501 func(logIDWithHwIsolation); 502 } 503 catch (const std::exception& e) 504 { 505 lg2::error("An extension's LogIDWithHwIsolation function threw an " 506 "exception: {ERROR}", 507 "ERROR", e); 508 } 509 } 510 size_t entriesSize = entries.size(); 511 auto iter = entries.begin(); 512 if (logIDWithHwIsolation.empty()) 513 { 514 while (iter != entries.end()) 515 { 516 auto e = iter->first; 517 ++iter; 518 erase(e); 519 } 520 entryId = 0; 521 } 522 else 523 { 524 while (iter != entries.end()) 525 { 526 auto e = iter->first; 527 ++iter; 528 try 529 { 530 if (!std::ranges::contains(logIDWithHwIsolation, e)) 531 { 532 erase(e); 533 } 534 else 535 { 536 entriesSize--; 537 } 538 } 539 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 540 Unavailable& e) 541 { 542 entriesSize--; 543 } 544 } 545 if (!entries.empty()) 546 { 547 entryId = std::ranges::max_element(entries, [](const auto& a, 548 const auto& b) { 549 return a.first < b.first; 550 })->first; 551 } 552 else 553 { 554 entryId = 0; 555 } 556 } 557 return entriesSize; 558 } 559 560 void Manager::erase(uint32_t entryId) 561 { 562 auto entryFound = entries.find(entryId); 563 if (entries.end() != entryFound) 564 { 565 for (auto& func : Extensions::getDeleteProhibitedFunctions()) 566 { 567 try 568 { 569 bool prohibited = false; 570 func(entryId, prohibited); 571 if (prohibited) 572 { 573 throw sdbusplus::xyz::openbmc_project::Common::Error:: 574 Unavailable(); 575 } 576 } 577 catch (const sdbusplus::xyz::openbmc_project::Common::Error:: 578 Unavailable& e) 579 { 580 throw; 581 } 582 catch (const std::exception& e) 583 { 584 lg2::error("An extension's deleteProhibited function threw an " 585 "exception: {ERROR}", 586 "ERROR", e); 587 } 588 } 589 590 // Delete the persistent representation of this error. 591 fs::path errorPath(paths::error()); 592 errorPath /= std::to_string(entryId); 593 fs::remove(errorPath); 594 595 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) { 596 auto it = std::find(ids.begin(), ids.end(), id); 597 if (it != ids.end()) 598 { 599 ids.erase(it); 600 } 601 }; 602 if (entryFound->second->severity() >= Entry::sevLowerLimit) 603 { 604 removeId(infoErrors, entryId); 605 } 606 else 607 { 608 removeId(realErrors, entryId); 609 } 610 entries.erase(entryFound); 611 612 checkAndRemoveBlockingError(entryId); 613 614 for (auto& remove : Extensions::getDeleteFunctions()) 615 { 616 try 617 { 618 remove(entryId); 619 } 620 catch (const std::exception& e) 621 { 622 lg2::error("An extension's delete function threw an exception: " 623 "{ERROR}", 624 "ERROR", e); 625 } 626 } 627 } 628 else 629 { 630 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId); 631 } 632 } 633 634 void Manager::restore() 635 { 636 auto sanity = [](const auto& id, const auto& restoredId) { 637 return id == restoredId; 638 }; 639 640 fs::path dir(paths::error()); 641 if (!fs::exists(dir) || fs::is_empty(dir)) 642 { 643 return; 644 } 645 646 for (auto& file : fs::directory_iterator(dir)) 647 { 648 auto id = file.path().filename().c_str(); 649 auto idNum = std::stol(id); 650 auto e = std::make_unique<Entry>( 651 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this); 652 if (deserialize(file.path(), *e)) 653 { 654 // validate the restored error entry id 655 if (sanity(static_cast<uint32_t>(idNum), e->id())) 656 { 657 e->path(file.path(), true); 658 if (e->severity() >= Entry::sevLowerLimit) 659 { 660 infoErrors.push_back(idNum); 661 } 662 else 663 { 664 realErrors.push_back(idNum); 665 } 666 667 entries.insert(std::make_pair(idNum, std::move(e))); 668 } 669 else 670 { 671 lg2::error( 672 "Failed in sanity check while restoring error entry. " 673 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.", 674 "ID_NUM", idNum, "ENTRY_ID", e->id()); 675 } 676 } 677 } 678 679 if (!entries.empty()) 680 { 681 entryId = entries.rbegin()->first; 682 } 683 } 684 685 std::string Manager::readFWVersion() 686 { 687 auto version = util::getOSReleaseValue("VERSION_ID"); 688 689 if (!version) 690 { 691 lg2::error("Unable to read BMC firmware version"); 692 } 693 694 return version.value_or(""); 695 } 696 697 auto Manager::create(const std::string& message, Entry::Level severity, 698 const std::map<std::string, std::string>& additionalData, 699 const FFDCEntries& ffdc) -> sdbusplus::message::object_path 700 { 701 // Convert the map into a vector of "key=value" strings 702 std::vector<std::string> ad; 703 metadata::associations::combine(additionalData, ad); 704 705 return createEntry(message, severity, ad, ffdc); 706 } 707 708 } // namespace internal 709 } // namespace logging 710 } // namespace phosphor 711