1 #include "config.h" 2 3 #include "log_manager.hpp" 4 5 #include "elog_entry.hpp" 6 #include "elog_meta.hpp" 7 #include "elog_serialize.hpp" 8 #include "extensions.hpp" 9 #include "util.hpp" 10 11 #include <systemd/sd-bus.h> 12 #include <systemd/sd-journal.h> 13 #include <unistd.h> 14 15 #include <phosphor-logging/lg2.hpp> 16 #include <sdbusplus/vtable.hpp> 17 #include <xyz/openbmc_project/State/Host/server.hpp> 18 19 #include <cassert> 20 #include <chrono> 21 #include <cstdio> 22 #include <cstring> 23 #include <fstream> 24 #include <functional> 25 #include <future> 26 #include <iostream> 27 #include <map> 28 #include <set> 29 #include <string> 30 #include <string_view> 31 #include <vector> 32 33 using namespace std::chrono; 34 extern const std::map< 35 phosphor::logging::metadata::Metadata, 36 std::function<phosphor::logging::metadata::associations::Type>> 37 meta; 38 39 namespace phosphor 40 { 41 namespace logging 42 { 43 namespace internal 44 { 45 inline auto getLevel(const std::string& errMsg) 46 { 47 auto reqLevel = Entry::Level::Error; // Default to Error 48 49 auto levelmap = g_errLevelMap.find(errMsg); 50 if (levelmap != g_errLevelMap.end()) 51 { 52 reqLevel = static_cast<Entry::Level>(levelmap->second); 53 } 54 55 return reqLevel; 56 } 57 58 int Manager::getRealErrSize() 59 { 60 return realErrors.size(); 61 } 62 63 int Manager::getInfoErrSize() 64 { 65 return infoErrors.size(); 66 } 67 68 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg) 69 { 70 auto level = getLevel(errMsg); 71 _commit(transactionId, std::move(errMsg), level); 72 return entryId; 73 } 74 75 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg, 76 uint32_t errLvl) 77 { 78 _commit(transactionId, std::move(errMsg), 79 static_cast<Entry::Level>(errLvl)); 80 return entryId; 81 } 82 83 void Manager::_commit(uint64_t transactionId [[maybe_unused]], 84 std::string&& errMsg, Entry::Level errLvl) 85 { 86 std::vector<std::string> additionalData{}; 87 88 // When running as a test-case, the system may have a LOT of journal 89 // data and we may not have permissions to do some of the journal sync 90 // operations. Just skip over them. 91 if (!IS_UNIT_TEST) 92 { 93 static constexpr auto transactionIdVar = 94 std::string_view{"TRANSACTION_ID"}; 95 // Length of 'TRANSACTION_ID' string. 96 static constexpr auto transactionIdVarSize = transactionIdVar.size(); 97 // Length of 'TRANSACTION_ID=' string. 98 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1; 99 100 // Flush all the pending log messages into the journal 101 util::journalSync(); 102 103 sd_journal* j = nullptr; 104 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); 105 if (rc < 0) 106 { 107 lg2::error("Failed to open journal: {ERROR}", "ERROR", 108 strerror(-rc)); 109 return; 110 } 111 112 std::string transactionIdStr = std::to_string(transactionId); 113 std::set<std::string> metalist; 114 auto metamap = g_errMetaMap.find(errMsg); 115 if (metamap != g_errMetaMap.end()) 116 { 117 metalist.insert(metamap->second.begin(), metamap->second.end()); 118 } 119 120 // Add _PID field information in AdditionalData. 121 metalist.insert("_PID"); 122 123 // Read the journal from the end to get the most recent entry first. 124 // The result from the sd_journal_get_data() is of the form 125 // VARIABLE=value. 126 SD_JOURNAL_FOREACH_BACKWARDS(j) 127 { 128 const char* data = nullptr; 129 size_t length = 0; 130 131 // Look for the transaction id metadata variable 132 rc = sd_journal_get_data(j, transactionIdVar.data(), 133 (const void**)&data, &length); 134 if (rc < 0) 135 { 136 // This journal entry does not have the TRANSACTION_ID 137 // metadata variable. 138 continue; 139 } 140 141 // journald does not guarantee that sd_journal_get_data() returns 142 // NULL terminated strings, so need to specify the size to use to 143 // compare, use the returned length instead of anything that relies 144 // on NULL terminators like strlen(). The data variable is in the 145 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID 146 // characters plus the (=) sign to do the comparison. 'data + 147 // transactionIdVarOffset' will be in the form of '1234'. 'length - 148 // transactionIdVarOffset' will be the length of '1234'. 149 if ((length <= (transactionIdVarOffset)) || 150 (transactionIdStr.compare( 151 0, transactionIdStr.size(), data + transactionIdVarOffset, 152 length - transactionIdVarOffset) != 0)) 153 { 154 // The value of the TRANSACTION_ID metadata is not the requested 155 // transaction id number. 156 continue; 157 } 158 159 // Search for all metadata variables in the current journal entry. 160 for (auto i = metalist.cbegin(); i != metalist.cend();) 161 { 162 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data, 163 &length); 164 if (rc < 0) 165 { 166 // Metadata variable not found, check next metadata 167 // variable. 168 i++; 169 continue; 170 } 171 172 // Metadata variable found, save it and remove it from the set. 173 additionalData.emplace_back(data, length); 174 i = metalist.erase(i); 175 } 176 if (metalist.empty()) 177 { 178 // All metadata variables found, break out of journal loop. 179 break; 180 } 181 } 182 if (!metalist.empty()) 183 { 184 // Not all the metadata variables were found in the journal. 185 for (auto& metaVarStr : metalist) 186 { 187 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD", 188 metaVarStr); 189 } 190 } 191 192 sd_journal_close(j); 193 } 194 createEntry(errMsg, errLvl, additionalData); 195 } 196 197 void Manager::createEntry(std::string errMsg, Entry::Level errLvl, 198 std::vector<std::string> additionalData, 199 const FFDCEntries& ffdc) 200 { 201 if (!Extensions::disableDefaultLogCaps()) 202 { 203 if (errLvl < Entry::sevLowerLimit) 204 { 205 if (realErrors.size() >= ERROR_CAP) 206 { 207 erase(realErrors.front()); 208 } 209 } 210 else 211 { 212 if (infoErrors.size() >= ERROR_INFO_CAP) 213 { 214 erase(infoErrors.front()); 215 } 216 } 217 } 218 219 entryId++; 220 if (errLvl >= Entry::sevLowerLimit) 221 { 222 infoErrors.push_back(entryId); 223 } 224 else 225 { 226 realErrors.push_back(entryId); 227 } 228 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>( 229 std::chrono::system_clock::now().time_since_epoch()) 230 .count(); 231 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 232 233 AssociationList objects{}; 234 processMetadata(errMsg, additionalData, objects); 235 236 auto e = std::make_unique<Entry>( 237 busLog, objPath, entryId, 238 ms, // Milliseconds since 1970 239 errLvl, std::move(errMsg), std::move(additionalData), 240 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this); 241 242 serialize(*e); 243 244 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) && 245 isCalloutPresent(*e)) 246 { 247 quiesceOnError(entryId); 248 } 249 250 // Add entry before calling the extensions so that they have access to it 251 entries.insert(std::make_pair(entryId, std::move(e))); 252 253 doExtensionLogCreate(*entries.find(entryId)->second, ffdc); 254 255 // Note: No need to close the file descriptors in the FFDC. 256 } 257 258 bool Manager::isQuiesceOnErrorEnabled() 259 { 260 // When running under tests, the Logging.Settings service will not be 261 // present. Assume false. 262 if (IS_UNIT_TEST) 263 { 264 return false; 265 } 266 267 std::variant<bool> property; 268 269 auto method = this->busLog.new_method_call( 270 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings", 271 "org.freedesktop.DBus.Properties", "Get"); 272 273 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError"); 274 275 try 276 { 277 auto reply = this->busLog.call(method); 278 reply.read(property); 279 } 280 catch (const sdbusplus::exception_t& e) 281 { 282 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 283 e); 284 throw; 285 } 286 287 return std::get<bool>(property); 288 } 289 290 bool Manager::isCalloutPresent(const Entry& entry) 291 { 292 for (const auto& c : entry.additionalData()) 293 { 294 if (c.find("CALLOUT_") != std::string::npos) 295 { 296 return true; 297 } 298 } 299 300 return false; 301 } 302 303 void Manager::findAndRemoveResolvedBlocks() 304 { 305 for (auto& entry : entries) 306 { 307 if (entry.second->resolved()) 308 { 309 checkAndRemoveBlockingError(entry.first); 310 } 311 } 312 } 313 314 void Manager::onEntryResolve(sdbusplus::message_t& msg) 315 { 316 using Interface = std::string; 317 using Property = std::string; 318 using Value = std::string; 319 using Properties = std::map<Property, std::variant<Value>>; 320 321 Interface interface; 322 Properties properties; 323 324 msg.read(interface, properties); 325 326 for (const auto& p : properties) 327 { 328 if (p.first == "Resolved") 329 { 330 findAndRemoveResolvedBlocks(); 331 return; 332 } 333 } 334 } 335 336 void Manager::checkAndQuiesceHost() 337 { 338 using Host = sdbusplus::xyz::openbmc_project::State::server::Host; 339 340 // First check host state 341 std::variant<Host::HostState> property; 342 343 auto method = this->busLog.new_method_call( 344 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0", 345 "org.freedesktop.DBus.Properties", "Get"); 346 347 method.append("xyz.openbmc_project.State.Host", "CurrentHostState"); 348 349 try 350 { 351 auto reply = this->busLog.call(method); 352 reply.read(property); 353 } 354 catch (const sdbusplus::exception_t& e) 355 { 356 // Quiescing the host is a "best effort" type function. If unable to 357 // read the host state or it comes back empty, just return. 358 // The boot block object will still be created and the associations to 359 // find the log will be present. Don't want a dependency with 360 // phosphor-state-manager service 361 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR", 362 e); 363 return; 364 } 365 366 auto hostState = std::get<Host::HostState>(property); 367 if (hostState != Host::HostState::Running) 368 { 369 return; 370 } 371 372 auto quiesce = this->busLog.new_method_call( 373 "org.freedesktop.systemd1", "/org/freedesktop/systemd1", 374 "org.freedesktop.systemd1.Manager", "StartUnit"); 375 376 quiesce.append("obmc-host-graceful-quiesce@0.target"); 377 quiesce.append("replace"); 378 379 this->busLog.call_noreply(quiesce); 380 } 381 382 void Manager::quiesceOnError(const uint32_t entryId) 383 { 384 // Verify we don't already have this entry blocking 385 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(), 386 [&](const std::unique_ptr<Block>& obj) { 387 return obj->entryId == entryId; 388 }); 389 if (it != this->blockingErrors.end()) 390 { 391 // Already recorded so just return 392 lg2::debug( 393 "QuiesceOnError set and callout present but entry already logged"); 394 return; 395 } 396 397 lg2::info("QuiesceOnError set and callout present"); 398 399 auto blockPath = std::string(OBJ_LOGGING) + "/block" + 400 std::to_string(entryId); 401 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId); 402 this->blockingErrors.push_back(std::move(blockObj)); 403 404 // Register call back if log is resolved 405 using namespace sdbusplus::bus::match::rules; 406 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 407 auto callback = std::make_unique<sdbusplus::bus::match_t>( 408 this->busLog, 409 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"), 410 std::bind(std::mem_fn(&Manager::onEntryResolve), this, 411 std::placeholders::_1)); 412 413 propChangedEntryCallback.insert( 414 std::make_pair(entryId, std::move(callback))); 415 416 checkAndQuiesceHost(); 417 } 418 419 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc) 420 { 421 // Make the association <endpointpath>/<endpointtype> paths 422 std::vector<std::string> assocs; 423 for (const auto& [forwardType, reverseType, endpoint] : 424 entry.associations()) 425 { 426 std::string e{endpoint}; 427 e += '/' + reverseType; 428 assocs.push_back(e); 429 } 430 431 for (auto& create : Extensions::getCreateFunctions()) 432 { 433 try 434 { 435 create(entry.message(), entry.id(), entry.timestamp(), 436 entry.severity(), entry.additionalData(), assocs, ffdc); 437 } 438 catch (const std::exception& e) 439 { 440 lg2::error( 441 "An extension's create function threw an exception: {ERROR}", 442 "ERROR", e); 443 } 444 } 445 } 446 447 void Manager::processMetadata(const std::string& /*errorName*/, 448 const std::vector<std::string>& additionalData, 449 AssociationList& objects) const 450 { 451 // additionalData is a list of "metadata=value" 452 constexpr auto separator = '='; 453 for (const auto& entryItem : additionalData) 454 { 455 auto found = entryItem.find(separator); 456 if (std::string::npos != found) 457 { 458 auto metadata = entryItem.substr(0, found); 459 auto iter = meta.find(metadata); 460 if (meta.end() != iter) 461 { 462 (iter->second)(metadata, additionalData, objects); 463 } 464 } 465 } 466 } 467 468 void Manager::checkAndRemoveBlockingError(uint32_t entryId) 469 { 470 // First look for blocking object and remove 471 auto it = find_if(blockingErrors.begin(), blockingErrors.end(), 472 [&](const std::unique_ptr<Block>& obj) { 473 return obj->entryId == entryId; 474 }); 475 if (it != blockingErrors.end()) 476 { 477 blockingErrors.erase(it); 478 } 479 480 // Now remove the callback looking for the error to be resolved 481 auto resolveFind = propChangedEntryCallback.find(entryId); 482 if (resolveFind != propChangedEntryCallback.end()) 483 { 484 propChangedEntryCallback.erase(resolveFind); 485 } 486 487 return; 488 } 489 490 void Manager::erase(uint32_t entryId) 491 { 492 auto entryFound = entries.find(entryId); 493 if (entries.end() != entryFound) 494 { 495 for (auto& func : Extensions::getDeleteProhibitedFunctions()) 496 { 497 try 498 { 499 bool prohibited = false; 500 func(entryId, prohibited); 501 if (prohibited) 502 { 503 // Future work remains to throw an error here. 504 return; 505 } 506 } 507 catch (const std::exception& e) 508 { 509 lg2::error("An extension's deleteProhibited function threw an " 510 "exception: {ERROR}", 511 "ERROR", e); 512 } 513 } 514 515 // Delete the persistent representation of this error. 516 fs::path errorPath(ERRLOG_PERSIST_PATH); 517 errorPath /= std::to_string(entryId); 518 fs::remove(errorPath); 519 520 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) { 521 auto it = std::find(ids.begin(), ids.end(), id); 522 if (it != ids.end()) 523 { 524 ids.erase(it); 525 } 526 }; 527 if (entryFound->second->severity() >= Entry::sevLowerLimit) 528 { 529 removeId(infoErrors, entryId); 530 } 531 else 532 { 533 removeId(realErrors, entryId); 534 } 535 entries.erase(entryFound); 536 537 checkAndRemoveBlockingError(entryId); 538 539 for (auto& remove : Extensions::getDeleteFunctions()) 540 { 541 try 542 { 543 remove(entryId); 544 } 545 catch (const std::exception& e) 546 { 547 lg2::error("An extension's delete function threw an exception: " 548 "{ERROR}", 549 "ERROR", e); 550 } 551 } 552 } 553 else 554 { 555 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId); 556 } 557 } 558 559 void Manager::restore() 560 { 561 auto sanity = [](const auto& id, const auto& restoredId) { 562 return id == restoredId; 563 }; 564 565 fs::path dir(ERRLOG_PERSIST_PATH); 566 if (!fs::exists(dir) || fs::is_empty(dir)) 567 { 568 return; 569 } 570 571 for (auto& file : fs::directory_iterator(dir)) 572 { 573 auto id = file.path().filename().c_str(); 574 auto idNum = std::stol(id); 575 auto e = std::make_unique<Entry>( 576 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this); 577 if (deserialize(file.path(), *e)) 578 { 579 // validate the restored error entry id 580 if (sanity(static_cast<uint32_t>(idNum), e->id())) 581 { 582 e->path(file.path(), true); 583 if (e->severity() >= Entry::sevLowerLimit) 584 { 585 infoErrors.push_back(idNum); 586 } 587 else 588 { 589 realErrors.push_back(idNum); 590 } 591 592 entries.insert(std::make_pair(idNum, std::move(e))); 593 } 594 else 595 { 596 lg2::error( 597 "Failed in sanity check while restoring error entry. " 598 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.", 599 "ID_NUM", idNum, "ENTRY_ID", e->id()); 600 } 601 } 602 } 603 604 if (!entries.empty()) 605 { 606 entryId = entries.rbegin()->first; 607 } 608 } 609 610 std::string Manager::readFWVersion() 611 { 612 auto version = util::getOSReleaseValue("VERSION_ID"); 613 614 if (!version) 615 { 616 lg2::error("Unable to read BMC firmware version"); 617 } 618 619 return version.value_or(""); 620 } 621 622 void Manager::create(const std::string& message, Entry::Level severity, 623 const std::map<std::string, std::string>& additionalData) 624 { 625 // Convert the map into a vector of "key=value" strings 626 std::vector<std::string> ad; 627 metadata::associations::combine(additionalData, ad); 628 629 createEntry(message, severity, ad); 630 } 631 632 void Manager::createWithFFDC( 633 const std::string& message, Entry::Level severity, 634 const std::map<std::string, std::string>& additionalData, 635 const FFDCEntries& ffdc) 636 { 637 // Convert the map into a vector of "key=value" strings 638 std::vector<std::string> ad; 639 metadata::associations::combine(additionalData, ad); 640 641 createEntry(message, severity, ad, ffdc); 642 } 643 644 } // namespace internal 645 } // namespace logging 646 } // namespace phosphor 647