1 #include "config.h" 2 3 #include "log_manager.hpp" 4 5 #include "elog_entry.hpp" 6 #include "elog_meta.hpp" 7 #include "elog_serialize.hpp" 8 #include "extensions.hpp" 9 10 #include <poll.h> 11 #include <sys/inotify.h> 12 #include <systemd/sd-bus.h> 13 #include <systemd/sd-journal.h> 14 #include <unistd.h> 15 16 #include <cassert> 17 #include <chrono> 18 #include <cstdio> 19 #include <cstring> 20 #include <fstream> 21 #include <functional> 22 #include <future> 23 #include <iostream> 24 #include <map> 25 #include <phosphor-logging/log.hpp> 26 #include <sdbusplus/vtable.hpp> 27 #include <set> 28 #include <string> 29 #include <vector> 30 31 using namespace phosphor::logging; 32 using namespace std::chrono; 33 extern const std::map<metadata::Metadata, 34 std::function<metadata::associations::Type>> 35 meta; 36 37 namespace phosphor 38 { 39 namespace logging 40 { 41 namespace internal 42 { 43 inline auto getLevel(const std::string& errMsg) 44 { 45 auto reqLevel = Entry::Level::Error; // Default to Error 46 47 auto levelmap = g_errLevelMap.find(errMsg); 48 if (levelmap != g_errLevelMap.end()) 49 { 50 reqLevel = static_cast<Entry::Level>(levelmap->second); 51 } 52 53 return reqLevel; 54 } 55 56 int Manager::getRealErrSize() 57 { 58 return realErrors.size(); 59 } 60 61 int Manager::getInfoErrSize() 62 { 63 return infoErrors.size(); 64 } 65 66 void Manager::commit(uint64_t transactionId, std::string errMsg) 67 { 68 auto level = getLevel(errMsg); 69 _commit(transactionId, std::move(errMsg), level); 70 } 71 72 void Manager::commitWithLvl(uint64_t transactionId, std::string errMsg, 73 uint32_t errLvl) 74 { 75 _commit(transactionId, std::move(errMsg), 76 static_cast<Entry::Level>(errLvl)); 77 } 78 79 void Manager::_commit(uint64_t transactionId, std::string&& errMsg, 80 Entry::Level errLvl) 81 { 82 constexpr const auto transactionIdVar = "TRANSACTION_ID"; 83 // Length of 'TRANSACTION_ID' string. 84 constexpr const auto transactionIdVarSize = std::strlen(transactionIdVar); 85 // Length of 'TRANSACTION_ID=' string. 86 constexpr const auto transactionIdVarOffset = transactionIdVarSize + 1; 87 88 // Flush all the pending log messages into the journal 89 journalSync(); 90 91 sd_journal* j = nullptr; 92 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY); 93 if (rc < 0) 94 { 95 logging::log<logging::level::ERR>( 96 "Failed to open journal", 97 logging::entry("DESCRIPTION=%s", strerror(-rc))); 98 return; 99 } 100 101 std::string transactionIdStr = std::to_string(transactionId); 102 std::set<std::string> metalist; 103 auto metamap = g_errMetaMap.find(errMsg); 104 if (metamap != g_errMetaMap.end()) 105 { 106 metalist.insert(metamap->second.begin(), metamap->second.end()); 107 } 108 109 // Add _PID field information in AdditionalData. 110 metalist.insert("_PID"); 111 112 std::vector<std::string> additionalData; 113 114 // Read the journal from the end to get the most recent entry first. 115 // The result from the sd_journal_get_data() is of the form VARIABLE=value. 116 SD_JOURNAL_FOREACH_BACKWARDS(j) 117 { 118 const char* data = nullptr; 119 size_t length = 0; 120 121 // Look for the transaction id metadata variable 122 rc = sd_journal_get_data(j, transactionIdVar, (const void**)&data, 123 &length); 124 if (rc < 0) 125 { 126 // This journal entry does not have the TRANSACTION_ID 127 // metadata variable. 128 continue; 129 } 130 131 // journald does not guarantee that sd_journal_get_data() returns NULL 132 // terminated strings, so need to specify the size to use to compare, 133 // use the returned length instead of anything that relies on NULL 134 // terminators like strlen(). 135 // The data variable is in the form of 'TRANSACTION_ID=1234'. Remove 136 // the TRANSACTION_ID characters plus the (=) sign to do the comparison. 137 // 'data + transactionIdVarOffset' will be in the form of '1234'. 138 // 'length - transactionIdVarOffset' will be the length of '1234'. 139 if ((length <= (transactionIdVarOffset)) || 140 (transactionIdStr.compare(0, transactionIdStr.size(), 141 data + transactionIdVarOffset, 142 length - transactionIdVarOffset) != 0)) 143 { 144 // The value of the TRANSACTION_ID metadata is not the requested 145 // transaction id number. 146 continue; 147 } 148 149 // Search for all metadata variables in the current journal entry. 150 for (auto i = metalist.cbegin(); i != metalist.cend();) 151 { 152 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data, 153 &length); 154 if (rc < 0) 155 { 156 // Metadata variable not found, check next metadata variable. 157 i++; 158 continue; 159 } 160 161 // Metadata variable found, save it and remove it from the set. 162 additionalData.emplace_back(data, length); 163 i = metalist.erase(i); 164 } 165 if (metalist.empty()) 166 { 167 // All metadata variables found, break out of journal loop. 168 break; 169 } 170 } 171 if (!metalist.empty()) 172 { 173 // Not all the metadata variables were found in the journal. 174 for (auto& metaVarStr : metalist) 175 { 176 logging::log<logging::level::INFO>( 177 "Failed to find metadata", 178 logging::entry("META_FIELD=%s", metaVarStr.c_str())); 179 } 180 } 181 182 sd_journal_close(j); 183 184 createEntry(errMsg, errLvl, additionalData); 185 } 186 187 void Manager::createEntry(std::string errMsg, Entry::Level errLvl, 188 std::vector<std::string> additionalData) 189 { 190 if (!Extensions::disableDefaultLogCaps()) 191 { 192 if (errLvl < Entry::sevLowerLimit) 193 { 194 if (realErrors.size() >= ERROR_CAP) 195 { 196 erase(realErrors.front()); 197 } 198 } 199 else 200 { 201 if (infoErrors.size() >= ERROR_INFO_CAP) 202 { 203 erase(infoErrors.front()); 204 } 205 } 206 } 207 208 entryId++; 209 if (errLvl >= Entry::sevLowerLimit) 210 { 211 infoErrors.push_back(entryId); 212 } 213 else 214 { 215 realErrors.push_back(entryId); 216 } 217 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>( 218 std::chrono::system_clock::now().time_since_epoch()) 219 .count(); 220 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId); 221 222 AssociationList objects{}; 223 processMetadata(errMsg, additionalData, objects); 224 225 auto e = std::make_unique<Entry>(busLog, objPath, entryId, 226 ms, // Milliseconds since 1970 227 errLvl, std::move(errMsg), 228 std::move(additionalData), 229 std::move(objects), fwVersion, *this); 230 serialize(*e); 231 232 doExtensionLogCreate(*e); 233 234 entries.insert(std::make_pair(entryId, std::move(e))); 235 } 236 237 void Manager::doExtensionLogCreate(const Entry& entry) 238 { 239 // Make the association <endpointpath>/<endpointtype> paths 240 std::vector<std::string> assocs; 241 for (const auto& [forwardType, reverseType, endpoint] : 242 entry.associations()) 243 { 244 std::string e{endpoint}; 245 e += '/' + reverseType; 246 assocs.push_back(e); 247 } 248 249 for (auto& create : Extensions::getCreateFunctions()) 250 { 251 try 252 { 253 create(entry.message(), entry.id(), entry.timestamp(), 254 entry.severity(), entry.additionalData(), assocs); 255 } 256 catch (std::exception& e) 257 { 258 log<level::ERR>("An extension's create function threw an exception", 259 phosphor::logging::entry("ERROR=%s", e.what())); 260 } 261 } 262 } 263 264 void Manager::processMetadata(const std::string& errorName, 265 const std::vector<std::string>& additionalData, 266 AssociationList& objects) const 267 { 268 // additionalData is a list of "metadata=value" 269 constexpr auto separator = '='; 270 for (const auto& entryItem : additionalData) 271 { 272 auto found = entryItem.find(separator); 273 if (std::string::npos != found) 274 { 275 auto metadata = entryItem.substr(0, found); 276 auto iter = meta.find(metadata); 277 if (meta.end() != iter) 278 { 279 (iter->second)(metadata, additionalData, objects); 280 } 281 } 282 } 283 } 284 285 void Manager::erase(uint32_t entryId) 286 { 287 auto entryFound = entries.find(entryId); 288 if (entries.end() != entryFound) 289 { 290 for (auto& func : Extensions::getDeleteProhibitedFunctions()) 291 { 292 try 293 { 294 bool prohibited = false; 295 func(entryId, prohibited); 296 if (prohibited) 297 { 298 // Future work remains to throw an error here. 299 return; 300 } 301 } 302 catch (std::exception& e) 303 { 304 log<level::ERR>( 305 "An extension's deleteProhibited function threw " 306 "an exception", 307 entry("ERROR=%s", e.what())); 308 } 309 } 310 311 // Delete the persistent representation of this error. 312 fs::path errorPath(ERRLOG_PERSIST_PATH); 313 errorPath /= std::to_string(entryId); 314 fs::remove(errorPath); 315 316 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) { 317 auto it = std::find(ids.begin(), ids.end(), id); 318 if (it != ids.end()) 319 { 320 ids.erase(it); 321 } 322 }; 323 if (entryFound->second->severity() >= Entry::sevLowerLimit) 324 { 325 removeId(infoErrors, entryId); 326 } 327 else 328 { 329 removeId(realErrors, entryId); 330 } 331 entries.erase(entryFound); 332 333 for (auto& remove : Extensions::getDeleteFunctions()) 334 { 335 try 336 { 337 remove(entryId); 338 } 339 catch (std::exception& e) 340 { 341 log<level::ERR>("An extension's delete function threw an " 342 "exception", 343 entry("ERROR=%s", e.what())); 344 } 345 } 346 } 347 else 348 { 349 logging::log<level::ERR>("Invalid entry ID to delete", 350 logging::entry("ID=%d", entryId)); 351 } 352 } 353 354 void Manager::restore() 355 { 356 auto sanity = [](const auto& id, const auto& restoredId) { 357 return id == restoredId; 358 }; 359 std::vector<uint32_t> errorIds; 360 361 fs::path dir(ERRLOG_PERSIST_PATH); 362 if (!fs::exists(dir) || fs::is_empty(dir)) 363 { 364 return; 365 } 366 367 for (auto& file : fs::directory_iterator(dir)) 368 { 369 auto id = file.path().filename().c_str(); 370 auto idNum = std::stol(id); 371 auto e = std::make_unique<Entry>( 372 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this); 373 if (deserialize(file.path(), *e)) 374 { 375 // validate the restored error entry id 376 if (sanity(static_cast<uint32_t>(idNum), e->id())) 377 { 378 e->emit_object_added(); 379 if (e->severity() >= Entry::sevLowerLimit) 380 { 381 infoErrors.push_back(idNum); 382 } 383 else 384 { 385 realErrors.push_back(idNum); 386 } 387 388 entries.insert(std::make_pair(idNum, std::move(e))); 389 errorIds.push_back(idNum); 390 } 391 else 392 { 393 logging::log<logging::level::ERR>( 394 "Failed in sanity check while restoring error entry. " 395 "Ignoring error entry", 396 logging::entry("ID_NUM=%d", idNum), 397 logging::entry("ENTRY_ID=%d", e->id())); 398 } 399 } 400 } 401 402 if (!errorIds.empty()) 403 { 404 entryId = *(std::max_element(errorIds.begin(), errorIds.end())); 405 } 406 } 407 408 void Manager::journalSync() 409 { 410 bool syncRequested = false; 411 auto fd = -1; 412 auto rc = -1; 413 auto wd = -1; 414 auto bus = sdbusplus::bus::new_default(); 415 416 auto start = 417 duration_cast<microseconds>(steady_clock::now().time_since_epoch()) 418 .count(); 419 420 // Each time an error log is committed, a request to sync the journal 421 // must occur and block that error log commit until it completes. A 5sec 422 // block is done to allow sufficient time for the journal to be synced. 423 // 424 // Number of loop iterations = 3 for the following reasons: 425 // Iteration #1: Requests a journal sync by killing the journald service. 426 // Iteration #2: Setup an inotify watch to monitor the synced file that 427 // journald updates with the timestamp the last time the 428 // journal was flushed. 429 // Iteration #3: Poll to wait until inotify reports an event which blocks 430 // the error log from being commited until the sync completes. 431 constexpr auto maxRetry = 3; 432 for (int i = 0; i < maxRetry; i++) 433 { 434 // Read timestamp from synced file 435 constexpr auto syncedPath = "/run/systemd/journal/synced"; 436 std::ifstream syncedFile(syncedPath); 437 if (syncedFile.fail()) 438 { 439 // If the synced file doesn't exist, a sync request will create it. 440 if (errno != ENOENT) 441 { 442 log<level::ERR>("Failed to open journal synced file", 443 entry("FILENAME=%s", syncedPath), 444 entry("ERRNO=%d", errno)); 445 return; 446 } 447 } 448 else 449 { 450 // Only read the synced file if it exists. 451 // See if a sync happened by now 452 std::string timestampStr; 453 std::getline(syncedFile, timestampStr); 454 auto timestamp = std::stoll(timestampStr); 455 if (timestamp >= start) 456 { 457 break; 458 } 459 } 460 461 // Let's ask for a sync, but only once 462 if (!syncRequested) 463 { 464 syncRequested = true; 465 466 constexpr auto JOURNAL_UNIT = "systemd-journald.service"; 467 auto signal = SIGRTMIN + 1; 468 469 auto method = bus.new_method_call(SYSTEMD_BUSNAME, SYSTEMD_PATH, 470 SYSTEMD_INTERFACE, "KillUnit"); 471 method.append(JOURNAL_UNIT, "main", signal); 472 bus.call(method); 473 if (method.is_method_error()) 474 { 475 log<level::ERR>("Failed to kill journal service"); 476 break; 477 } 478 continue; 479 } 480 481 // Let's install the inotify watch, if we didn't do that yet. This watch 482 // monitors the syncedFile for when journald updates it with a newer 483 // timestamp. This means the journal has been flushed. 484 if (fd < 0) 485 { 486 fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC); 487 if (fd < 0) 488 { 489 log<level::ERR>("Failed to create inotify watch", 490 entry("ERRNO=%d", errno)); 491 return; 492 } 493 494 constexpr auto JOURNAL_RUN_PATH = "/run/systemd/journal"; 495 wd = inotify_add_watch(fd, JOURNAL_RUN_PATH, 496 IN_MOVED_TO | IN_DONT_FOLLOW | IN_ONLYDIR); 497 if (wd < 0) 498 { 499 log<level::ERR>("Failed to watch journal directory", 500 entry("PATH=%s", JOURNAL_RUN_PATH), 501 entry("ERRNO=%d", errno)); 502 close(fd); 503 return; 504 } 505 continue; 506 } 507 508 // Let's wait until inotify reports an event 509 struct pollfd fds = { 510 .fd = fd, 511 .events = POLLIN, 512 }; 513 constexpr auto pollTimeout = 5; // 5 seconds 514 rc = poll(&fds, 1, pollTimeout * 1000); 515 if (rc < 0) 516 { 517 log<level::ERR>("Failed to add event", entry("ERRNO=%d", errno), 518 entry("ERR=%s", strerror(-rc))); 519 inotify_rm_watch(fd, wd); 520 close(fd); 521 return; 522 } 523 else if (rc == 0) 524 { 525 log<level::INFO>("Poll timeout, no new journal synced data", 526 entry("TIMEOUT=%d", pollTimeout)); 527 break; 528 } 529 530 // Read from the specified file descriptor until there is no new data, 531 // throwing away everything read since the timestamp will be read at the 532 // beginning of the loop. 533 constexpr auto maxBytes = 64; 534 uint8_t buffer[maxBytes]; 535 while (read(fd, buffer, maxBytes) > 0) 536 ; 537 } 538 539 if (fd != -1) 540 { 541 if (wd != -1) 542 { 543 inotify_rm_watch(fd, wd); 544 } 545 close(fd); 546 } 547 548 return; 549 } 550 551 std::string Manager::readFWVersion() 552 { 553 std::string version; 554 std::ifstream versionFile{BMC_VERSION_FILE}; 555 std::string line; 556 static constexpr auto VERSION_ID = "VERSION_ID="; 557 558 while (std::getline(versionFile, line)) 559 { 560 if (line.find(VERSION_ID) != std::string::npos) 561 { 562 auto pos = line.find_first_of('"') + 1; 563 version = line.substr(pos, line.find_last_of('"') - pos); 564 break; 565 } 566 } 567 568 if (version.empty()) 569 { 570 log<level::ERR>("Unable to read BMC firmware version"); 571 } 572 573 return version; 574 } 575 576 void Manager::create(const std::string& message, Entry::Level severity, 577 const std::map<std::string, std::string>& additionalData) 578 { 579 // Convert the map into a vector of "key=value" strings 580 std::vector<std::string> ad; 581 metadata::associations::combine(additionalData, ad); 582 583 createEntry(message, severity, ad); 584 } 585 586 } // namespace internal 587 } // namespace logging 588 } // namespace phosphor 589