1 #include "config.h"
2
3 #include "log_manager.hpp"
4
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <fstream>
26 #include <functional>
27 #include <future>
28 #include <iostream>
29 #include <map>
30 #include <ranges>
31 #include <set>
32 #include <string>
33 #include <string_view>
34 #include <vector>
35
36 using namespace std::chrono;
37 extern const std::map<
38 phosphor::logging::metadata::Metadata,
39 std::function<phosphor::logging::metadata::associations::Type>>
40 meta;
41
42 namespace phosphor
43 {
44 namespace logging
45 {
46 namespace internal
47 {
getLevel(const std::string & errMsg)48 inline auto getLevel(const std::string& errMsg)
49 {
50 auto reqLevel = Entry::Level::Error; // Default to Error
51
52 auto levelmap = g_errLevelMap.find(errMsg);
53 if (levelmap != g_errLevelMap.end())
54 {
55 reqLevel = static_cast<Entry::Level>(levelmap->second);
56 }
57
58 return reqLevel;
59 }
60
getRealErrSize()61 int Manager::getRealErrSize()
62 {
63 return realErrors.size();
64 }
65
getInfoErrSize()66 int Manager::getInfoErrSize()
67 {
68 return infoErrors.size();
69 }
70
commit(uint64_t transactionId,std::string errMsg)71 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
72 {
73 auto level = getLevel(errMsg);
74 _commit(transactionId, std::move(errMsg), level);
75 return entryId;
76 }
77
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)78 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
79 uint32_t errLvl)
80 {
81 _commit(transactionId, std::move(errMsg),
82 static_cast<Entry::Level>(errLvl));
83 return entryId;
84 }
85
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)86 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
87 std::string&& errMsg, Entry::Level errLvl)
88 {
89 std::map<std::string, std::string> additionalData{};
90
91 // When running as a test-case, the system may have a LOT of journal
92 // data and we may not have permissions to do some of the journal sync
93 // operations. Just skip over them.
94 if (!IS_UNIT_TEST)
95 {
96 static constexpr auto transactionIdVar =
97 std::string_view{"TRANSACTION_ID"};
98 // Length of 'TRANSACTION_ID' string.
99 static constexpr auto transactionIdVarSize = transactionIdVar.size();
100 // Length of 'TRANSACTION_ID=' string.
101 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
102
103 // Flush all the pending log messages into the journal
104 util::journalSync();
105
106 sd_journal* j = nullptr;
107 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
108 if (rc < 0)
109 {
110 lg2::error("Failed to open journal: {ERROR}", "ERROR",
111 strerror(-rc));
112 return;
113 }
114
115 std::string transactionIdStr = std::to_string(transactionId);
116 std::set<std::string> metalist;
117 auto metamap = g_errMetaMap.find(errMsg);
118 if (metamap != g_errMetaMap.end())
119 {
120 metalist.insert(metamap->second.begin(), metamap->second.end());
121 }
122
123 // Add _PID field information in AdditionalData.
124 metalist.insert("_PID");
125
126 // Read the journal from the end to get the most recent entry first.
127 // The result from the sd_journal_get_data() is of the form
128 // VARIABLE=value.
129 SD_JOURNAL_FOREACH_BACKWARDS(j)
130 {
131 const char* data = nullptr;
132 size_t length = 0;
133
134 // Look for the transaction id metadata variable
135 rc = sd_journal_get_data(j, transactionIdVar.data(),
136 (const void**)&data, &length);
137 if (rc < 0)
138 {
139 // This journal entry does not have the TRANSACTION_ID
140 // metadata variable.
141 continue;
142 }
143
144 // journald does not guarantee that sd_journal_get_data() returns
145 // NULL terminated strings, so need to specify the size to use to
146 // compare, use the returned length instead of anything that relies
147 // on NULL terminators like strlen(). The data variable is in the
148 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
149 // characters plus the (=) sign to do the comparison. 'data +
150 // transactionIdVarOffset' will be in the form of '1234'. 'length -
151 // transactionIdVarOffset' will be the length of '1234'.
152 if ((length <= (transactionIdVarOffset)) ||
153 (transactionIdStr.compare(
154 0, transactionIdStr.size(), data + transactionIdVarOffset,
155 length - transactionIdVarOffset) != 0))
156 {
157 // The value of the TRANSACTION_ID metadata is not the requested
158 // transaction id number.
159 continue;
160 }
161
162 // Search for all metadata variables in the current journal entry.
163 for (auto i = metalist.cbegin(); i != metalist.cend();)
164 {
165 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
166 &length);
167 if (rc < 0)
168 {
169 // Metadata variable not found, check next metadata
170 // variable.
171 i++;
172 continue;
173 }
174
175 // Metadata variable found, save it and remove it from the set.
176 std::string metadata(data, length);
177 if (auto pos = metadata.find('='); pos != std::string::npos)
178 {
179 auto key = metadata.substr(0, pos);
180 auto value = metadata.substr(pos + 1);
181 additionalData.emplace(std::move(key), std::move(value));
182 }
183 i = metalist.erase(i);
184 }
185 if (metalist.empty())
186 {
187 // All metadata variables found, break out of journal loop.
188 break;
189 }
190 }
191 if (!metalist.empty())
192 {
193 // Not all the metadata variables were found in the journal.
194 for (auto& metaVarStr : metalist)
195 {
196 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
197 metaVarStr);
198 }
199 }
200
201 sd_journal_close(j);
202 }
203 createEntry(errMsg, errLvl, additionalData);
204 }
205
createEntry(std::string errMsg,Entry::Level errLvl,std::map<std::string,std::string> additionalData,const FFDCEntries & ffdc)206 auto Manager::createEntry(
207 std::string errMsg, Entry::Level errLvl,
208 std::map<std::string, std::string> additionalData,
209 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
210 {
211 if (!Extensions::disableDefaultLogCaps())
212 {
213 if (errLvl < Entry::sevLowerLimit)
214 {
215 if (realErrors.size() >= ERROR_CAP)
216 {
217 erase(realErrors.front());
218 }
219 }
220 else
221 {
222 if (infoErrors.size() >= ERROR_INFO_CAP)
223 {
224 erase(infoErrors.front());
225 }
226 }
227 }
228
229 entryId++;
230 if (errLvl >= Entry::sevLowerLimit)
231 {
232 infoErrors.push_back(entryId);
233 }
234 else
235 {
236 realErrors.push_back(entryId);
237 }
238 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
239 std::chrono::system_clock::now().time_since_epoch())
240 .count();
241 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
242
243 AssociationList objects{};
244 auto additionalDataVec = util::additional_data::combine(additionalData);
245 processMetadata(errMsg, additionalDataVec, objects);
246
247 auto e = std::make_unique<Entry>(
248 busLog, objPath, entryId,
249 ms, // Milliseconds since 1970
250 errLvl, std::move(errMsg), std::move(additionalData),
251 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
252
253 serialize(*e);
254
255 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
256 isCalloutPresent(*e))
257 {
258 quiesceOnError(entryId);
259 }
260
261 // Add entry before calling the extensions so that they have access to it
262 entries.insert(std::make_pair(entryId, std::move(e)));
263
264 doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
265
266 // Note: No need to close the file descriptors in the FFDC.
267
268 return objPath;
269 }
270
createFromEvent(sdbusplus::exception::generated_event_base && event)271 auto Manager::createFromEvent(sdbusplus::exception::generated_event_base&&
272 event) -> sdbusplus::message::object_path
273 {
274 auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
275 return this->createEntry(msg, level, std::move(data));
276 }
277
isQuiesceOnErrorEnabled()278 bool Manager::isQuiesceOnErrorEnabled()
279 {
280 // When running under tests, the Logging.Settings service will not be
281 // present. Assume false.
282 if (IS_UNIT_TEST)
283 {
284 return false;
285 }
286
287 std::variant<bool> property;
288
289 auto method = this->busLog.new_method_call(
290 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
291 "org.freedesktop.DBus.Properties", "Get");
292
293 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
294
295 try
296 {
297 auto reply = this->busLog.call(method);
298 reply.read(property);
299 }
300 catch (const sdbusplus::exception_t& e)
301 {
302 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
303 e);
304 return false;
305 }
306
307 return std::get<bool>(property);
308 }
309
isCalloutPresent(const Entry & entry)310 bool Manager::isCalloutPresent(const Entry& entry)
311 {
312 for (const auto& c : std::views::keys(entry.additionalData()))
313 {
314 if (c.find("CALLOUT_") != std::string::npos)
315 {
316 return true;
317 }
318 }
319
320 return false;
321 }
322
findAndRemoveResolvedBlocks()323 void Manager::findAndRemoveResolvedBlocks()
324 {
325 for (auto& entry : entries)
326 {
327 if (entry.second->resolved())
328 {
329 checkAndRemoveBlockingError(entry.first);
330 }
331 }
332 }
333
onEntryResolve(sdbusplus::message_t & msg)334 void Manager::onEntryResolve(sdbusplus::message_t& msg)
335 {
336 using Interface = std::string;
337 using Property = std::string;
338 using Value = std::string;
339 using Properties = std::map<Property, std::variant<Value>>;
340
341 Interface interface;
342 Properties properties;
343
344 msg.read(interface, properties);
345
346 for (const auto& p : properties)
347 {
348 if (p.first == "Resolved")
349 {
350 findAndRemoveResolvedBlocks();
351 return;
352 }
353 }
354 }
355
checkAndQuiesceHost()356 void Manager::checkAndQuiesceHost()
357 {
358 using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
359
360 // First check host state
361 std::variant<Host::HostState> property;
362
363 auto method = this->busLog.new_method_call(
364 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
365 "org.freedesktop.DBus.Properties", "Get");
366
367 method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
368
369 try
370 {
371 auto reply = this->busLog.call(method);
372 reply.read(property);
373 }
374 catch (const sdbusplus::exception_t& e)
375 {
376 // Quiescing the host is a "best effort" type function. If unable to
377 // read the host state or it comes back empty, just return.
378 // The boot block object will still be created and the associations to
379 // find the log will be present. Don't want a dependency with
380 // phosphor-state-manager service
381 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
382 e);
383 return;
384 }
385
386 auto hostState = std::get<Host::HostState>(property);
387 if (hostState != Host::HostState::Running)
388 {
389 return;
390 }
391
392 auto quiesce = this->busLog.new_method_call(
393 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
394 "org.freedesktop.systemd1.Manager", "StartUnit");
395
396 quiesce.append("obmc-host-graceful-quiesce@0.target");
397 quiesce.append("replace");
398
399 this->busLog.call_noreply(quiesce);
400 }
401
quiesceOnError(const uint32_t entryId)402 void Manager::quiesceOnError(const uint32_t entryId)
403 {
404 // Verify we don't already have this entry blocking
405 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
406 [&](const std::unique_ptr<Block>& obj) {
407 return obj->entryId == entryId;
408 });
409 if (it != this->blockingErrors.end())
410 {
411 // Already recorded so just return
412 lg2::debug(
413 "QuiesceOnError set and callout present but entry already logged");
414 return;
415 }
416
417 lg2::info("QuiesceOnError set and callout present");
418
419 auto blockPath =
420 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
421 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
422 this->blockingErrors.push_back(std::move(blockObj));
423
424 // Register call back if log is resolved
425 using namespace sdbusplus::bus::match::rules;
426 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
427 auto callback = std::make_unique<sdbusplus::bus::match_t>(
428 this->busLog,
429 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
430 std::bind(std::mem_fn(&Manager::onEntryResolve), this,
431 std::placeholders::_1));
432
433 propChangedEntryCallback.insert(
434 std::make_pair(entryId, std::move(callback)));
435
436 checkAndQuiesceHost();
437 }
438
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)439 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
440 {
441 // Make the association <endpointpath>/<endpointtype> paths
442 std::vector<std::string> assocs;
443 for (const auto& [forwardType, reverseType, endpoint] :
444 entry.associations())
445 {
446 std::string e{endpoint};
447 e += '/' + reverseType;
448 assocs.push_back(e);
449 }
450
451 for (auto& create : Extensions::getCreateFunctions())
452 {
453 try
454 {
455 create(entry.message(), entry.id(), entry.timestamp(),
456 entry.severity(), entry.additionalData2(), assocs, ffdc);
457 }
458 catch (const std::exception& e)
459 {
460 lg2::error(
461 "An extension's create function threw an exception: {ERROR}",
462 "ERROR", e);
463 }
464 }
465 }
466
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const467 void Manager::processMetadata(const std::string& /*errorName*/,
468 const std::vector<std::string>& additionalData,
469 AssociationList& objects) const
470 {
471 // additionalData is a list of "metadata=value"
472 constexpr auto separator = '=';
473 for (const auto& entryItem : additionalData)
474 {
475 auto found = entryItem.find(separator);
476 if (std::string::npos != found)
477 {
478 auto metadata = entryItem.substr(0, found);
479 auto iter = meta.find(metadata);
480 if (meta.end() != iter)
481 {
482 (iter->second)(metadata, additionalData, objects);
483 }
484 }
485 }
486 }
487
checkAndRemoveBlockingError(uint32_t entryId)488 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
489 {
490 // First look for blocking object and remove
491 auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
492 [&](const std::unique_ptr<Block>& obj) {
493 return obj->entryId == entryId;
494 });
495 if (it != blockingErrors.end())
496 {
497 blockingErrors.erase(it);
498 }
499
500 // Now remove the callback looking for the error to be resolved
501 auto resolveFind = propChangedEntryCallback.find(entryId);
502 if (resolveFind != propChangedEntryCallback.end())
503 {
504 propChangedEntryCallback.erase(resolveFind);
505 }
506
507 return;
508 }
509
eraseAll()510 size_t Manager::eraseAll()
511 {
512 std::vector<uint32_t> logIDWithHwIsolation;
513 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
514 {
515 try
516 {
517 func(logIDWithHwIsolation);
518 }
519 catch (const std::exception& e)
520 {
521 lg2::error("An extension's LogIDWithHwIsolation function threw an "
522 "exception: {ERROR}",
523 "ERROR", e);
524 }
525 }
526 size_t entriesSize = entries.size();
527 auto iter = entries.begin();
528 if (logIDWithHwIsolation.empty())
529 {
530 while (iter != entries.end())
531 {
532 auto e = iter->first;
533 ++iter;
534 erase(e);
535 }
536 entryId = 0;
537 }
538 else
539 {
540 while (iter != entries.end())
541 {
542 auto e = iter->first;
543 ++iter;
544 try
545 {
546 if (!std::ranges::contains(logIDWithHwIsolation, e))
547 {
548 erase(e);
549 }
550 else
551 {
552 entriesSize--;
553 }
554 }
555 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
556 Unavailable& e)
557 {
558 entriesSize--;
559 }
560 }
561 if (!entries.empty())
562 {
563 entryId = std::ranges::max_element(entries, [](const auto& a,
564 const auto& b) {
565 return a.first < b.first;
566 })->first;
567 }
568 else
569 {
570 entryId = 0;
571 }
572 }
573 return entriesSize;
574 }
575
erase(uint32_t entryId)576 void Manager::erase(uint32_t entryId)
577 {
578 auto entryFound = entries.find(entryId);
579 if (entries.end() != entryFound)
580 {
581 for (auto& func : Extensions::getDeleteProhibitedFunctions())
582 {
583 try
584 {
585 bool prohibited = false;
586 func(entryId, prohibited);
587 if (prohibited)
588 {
589 throw sdbusplus::xyz::openbmc_project::Common::Error::
590 Unavailable();
591 }
592 }
593 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
594 Unavailable& e)
595 {
596 throw;
597 }
598 catch (const std::exception& e)
599 {
600 lg2::error("An extension's deleteProhibited function threw an "
601 "exception: {ERROR}",
602 "ERROR", e);
603 }
604 }
605
606 // Delete the persistent representation of this error.
607 fs::path errorPath(paths::error());
608 errorPath /= std::to_string(entryId);
609 fs::remove(errorPath);
610
611 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
612 auto it = std::find(ids.begin(), ids.end(), id);
613 if (it != ids.end())
614 {
615 ids.erase(it);
616 }
617 };
618 if (entryFound->second->severity() >= Entry::sevLowerLimit)
619 {
620 removeId(infoErrors, entryId);
621 }
622 else
623 {
624 removeId(realErrors, entryId);
625 }
626 entries.erase(entryFound);
627
628 checkAndRemoveBlockingError(entryId);
629
630 for (auto& remove : Extensions::getDeleteFunctions())
631 {
632 try
633 {
634 remove(entryId);
635 }
636 catch (const std::exception& e)
637 {
638 lg2::error("An extension's delete function threw an exception: "
639 "{ERROR}",
640 "ERROR", e);
641 }
642 }
643 }
644 else
645 {
646 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
647 }
648 }
649
restore()650 void Manager::restore()
651 {
652 auto sanity = [](const auto& id, const auto& restoredId) {
653 return id == restoredId;
654 };
655
656 fs::path dir(paths::error());
657 if (!fs::exists(dir) || fs::is_empty(dir))
658 {
659 return;
660 }
661
662 for (auto& file : fs::directory_iterator(dir))
663 {
664 auto id = file.path().filename().c_str();
665 auto idNum = std::stol(id);
666 auto e = std::make_unique<Entry>(
667 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
668 if (deserialize(file.path(), *e))
669 {
670 // validate the restored error entry id
671 if (sanity(static_cast<uint32_t>(idNum), e->id()))
672 {
673 e->path(file.path(), true);
674 if (e->severity() >= Entry::sevLowerLimit)
675 {
676 infoErrors.push_back(idNum);
677 }
678 else
679 {
680 realErrors.push_back(idNum);
681 }
682
683 entries.insert(std::make_pair(idNum, std::move(e)));
684 }
685 else
686 {
687 lg2::error(
688 "Failed in sanity check while restoring error entry. "
689 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
690 "ID_NUM", idNum, "ENTRY_ID", e->id());
691 }
692 }
693 }
694
695 if (!entries.empty())
696 {
697 entryId = entries.rbegin()->first;
698 }
699 }
700
readFWVersion()701 std::string Manager::readFWVersion()
702 {
703 auto version = util::getOSReleaseValue("VERSION_ID");
704
705 if (!version)
706 {
707 lg2::error("Unable to read BMC firmware version");
708 }
709
710 return version.value_or("");
711 }
712
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)713 auto Manager::create(const std::string& message, Entry::Level severity,
714 const std::map<std::string, std::string>& additionalData,
715 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
716 {
717 return createEntry(message, severity, additionalData, ffdc);
718 }
719
720 } // namespace internal
721 } // namespace logging
722 } // namespace phosphor
723