1 #include "config.h"
2
3 #include "log_manager.hpp"
4
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <functional>
26 #include <map>
27 #include <ranges>
28 #include <set>
29 #include <string>
30 #include <string_view>
31 #include <vector>
32
33 using namespace std::chrono;
34 extern const std::map<
35 phosphor::logging::metadata::Metadata,
36 std::function<phosphor::logging::metadata::associations::Type>>
37 meta;
38
39 namespace phosphor
40 {
41 namespace logging
42 {
43 namespace internal
44 {
getLevel(const std::string & errMsg)45 inline auto getLevel(const std::string& errMsg)
46 {
47 auto reqLevel = Entry::Level::Error; // Default to Error
48
49 auto levelmap = g_errLevelMap.find(errMsg);
50 if (levelmap != g_errLevelMap.end())
51 {
52 reqLevel = static_cast<Entry::Level>(levelmap->second);
53 }
54
55 return reqLevel;
56 }
57
getRealErrSize()58 int Manager::getRealErrSize()
59 {
60 return realErrors.size();
61 }
62
getInfoErrSize()63 int Manager::getInfoErrSize()
64 {
65 return infoErrors.size();
66 }
67
commit(uint64_t transactionId,std::string errMsg)68 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
69 {
70 auto level = getLevel(errMsg);
71 _commit(transactionId, std::move(errMsg), level);
72 return entryId;
73 }
74
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)75 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
76 uint32_t errLvl)
77 {
78 _commit(transactionId, std::move(errMsg),
79 static_cast<Entry::Level>(errLvl));
80 return entryId;
81 }
82
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)83 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
84 std::string&& errMsg, Entry::Level errLvl)
85 {
86 std::map<std::string, std::string> additionalData{};
87
88 // When running as a test-case, the system may have a LOT of journal
89 // data and we may not have permissions to do some of the journal sync
90 // operations. Just skip over them.
91 if (!IS_UNIT_TEST)
92 {
93 static constexpr auto transactionIdVar =
94 std::string_view{"TRANSACTION_ID"};
95 // Length of 'TRANSACTION_ID' string.
96 static constexpr auto transactionIdVarSize = transactionIdVar.size();
97 // Length of 'TRANSACTION_ID=' string.
98 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
99
100 // Flush all the pending log messages into the journal
101 util::journalSync();
102
103 sd_journal* j = nullptr;
104 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
105 if (rc < 0)
106 {
107 lg2::error("Failed to open journal: {ERROR}", "ERROR",
108 strerror(-rc));
109 return;
110 }
111
112 std::string transactionIdStr = std::to_string(transactionId);
113 std::set<std::string> metalist;
114 auto metamap = g_errMetaMap.find(errMsg);
115 if (metamap != g_errMetaMap.end())
116 {
117 metalist.insert(metamap->second.begin(), metamap->second.end());
118 }
119
120 // Add _PID field information in AdditionalData.
121 metalist.insert("_PID");
122
123 // Read the journal from the end to get the most recent entry first.
124 // The result from the sd_journal_get_data() is of the form
125 // VARIABLE=value.
126 SD_JOURNAL_FOREACH_BACKWARDS(j)
127 {
128 const char* data = nullptr;
129 size_t length = 0;
130
131 // Look for the transaction id metadata variable
132 rc = sd_journal_get_data(j, transactionIdVar.data(),
133 (const void**)&data, &length);
134 if (rc < 0)
135 {
136 // This journal entry does not have the TRANSACTION_ID
137 // metadata variable.
138 continue;
139 }
140
141 // journald does not guarantee that sd_journal_get_data() returns
142 // NULL terminated strings, so need to specify the size to use to
143 // compare, use the returned length instead of anything that relies
144 // on NULL terminators like strlen(). The data variable is in the
145 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
146 // characters plus the (=) sign to do the comparison. 'data +
147 // transactionIdVarOffset' will be in the form of '1234'. 'length -
148 // transactionIdVarOffset' will be the length of '1234'.
149 if ((length <= (transactionIdVarOffset)) ||
150 (transactionIdStr.compare(
151 0, transactionIdStr.size(), data + transactionIdVarOffset,
152 length - transactionIdVarOffset) != 0))
153 {
154 // The value of the TRANSACTION_ID metadata is not the requested
155 // transaction id number.
156 continue;
157 }
158
159 // Search for all metadata variables in the current journal entry.
160 for (auto i = metalist.cbegin(); i != metalist.cend();)
161 {
162 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
163 &length);
164 if (rc < 0)
165 {
166 // Metadata variable not found, check next metadata
167 // variable.
168 i++;
169 continue;
170 }
171
172 // Metadata variable found, save it and remove it from the set.
173 std::string metadata(data, length);
174 if (auto pos = metadata.find('='); pos != std::string::npos)
175 {
176 auto key = metadata.substr(0, pos);
177 auto value = metadata.substr(pos + 1);
178 additionalData.emplace(std::move(key), std::move(value));
179 }
180 i = metalist.erase(i);
181 }
182 if (metalist.empty())
183 {
184 // All metadata variables found, break out of journal loop.
185 break;
186 }
187 }
188 if (!metalist.empty())
189 {
190 // Not all the metadata variables were found in the journal.
191 for (auto& metaVarStr : metalist)
192 {
193 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
194 metaVarStr);
195 }
196 }
197
198 sd_journal_close(j);
199 }
200 createEntry(errMsg, errLvl, additionalData);
201 }
202
createEntry(std::string errMsg,Entry::Level errLvl,std::map<std::string,std::string> additionalData,const FFDCEntries & ffdc)203 auto Manager::createEntry(std::string errMsg, Entry::Level errLvl,
204 std::map<std::string, std::string> additionalData,
205 const FFDCEntries& ffdc)
206 -> sdbusplus::message::object_path
207 {
208 if (!Extensions::disableDefaultLogCaps())
209 {
210 if (errLvl < Entry::sevLowerLimit)
211 {
212 if (realErrors.size() >= ERROR_CAP)
213 {
214 erase(realErrors.front());
215 }
216 }
217 else
218 {
219 if (infoErrors.size() >= ERROR_INFO_CAP)
220 {
221 erase(infoErrors.front());
222 }
223 }
224 }
225
226 entryId++;
227 if (errLvl >= Entry::sevLowerLimit)
228 {
229 infoErrors.push_back(entryId);
230 }
231 else
232 {
233 realErrors.push_back(entryId);
234 }
235 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
236 std::chrono::system_clock::now().time_since_epoch())
237 .count();
238 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
239
240 AssociationList objects{};
241 auto additionalDataVec = util::additional_data::combine(additionalData);
242 processMetadata(errMsg, additionalDataVec, objects);
243
244 auto e = std::make_unique<Entry>(
245 busLog, objPath, entryId,
246 ms, // Milliseconds since 1970
247 errLvl, std::move(errMsg), std::move(additionalData),
248 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
249
250 serialize(*e);
251
252 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
253 isCalloutPresent(*e))
254 {
255 quiesceOnError(entryId);
256 }
257
258 // Add entry before calling the extensions so that they have access to it
259 entries.insert(std::make_pair(entryId, std::move(e)));
260
261 doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
262
263 // Note: No need to close the file descriptors in the FFDC.
264
265 return objPath;
266 }
267
createFromEvent(sdbusplus::exception::generated_event_base && event)268 auto Manager::createFromEvent(
269 sdbusplus::exception::generated_event_base&& event)
270 -> sdbusplus::message::object_path
271 {
272 auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
273 return this->createEntry(msg, level, std::move(data));
274 }
275
isQuiesceOnErrorEnabled()276 bool Manager::isQuiesceOnErrorEnabled()
277 {
278 // When running under tests, the Logging.Settings service will not be
279 // present. Assume false.
280 if (IS_UNIT_TEST)
281 {
282 return false;
283 }
284
285 std::variant<bool> property;
286
287 auto method = this->busLog.new_method_call(
288 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
289 "org.freedesktop.DBus.Properties", "Get");
290
291 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
292
293 try
294 {
295 auto reply = this->busLog.call(method);
296 reply.read(property);
297 }
298 catch (const sdbusplus::exception_t& e)
299 {
300 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
301 e);
302 return false;
303 }
304
305 return std::get<bool>(property);
306 }
307
isCalloutPresent(const Entry & entry)308 bool Manager::isCalloutPresent(const Entry& entry)
309 {
310 for (const auto& c : std::views::keys(entry.additionalData()))
311 {
312 if (c.find("CALLOUT_") != std::string::npos)
313 {
314 return true;
315 }
316 }
317
318 return false;
319 }
320
findAndRemoveResolvedBlocks()321 void Manager::findAndRemoveResolvedBlocks()
322 {
323 for (auto& entry : entries)
324 {
325 if (entry.second->resolved())
326 {
327 checkAndRemoveBlockingError(entry.first);
328 }
329 }
330 }
331
onEntryResolve(sdbusplus::message_t & msg)332 void Manager::onEntryResolve(sdbusplus::message_t& msg)
333 {
334 using Interface = std::string;
335 using Property = std::string;
336 using Value = std::string;
337 using Properties = std::map<Property, std::variant<Value>>;
338
339 Interface interface;
340 Properties properties;
341
342 msg.read(interface, properties);
343
344 for (const auto& p : properties)
345 {
346 if (p.first == "Resolved")
347 {
348 findAndRemoveResolvedBlocks();
349 return;
350 }
351 }
352 }
353
checkAndQuiesceHost()354 void Manager::checkAndQuiesceHost()
355 {
356 using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
357
358 // First check host state
359 std::variant<Host::HostState> property;
360
361 auto method = this->busLog.new_method_call(
362 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
363 "org.freedesktop.DBus.Properties", "Get");
364
365 method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
366
367 try
368 {
369 auto reply = this->busLog.call(method);
370 reply.read(property);
371 }
372 catch (const sdbusplus::exception_t& e)
373 {
374 // Quiescing the host is a "best effort" type function. If unable to
375 // read the host state or it comes back empty, just return.
376 // The boot block object will still be created and the associations to
377 // find the log will be present. Don't want a dependency with
378 // phosphor-state-manager service
379 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
380 e);
381 return;
382 }
383
384 auto hostState = std::get<Host::HostState>(property);
385 if (hostState != Host::HostState::Running)
386 {
387 return;
388 }
389
390 auto quiesce = this->busLog.new_method_call(
391 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
392 "org.freedesktop.systemd1.Manager", "StartUnit");
393
394 quiesce.append("obmc-host-graceful-quiesce@0.target");
395 quiesce.append("replace");
396
397 this->busLog.call_noreply(quiesce);
398 }
399
quiesceOnError(const uint32_t entryId)400 void Manager::quiesceOnError(const uint32_t entryId)
401 {
402 // Verify we don't already have this entry blocking
403 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
404 [&](const std::unique_ptr<Block>& obj) {
405 return obj->entryId == entryId;
406 });
407 if (it != this->blockingErrors.end())
408 {
409 // Already recorded so just return
410 lg2::debug(
411 "QuiesceOnError set and callout present but entry already logged");
412 return;
413 }
414
415 lg2::info("QuiesceOnError set and callout present");
416
417 auto blockPath =
418 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
419 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
420 this->blockingErrors.push_back(std::move(blockObj));
421
422 // Register call back if log is resolved
423 using namespace sdbusplus::bus::match::rules;
424 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
425 auto callback = std::make_unique<sdbusplus::bus::match_t>(
426 this->busLog,
427 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
428 std::bind(std::mem_fn(&Manager::onEntryResolve), this,
429 std::placeholders::_1));
430
431 propChangedEntryCallback.insert(
432 std::make_pair(entryId, std::move(callback)));
433
434 checkAndQuiesceHost();
435 }
436
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)437 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
438 {
439 // Make the association <endpointpath>/<endpointtype> paths
440 std::vector<std::string> assocs;
441 for (const auto& [forwardType, reverseType, endpoint] :
442 entry.associations())
443 {
444 std::string e{endpoint};
445 e += '/' + reverseType;
446 assocs.push_back(e);
447 }
448
449 for (auto& create : Extensions::getCreateFunctions())
450 {
451 try
452 {
453 create(entry.message(), entry.id(), entry.timestamp(),
454 entry.severity(), entry.additionalData(), assocs, ffdc);
455 }
456 catch (const std::exception& e)
457 {
458 lg2::error(
459 "An extension's create function threw an exception: {ERROR}",
460 "ERROR", e);
461 }
462 }
463 }
464
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const465 void Manager::processMetadata(const std::string& /*errorName*/,
466 const std::vector<std::string>& additionalData,
467 AssociationList& objects) const
468 {
469 // additionalData is a list of "metadata=value"
470 constexpr auto separator = '=';
471 for (const auto& entryItem : additionalData)
472 {
473 auto found = entryItem.find(separator);
474 if (std::string::npos != found)
475 {
476 auto metadata = entryItem.substr(0, found);
477 auto iter = meta.find(metadata);
478 if (meta.end() != iter)
479 {
480 (iter->second)(metadata, additionalData, objects);
481 }
482 }
483 }
484 }
485
checkAndRemoveBlockingError(uint32_t entryId)486 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
487 {
488 // First look for blocking object and remove
489 auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
490 [&](const std::unique_ptr<Block>& obj) {
491 return obj->entryId == entryId;
492 });
493 if (it != blockingErrors.end())
494 {
495 blockingErrors.erase(it);
496 }
497
498 // Now remove the callback looking for the error to be resolved
499 auto resolveFind = propChangedEntryCallback.find(entryId);
500 if (resolveFind != propChangedEntryCallback.end())
501 {
502 propChangedEntryCallback.erase(resolveFind);
503 }
504
505 return;
506 }
507
eraseAll()508 size_t Manager::eraseAll()
509 {
510 std::vector<uint32_t> logIDWithHwIsolation;
511 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
512 {
513 try
514 {
515 func(logIDWithHwIsolation);
516 }
517 catch (const std::exception& e)
518 {
519 lg2::error("An extension's LogIDWithHwIsolation function threw an "
520 "exception: {ERROR}",
521 "ERROR", e);
522 }
523 }
524 size_t entriesSize = entries.size();
525 auto iter = entries.begin();
526 if (logIDWithHwIsolation.empty())
527 {
528 while (iter != entries.end())
529 {
530 auto e = iter->first;
531 ++iter;
532 erase(e);
533 }
534 entryId = 0;
535 }
536 else
537 {
538 while (iter != entries.end())
539 {
540 auto e = iter->first;
541 ++iter;
542 try
543 {
544 if (!std::ranges::contains(logIDWithHwIsolation, e))
545 {
546 erase(e);
547 }
548 else
549 {
550 entriesSize--;
551 }
552 }
553 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
554 Unavailable& e)
555 {
556 entriesSize--;
557 }
558 }
559 if (!entries.empty())
560 {
561 entryId = std::ranges::max_element(entries, [](const auto& a,
562 const auto& b) {
563 return a.first < b.first;
564 })->first;
565 }
566 else
567 {
568 entryId = 0;
569 }
570 }
571 return entriesSize;
572 }
573
erase(uint32_t entryId)574 void Manager::erase(uint32_t entryId)
575 {
576 auto entryFound = entries.find(entryId);
577 if (entries.end() != entryFound)
578 {
579 for (auto& func : Extensions::getDeleteProhibitedFunctions())
580 {
581 try
582 {
583 bool prohibited = false;
584 func(entryId, prohibited);
585 if (prohibited)
586 {
587 throw sdbusplus::xyz::openbmc_project::Common::Error::
588 Unavailable();
589 }
590 }
591 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
592 Unavailable& e)
593 {
594 throw;
595 }
596 catch (const std::exception& e)
597 {
598 lg2::error("An extension's deleteProhibited function threw an "
599 "exception: {ERROR}",
600 "ERROR", e);
601 }
602 }
603
604 // Delete the persistent representation of this error.
605 fs::path errorPath(paths::error());
606 errorPath /= std::to_string(entryId);
607 fs::remove(errorPath);
608
609 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
610 auto it = std::find(ids.begin(), ids.end(), id);
611 if (it != ids.end())
612 {
613 ids.erase(it);
614 }
615 };
616 if (entryFound->second->severity() >= Entry::sevLowerLimit)
617 {
618 removeId(infoErrors, entryId);
619 }
620 else
621 {
622 removeId(realErrors, entryId);
623 }
624 entries.erase(entryFound);
625
626 checkAndRemoveBlockingError(entryId);
627
628 for (auto& remove : Extensions::getDeleteFunctions())
629 {
630 try
631 {
632 remove(entryId);
633 }
634 catch (const std::exception& e)
635 {
636 lg2::error("An extension's delete function threw an exception: "
637 "{ERROR}",
638 "ERROR", e);
639 }
640 }
641 }
642 else
643 {
644 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
645 }
646 }
647
restore()648 void Manager::restore()
649 {
650 auto sanity = [](const auto& id, const auto& restoredId) {
651 return id == restoredId;
652 };
653
654 fs::path dir(paths::error());
655 if (!fs::exists(dir) || fs::is_empty(dir))
656 {
657 return;
658 }
659
660 for (auto& file : fs::directory_iterator(dir))
661 {
662 auto id = file.path().filename().c_str();
663 auto idNum = std::stol(id);
664 auto e = std::make_unique<Entry>(
665 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
666 if (deserialize(file.path(), *e))
667 {
668 // validate the restored error entry id
669 if (sanity(static_cast<uint32_t>(idNum), e->id()))
670 {
671 e->path(file.path(), true);
672 if (e->severity() >= Entry::sevLowerLimit)
673 {
674 infoErrors.push_back(idNum);
675 }
676 else
677 {
678 realErrors.push_back(idNum);
679 }
680
681 entries.insert(std::make_pair(idNum, std::move(e)));
682 }
683 else
684 {
685 lg2::error(
686 "Failed in sanity check while restoring error entry. "
687 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
688 "ID_NUM", idNum, "ENTRY_ID", e->id());
689 }
690 }
691 }
692
693 if (!entries.empty())
694 {
695 entryId = entries.rbegin()->first;
696 }
697 }
698
readFWVersion()699 std::string Manager::readFWVersion()
700 {
701 auto version = util::getOSReleaseValue("VERSION_ID");
702
703 if (!version)
704 {
705 lg2::error("Unable to read BMC firmware version");
706 }
707
708 return version.value_or("");
709 }
710
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)711 auto Manager::create(const std::string& message, Entry::Level severity,
712 const std::map<std::string, std::string>& additionalData,
713 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
714 {
715 return createEntry(message, severity, additionalData, ffdc);
716 }
717
718 } // namespace internal
719 } // namespace logging
720 } // namespace phosphor
721