1 #include "config.h"
2
3 #include "log_manager.hpp"
4
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <fstream>
26 #include <functional>
27 #include <future>
28 #include <iostream>
29 #include <map>
30 #include <ranges>
31 #include <set>
32 #include <string>
33 #include <string_view>
34 #include <vector>
35
36 using namespace std::chrono;
37 extern const std::map<
38 phosphor::logging::metadata::Metadata,
39 std::function<phosphor::logging::metadata::associations::Type>>
40 meta;
41
42 namespace phosphor
43 {
44 namespace logging
45 {
46 namespace internal
47 {
getLevel(const std::string & errMsg)48 inline auto getLevel(const std::string& errMsg)
49 {
50 auto reqLevel = Entry::Level::Error; // Default to Error
51
52 auto levelmap = g_errLevelMap.find(errMsg);
53 if (levelmap != g_errLevelMap.end())
54 {
55 reqLevel = static_cast<Entry::Level>(levelmap->second);
56 }
57
58 return reqLevel;
59 }
60
getRealErrSize()61 int Manager::getRealErrSize()
62 {
63 return realErrors.size();
64 }
65
getInfoErrSize()66 int Manager::getInfoErrSize()
67 {
68 return infoErrors.size();
69 }
70
commit(uint64_t transactionId,std::string errMsg)71 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
72 {
73 auto level = getLevel(errMsg);
74 _commit(transactionId, std::move(errMsg), level);
75 return entryId;
76 }
77
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)78 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
79 uint32_t errLvl)
80 {
81 _commit(transactionId, std::move(errMsg),
82 static_cast<Entry::Level>(errLvl));
83 return entryId;
84 }
85
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)86 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
87 std::string&& errMsg, Entry::Level errLvl)
88 {
89 std::map<std::string, std::string> additionalData{};
90
91 // When running as a test-case, the system may have a LOT of journal
92 // data and we may not have permissions to do some of the journal sync
93 // operations. Just skip over them.
94 if (!IS_UNIT_TEST)
95 {
96 static constexpr auto transactionIdVar =
97 std::string_view{"TRANSACTION_ID"};
98 // Length of 'TRANSACTION_ID' string.
99 static constexpr auto transactionIdVarSize = transactionIdVar.size();
100 // Length of 'TRANSACTION_ID=' string.
101 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
102
103 // Flush all the pending log messages into the journal
104 util::journalSync();
105
106 sd_journal* j = nullptr;
107 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
108 if (rc < 0)
109 {
110 lg2::error("Failed to open journal: {ERROR}", "ERROR",
111 strerror(-rc));
112 return;
113 }
114
115 std::string transactionIdStr = std::to_string(transactionId);
116 std::set<std::string> metalist;
117 auto metamap = g_errMetaMap.find(errMsg);
118 if (metamap != g_errMetaMap.end())
119 {
120 metalist.insert(metamap->second.begin(), metamap->second.end());
121 }
122
123 // Add _PID field information in AdditionalData.
124 metalist.insert("_PID");
125
126 // Read the journal from the end to get the most recent entry first.
127 // The result from the sd_journal_get_data() is of the form
128 // VARIABLE=value.
129 SD_JOURNAL_FOREACH_BACKWARDS(j)
130 {
131 const char* data = nullptr;
132 size_t length = 0;
133
134 // Look for the transaction id metadata variable
135 rc = sd_journal_get_data(j, transactionIdVar.data(),
136 (const void**)&data, &length);
137 if (rc < 0)
138 {
139 // This journal entry does not have the TRANSACTION_ID
140 // metadata variable.
141 continue;
142 }
143
144 // journald does not guarantee that sd_journal_get_data() returns
145 // NULL terminated strings, so need to specify the size to use to
146 // compare, use the returned length instead of anything that relies
147 // on NULL terminators like strlen(). The data variable is in the
148 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
149 // characters plus the (=) sign to do the comparison. 'data +
150 // transactionIdVarOffset' will be in the form of '1234'. 'length -
151 // transactionIdVarOffset' will be the length of '1234'.
152 if ((length <= (transactionIdVarOffset)) ||
153 (transactionIdStr.compare(
154 0, transactionIdStr.size(), data + transactionIdVarOffset,
155 length - transactionIdVarOffset) != 0))
156 {
157 // The value of the TRANSACTION_ID metadata is not the requested
158 // transaction id number.
159 continue;
160 }
161
162 // Search for all metadata variables in the current journal entry.
163 for (auto i = metalist.cbegin(); i != metalist.cend();)
164 {
165 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
166 &length);
167 if (rc < 0)
168 {
169 // Metadata variable not found, check next metadata
170 // variable.
171 i++;
172 continue;
173 }
174
175 // Metadata variable found, save it and remove it from the set.
176 std::string metadata(data, length);
177 if (auto pos = metadata.find('='); pos != std::string::npos)
178 {
179 auto key = metadata.substr(0, pos);
180 auto value = metadata.substr(pos + 1);
181 additionalData.emplace(std::move(key), std::move(value));
182 }
183 i = metalist.erase(i);
184 }
185 if (metalist.empty())
186 {
187 // All metadata variables found, break out of journal loop.
188 break;
189 }
190 }
191 if (!metalist.empty())
192 {
193 // Not all the metadata variables were found in the journal.
194 for (auto& metaVarStr : metalist)
195 {
196 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
197 metaVarStr);
198 }
199 }
200
201 sd_journal_close(j);
202 }
203 createEntry(errMsg, errLvl, additionalData);
204 }
205
createEntry(std::string errMsg,Entry::Level errLvl,std::map<std::string,std::string> additionalData,const FFDCEntries & ffdc)206 auto Manager::createEntry(std::string errMsg, Entry::Level errLvl,
207 std::map<std::string, std::string> additionalData,
208 const FFDCEntries& ffdc)
209 -> sdbusplus::message::object_path
210 {
211 if (!Extensions::disableDefaultLogCaps())
212 {
213 if (errLvl < Entry::sevLowerLimit)
214 {
215 if (realErrors.size() >= ERROR_CAP)
216 {
217 erase(realErrors.front());
218 }
219 }
220 else
221 {
222 if (infoErrors.size() >= ERROR_INFO_CAP)
223 {
224 erase(infoErrors.front());
225 }
226 }
227 }
228
229 entryId++;
230 if (errLvl >= Entry::sevLowerLimit)
231 {
232 infoErrors.push_back(entryId);
233 }
234 else
235 {
236 realErrors.push_back(entryId);
237 }
238 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
239 std::chrono::system_clock::now().time_since_epoch())
240 .count();
241 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
242
243 AssociationList objects{};
244 auto additionalDataVec = util::additional_data::combine(additionalData);
245 processMetadata(errMsg, additionalDataVec, objects);
246
247 auto e = std::make_unique<Entry>(
248 busLog, objPath, entryId,
249 ms, // Milliseconds since 1970
250 errLvl, std::move(errMsg), std::move(additionalData),
251 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
252
253 serialize(*e);
254
255 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
256 isCalloutPresent(*e))
257 {
258 quiesceOnError(entryId);
259 }
260
261 // Add entry before calling the extensions so that they have access to it
262 entries.insert(std::make_pair(entryId, std::move(e)));
263
264 doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
265
266 // Note: No need to close the file descriptors in the FFDC.
267
268 return objPath;
269 }
270
271 auto
createFromEvent(sdbusplus::exception::generated_event_base && event)272 Manager::createFromEvent(sdbusplus::exception::generated_event_base&& event)
273 -> sdbusplus::message::object_path
274 {
275 auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
276 return this->createEntry(msg, level, std::move(data));
277 }
278
isQuiesceOnErrorEnabled()279 bool Manager::isQuiesceOnErrorEnabled()
280 {
281 // When running under tests, the Logging.Settings service will not be
282 // present. Assume false.
283 if (IS_UNIT_TEST)
284 {
285 return false;
286 }
287
288 std::variant<bool> property;
289
290 auto method = this->busLog.new_method_call(
291 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
292 "org.freedesktop.DBus.Properties", "Get");
293
294 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
295
296 try
297 {
298 auto reply = this->busLog.call(method);
299 reply.read(property);
300 }
301 catch (const sdbusplus::exception_t& e)
302 {
303 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
304 e);
305 return false;
306 }
307
308 return std::get<bool>(property);
309 }
310
isCalloutPresent(const Entry & entry)311 bool Manager::isCalloutPresent(const Entry& entry)
312 {
313 for (const auto& c : std::views::keys(entry.additionalData()))
314 {
315 if (c.find("CALLOUT_") != std::string::npos)
316 {
317 return true;
318 }
319 }
320
321 return false;
322 }
323
findAndRemoveResolvedBlocks()324 void Manager::findAndRemoveResolvedBlocks()
325 {
326 for (auto& entry : entries)
327 {
328 if (entry.second->resolved())
329 {
330 checkAndRemoveBlockingError(entry.first);
331 }
332 }
333 }
334
onEntryResolve(sdbusplus::message_t & msg)335 void Manager::onEntryResolve(sdbusplus::message_t& msg)
336 {
337 using Interface = std::string;
338 using Property = std::string;
339 using Value = std::string;
340 using Properties = std::map<Property, std::variant<Value>>;
341
342 Interface interface;
343 Properties properties;
344
345 msg.read(interface, properties);
346
347 for (const auto& p : properties)
348 {
349 if (p.first == "Resolved")
350 {
351 findAndRemoveResolvedBlocks();
352 return;
353 }
354 }
355 }
356
checkAndQuiesceHost()357 void Manager::checkAndQuiesceHost()
358 {
359 using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
360
361 // First check host state
362 std::variant<Host::HostState> property;
363
364 auto method = this->busLog.new_method_call(
365 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
366 "org.freedesktop.DBus.Properties", "Get");
367
368 method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
369
370 try
371 {
372 auto reply = this->busLog.call(method);
373 reply.read(property);
374 }
375 catch (const sdbusplus::exception_t& e)
376 {
377 // Quiescing the host is a "best effort" type function. If unable to
378 // read the host state or it comes back empty, just return.
379 // The boot block object will still be created and the associations to
380 // find the log will be present. Don't want a dependency with
381 // phosphor-state-manager service
382 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
383 e);
384 return;
385 }
386
387 auto hostState = std::get<Host::HostState>(property);
388 if (hostState != Host::HostState::Running)
389 {
390 return;
391 }
392
393 auto quiesce = this->busLog.new_method_call(
394 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
395 "org.freedesktop.systemd1.Manager", "StartUnit");
396
397 quiesce.append("obmc-host-graceful-quiesce@0.target");
398 quiesce.append("replace");
399
400 this->busLog.call_noreply(quiesce);
401 }
402
quiesceOnError(const uint32_t entryId)403 void Manager::quiesceOnError(const uint32_t entryId)
404 {
405 // Verify we don't already have this entry blocking
406 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
407 [&](const std::unique_ptr<Block>& obj) {
408 return obj->entryId == entryId;
409 });
410 if (it != this->blockingErrors.end())
411 {
412 // Already recorded so just return
413 lg2::debug(
414 "QuiesceOnError set and callout present but entry already logged");
415 return;
416 }
417
418 lg2::info("QuiesceOnError set and callout present");
419
420 auto blockPath =
421 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
422 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
423 this->blockingErrors.push_back(std::move(blockObj));
424
425 // Register call back if log is resolved
426 using namespace sdbusplus::bus::match::rules;
427 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
428 auto callback = std::make_unique<sdbusplus::bus::match_t>(
429 this->busLog,
430 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
431 std::bind(std::mem_fn(&Manager::onEntryResolve), this,
432 std::placeholders::_1));
433
434 propChangedEntryCallback.insert(
435 std::make_pair(entryId, std::move(callback)));
436
437 checkAndQuiesceHost();
438 }
439
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)440 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
441 {
442 // Make the association <endpointpath>/<endpointtype> paths
443 std::vector<std::string> assocs;
444 for (const auto& [forwardType, reverseType, endpoint] :
445 entry.associations())
446 {
447 std::string e{endpoint};
448 e += '/' + reverseType;
449 assocs.push_back(e);
450 }
451
452 for (auto& create : Extensions::getCreateFunctions())
453 {
454 try
455 {
456 create(entry.message(), entry.id(), entry.timestamp(),
457 entry.severity(), entry.additionalData2(), assocs, ffdc);
458 }
459 catch (const std::exception& e)
460 {
461 lg2::error(
462 "An extension's create function threw an exception: {ERROR}",
463 "ERROR", e);
464 }
465 }
466 }
467
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const468 void Manager::processMetadata(const std::string& /*errorName*/,
469 const std::vector<std::string>& additionalData,
470 AssociationList& objects) const
471 {
472 // additionalData is a list of "metadata=value"
473 constexpr auto separator = '=';
474 for (const auto& entryItem : additionalData)
475 {
476 auto found = entryItem.find(separator);
477 if (std::string::npos != found)
478 {
479 auto metadata = entryItem.substr(0, found);
480 auto iter = meta.find(metadata);
481 if (meta.end() != iter)
482 {
483 (iter->second)(metadata, additionalData, objects);
484 }
485 }
486 }
487 }
488
checkAndRemoveBlockingError(uint32_t entryId)489 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
490 {
491 // First look for blocking object and remove
492 auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
493 [&](const std::unique_ptr<Block>& obj) {
494 return obj->entryId == entryId;
495 });
496 if (it != blockingErrors.end())
497 {
498 blockingErrors.erase(it);
499 }
500
501 // Now remove the callback looking for the error to be resolved
502 auto resolveFind = propChangedEntryCallback.find(entryId);
503 if (resolveFind != propChangedEntryCallback.end())
504 {
505 propChangedEntryCallback.erase(resolveFind);
506 }
507
508 return;
509 }
510
eraseAll()511 size_t Manager::eraseAll()
512 {
513 std::vector<uint32_t> logIDWithHwIsolation;
514 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
515 {
516 try
517 {
518 func(logIDWithHwIsolation);
519 }
520 catch (const std::exception& e)
521 {
522 lg2::error("An extension's LogIDWithHwIsolation function threw an "
523 "exception: {ERROR}",
524 "ERROR", e);
525 }
526 }
527 size_t entriesSize = entries.size();
528 auto iter = entries.begin();
529 if (logIDWithHwIsolation.empty())
530 {
531 while (iter != entries.end())
532 {
533 auto e = iter->first;
534 ++iter;
535 erase(e);
536 }
537 entryId = 0;
538 }
539 else
540 {
541 while (iter != entries.end())
542 {
543 auto e = iter->first;
544 ++iter;
545 try
546 {
547 if (!std::ranges::contains(logIDWithHwIsolation, e))
548 {
549 erase(e);
550 }
551 else
552 {
553 entriesSize--;
554 }
555 }
556 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
557 Unavailable& e)
558 {
559 entriesSize--;
560 }
561 }
562 if (!entries.empty())
563 {
564 entryId = std::ranges::max_element(entries, [](const auto& a,
565 const auto& b) {
566 return a.first < b.first;
567 })->first;
568 }
569 else
570 {
571 entryId = 0;
572 }
573 }
574 return entriesSize;
575 }
576
erase(uint32_t entryId)577 void Manager::erase(uint32_t entryId)
578 {
579 auto entryFound = entries.find(entryId);
580 if (entries.end() != entryFound)
581 {
582 for (auto& func : Extensions::getDeleteProhibitedFunctions())
583 {
584 try
585 {
586 bool prohibited = false;
587 func(entryId, prohibited);
588 if (prohibited)
589 {
590 throw sdbusplus::xyz::openbmc_project::Common::Error::
591 Unavailable();
592 }
593 }
594 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
595 Unavailable& e)
596 {
597 throw;
598 }
599 catch (const std::exception& e)
600 {
601 lg2::error("An extension's deleteProhibited function threw an "
602 "exception: {ERROR}",
603 "ERROR", e);
604 }
605 }
606
607 // Delete the persistent representation of this error.
608 fs::path errorPath(paths::error());
609 errorPath /= std::to_string(entryId);
610 fs::remove(errorPath);
611
612 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
613 auto it = std::find(ids.begin(), ids.end(), id);
614 if (it != ids.end())
615 {
616 ids.erase(it);
617 }
618 };
619 if (entryFound->second->severity() >= Entry::sevLowerLimit)
620 {
621 removeId(infoErrors, entryId);
622 }
623 else
624 {
625 removeId(realErrors, entryId);
626 }
627 entries.erase(entryFound);
628
629 checkAndRemoveBlockingError(entryId);
630
631 for (auto& remove : Extensions::getDeleteFunctions())
632 {
633 try
634 {
635 remove(entryId);
636 }
637 catch (const std::exception& e)
638 {
639 lg2::error("An extension's delete function threw an exception: "
640 "{ERROR}",
641 "ERROR", e);
642 }
643 }
644 }
645 else
646 {
647 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
648 }
649 }
650
restore()651 void Manager::restore()
652 {
653 auto sanity = [](const auto& id, const auto& restoredId) {
654 return id == restoredId;
655 };
656
657 fs::path dir(paths::error());
658 if (!fs::exists(dir) || fs::is_empty(dir))
659 {
660 return;
661 }
662
663 for (auto& file : fs::directory_iterator(dir))
664 {
665 auto id = file.path().filename().c_str();
666 auto idNum = std::stol(id);
667 auto e = std::make_unique<Entry>(
668 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
669 if (deserialize(file.path(), *e))
670 {
671 // validate the restored error entry id
672 if (sanity(static_cast<uint32_t>(idNum), e->id()))
673 {
674 e->path(file.path(), true);
675 if (e->severity() >= Entry::sevLowerLimit)
676 {
677 infoErrors.push_back(idNum);
678 }
679 else
680 {
681 realErrors.push_back(idNum);
682 }
683
684 entries.insert(std::make_pair(idNum, std::move(e)));
685 }
686 else
687 {
688 lg2::error(
689 "Failed in sanity check while restoring error entry. "
690 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
691 "ID_NUM", idNum, "ENTRY_ID", e->id());
692 }
693 }
694 }
695
696 if (!entries.empty())
697 {
698 entryId = entries.rbegin()->first;
699 }
700 }
701
readFWVersion()702 std::string Manager::readFWVersion()
703 {
704 auto version = util::getOSReleaseValue("VERSION_ID");
705
706 if (!version)
707 {
708 lg2::error("Unable to read BMC firmware version");
709 }
710
711 return version.value_or("");
712 }
713
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)714 auto Manager::create(const std::string& message, Entry::Level severity,
715 const std::map<std::string, std::string>& additionalData,
716 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
717 {
718 return createEntry(message, severity, additionalData, ffdc);
719 }
720
721 } // namespace internal
722 } // namespace logging
723 } // namespace phosphor
724