1 #include "config.h"
2
3 #include "log_manager.hpp"
4
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <fstream>
26 #include <functional>
27 #include <future>
28 #include <iostream>
29 #include <map>
30 #include <set>
31 #include <string>
32 #include <string_view>
33 #include <vector>
34
35 using namespace std::chrono;
36 extern const std::map<
37 phosphor::logging::metadata::Metadata,
38 std::function<phosphor::logging::metadata::associations::Type>>
39 meta;
40
41 namespace phosphor
42 {
43 namespace logging
44 {
45 namespace internal
46 {
getLevel(const std::string & errMsg)47 inline auto getLevel(const std::string& errMsg)
48 {
49 auto reqLevel = Entry::Level::Error; // Default to Error
50
51 auto levelmap = g_errLevelMap.find(errMsg);
52 if (levelmap != g_errLevelMap.end())
53 {
54 reqLevel = static_cast<Entry::Level>(levelmap->second);
55 }
56
57 return reqLevel;
58 }
59
getRealErrSize()60 int Manager::getRealErrSize()
61 {
62 return realErrors.size();
63 }
64
getInfoErrSize()65 int Manager::getInfoErrSize()
66 {
67 return infoErrors.size();
68 }
69
commit(uint64_t transactionId,std::string errMsg)70 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
71 {
72 auto level = getLevel(errMsg);
73 _commit(transactionId, std::move(errMsg), level);
74 return entryId;
75 }
76
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)77 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
78 uint32_t errLvl)
79 {
80 _commit(transactionId, std::move(errMsg),
81 static_cast<Entry::Level>(errLvl));
82 return entryId;
83 }
84
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)85 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
86 std::string&& errMsg, Entry::Level errLvl)
87 {
88 std::vector<std::string> additionalData{};
89
90 // When running as a test-case, the system may have a LOT of journal
91 // data and we may not have permissions to do some of the journal sync
92 // operations. Just skip over them.
93 if (!IS_UNIT_TEST)
94 {
95 static constexpr auto transactionIdVar =
96 std::string_view{"TRANSACTION_ID"};
97 // Length of 'TRANSACTION_ID' string.
98 static constexpr auto transactionIdVarSize = transactionIdVar.size();
99 // Length of 'TRANSACTION_ID=' string.
100 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
101
102 // Flush all the pending log messages into the journal
103 util::journalSync();
104
105 sd_journal* j = nullptr;
106 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
107 if (rc < 0)
108 {
109 lg2::error("Failed to open journal: {ERROR}", "ERROR",
110 strerror(-rc));
111 return;
112 }
113
114 std::string transactionIdStr = std::to_string(transactionId);
115 std::set<std::string> metalist;
116 auto metamap = g_errMetaMap.find(errMsg);
117 if (metamap != g_errMetaMap.end())
118 {
119 metalist.insert(metamap->second.begin(), metamap->second.end());
120 }
121
122 // Add _PID field information in AdditionalData.
123 metalist.insert("_PID");
124
125 // Read the journal from the end to get the most recent entry first.
126 // The result from the sd_journal_get_data() is of the form
127 // VARIABLE=value.
128 SD_JOURNAL_FOREACH_BACKWARDS(j)
129 {
130 const char* data = nullptr;
131 size_t length = 0;
132
133 // Look for the transaction id metadata variable
134 rc = sd_journal_get_data(j, transactionIdVar.data(),
135 (const void**)&data, &length);
136 if (rc < 0)
137 {
138 // This journal entry does not have the TRANSACTION_ID
139 // metadata variable.
140 continue;
141 }
142
143 // journald does not guarantee that sd_journal_get_data() returns
144 // NULL terminated strings, so need to specify the size to use to
145 // compare, use the returned length instead of anything that relies
146 // on NULL terminators like strlen(). The data variable is in the
147 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
148 // characters plus the (=) sign to do the comparison. 'data +
149 // transactionIdVarOffset' will be in the form of '1234'. 'length -
150 // transactionIdVarOffset' will be the length of '1234'.
151 if ((length <= (transactionIdVarOffset)) ||
152 (transactionIdStr.compare(
153 0, transactionIdStr.size(), data + transactionIdVarOffset,
154 length - transactionIdVarOffset) != 0))
155 {
156 // The value of the TRANSACTION_ID metadata is not the requested
157 // transaction id number.
158 continue;
159 }
160
161 // Search for all metadata variables in the current journal entry.
162 for (auto i = metalist.cbegin(); i != metalist.cend();)
163 {
164 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
165 &length);
166 if (rc < 0)
167 {
168 // Metadata variable not found, check next metadata
169 // variable.
170 i++;
171 continue;
172 }
173
174 // Metadata variable found, save it and remove it from the set.
175 additionalData.emplace_back(data, length);
176 i = metalist.erase(i);
177 }
178 if (metalist.empty())
179 {
180 // All metadata variables found, break out of journal loop.
181 break;
182 }
183 }
184 if (!metalist.empty())
185 {
186 // Not all the metadata variables were found in the journal.
187 for (auto& metaVarStr : metalist)
188 {
189 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
190 metaVarStr);
191 }
192 }
193
194 sd_journal_close(j);
195 }
196 createEntry(errMsg, errLvl, additionalData);
197 }
198
createEntry(std::string errMsg,Entry::Level errLvl,std::vector<std::string> additionalData,const FFDCEntries & ffdc)199 auto Manager::createEntry(
200 std::string errMsg, Entry::Level errLvl,
201 std::vector<std::string> additionalData,
202 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
203 {
204 if (!Extensions::disableDefaultLogCaps())
205 {
206 if (errLvl < Entry::sevLowerLimit)
207 {
208 if (realErrors.size() >= ERROR_CAP)
209 {
210 erase(realErrors.front());
211 }
212 }
213 else
214 {
215 if (infoErrors.size() >= ERROR_INFO_CAP)
216 {
217 erase(infoErrors.front());
218 }
219 }
220 }
221
222 entryId++;
223 if (errLvl >= Entry::sevLowerLimit)
224 {
225 infoErrors.push_back(entryId);
226 }
227 else
228 {
229 realErrors.push_back(entryId);
230 }
231 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
232 std::chrono::system_clock::now().time_since_epoch())
233 .count();
234 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
235
236 AssociationList objects{};
237 processMetadata(errMsg, additionalData, objects);
238
239 auto e = std::make_unique<Entry>(
240 busLog, objPath, entryId,
241 ms, // Milliseconds since 1970
242 errLvl, std::move(errMsg), std::move(additionalData),
243 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
244
245 serialize(*e);
246
247 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
248 isCalloutPresent(*e))
249 {
250 quiesceOnError(entryId);
251 }
252
253 // Add entry before calling the extensions so that they have access to it
254 entries.insert(std::make_pair(entryId, std::move(e)));
255
256 doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
257
258 // Note: No need to close the file descriptors in the FFDC.
259
260 return objPath;
261 }
262
createFromEvent(sdbusplus::exception::generated_event_base && event)263 auto Manager::createFromEvent(sdbusplus::exception::generated_event_base&&
264 event) -> sdbusplus::message::object_path
265 {
266 auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
267 return this->createEntry(msg, level, std::move(data));
268 }
269
isQuiesceOnErrorEnabled()270 bool Manager::isQuiesceOnErrorEnabled()
271 {
272 // When running under tests, the Logging.Settings service will not be
273 // present. Assume false.
274 if (IS_UNIT_TEST)
275 {
276 return false;
277 }
278
279 std::variant<bool> property;
280
281 auto method = this->busLog.new_method_call(
282 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
283 "org.freedesktop.DBus.Properties", "Get");
284
285 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
286
287 try
288 {
289 auto reply = this->busLog.call(method);
290 reply.read(property);
291 }
292 catch (const sdbusplus::exception_t& e)
293 {
294 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
295 e);
296 return false;
297 }
298
299 return std::get<bool>(property);
300 }
301
isCalloutPresent(const Entry & entry)302 bool Manager::isCalloutPresent(const Entry& entry)
303 {
304 for (const auto& c : entry.additionalData())
305 {
306 if (c.find("CALLOUT_") != std::string::npos)
307 {
308 return true;
309 }
310 }
311
312 return false;
313 }
314
findAndRemoveResolvedBlocks()315 void Manager::findAndRemoveResolvedBlocks()
316 {
317 for (auto& entry : entries)
318 {
319 if (entry.second->resolved())
320 {
321 checkAndRemoveBlockingError(entry.first);
322 }
323 }
324 }
325
onEntryResolve(sdbusplus::message_t & msg)326 void Manager::onEntryResolve(sdbusplus::message_t& msg)
327 {
328 using Interface = std::string;
329 using Property = std::string;
330 using Value = std::string;
331 using Properties = std::map<Property, std::variant<Value>>;
332
333 Interface interface;
334 Properties properties;
335
336 msg.read(interface, properties);
337
338 for (const auto& p : properties)
339 {
340 if (p.first == "Resolved")
341 {
342 findAndRemoveResolvedBlocks();
343 return;
344 }
345 }
346 }
347
checkAndQuiesceHost()348 void Manager::checkAndQuiesceHost()
349 {
350 using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
351
352 // First check host state
353 std::variant<Host::HostState> property;
354
355 auto method = this->busLog.new_method_call(
356 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
357 "org.freedesktop.DBus.Properties", "Get");
358
359 method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
360
361 try
362 {
363 auto reply = this->busLog.call(method);
364 reply.read(property);
365 }
366 catch (const sdbusplus::exception_t& e)
367 {
368 // Quiescing the host is a "best effort" type function. If unable to
369 // read the host state or it comes back empty, just return.
370 // The boot block object will still be created and the associations to
371 // find the log will be present. Don't want a dependency with
372 // phosphor-state-manager service
373 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
374 e);
375 return;
376 }
377
378 auto hostState = std::get<Host::HostState>(property);
379 if (hostState != Host::HostState::Running)
380 {
381 return;
382 }
383
384 auto quiesce = this->busLog.new_method_call(
385 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
386 "org.freedesktop.systemd1.Manager", "StartUnit");
387
388 quiesce.append("obmc-host-graceful-quiesce@0.target");
389 quiesce.append("replace");
390
391 this->busLog.call_noreply(quiesce);
392 }
393
quiesceOnError(const uint32_t entryId)394 void Manager::quiesceOnError(const uint32_t entryId)
395 {
396 // Verify we don't already have this entry blocking
397 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
398 [&](const std::unique_ptr<Block>& obj) {
399 return obj->entryId == entryId;
400 });
401 if (it != this->blockingErrors.end())
402 {
403 // Already recorded so just return
404 lg2::debug(
405 "QuiesceOnError set and callout present but entry already logged");
406 return;
407 }
408
409 lg2::info("QuiesceOnError set and callout present");
410
411 auto blockPath =
412 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
413 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
414 this->blockingErrors.push_back(std::move(blockObj));
415
416 // Register call back if log is resolved
417 using namespace sdbusplus::bus::match::rules;
418 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
419 auto callback = std::make_unique<sdbusplus::bus::match_t>(
420 this->busLog,
421 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
422 std::bind(std::mem_fn(&Manager::onEntryResolve), this,
423 std::placeholders::_1));
424
425 propChangedEntryCallback.insert(
426 std::make_pair(entryId, std::move(callback)));
427
428 checkAndQuiesceHost();
429 }
430
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)431 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
432 {
433 // Make the association <endpointpath>/<endpointtype> paths
434 std::vector<std::string> assocs;
435 for (const auto& [forwardType, reverseType, endpoint] :
436 entry.associations())
437 {
438 std::string e{endpoint};
439 e += '/' + reverseType;
440 assocs.push_back(e);
441 }
442
443 for (auto& create : Extensions::getCreateFunctions())
444 {
445 try
446 {
447 create(entry.message(), entry.id(), entry.timestamp(),
448 entry.severity(), entry.additionalData(), assocs, ffdc);
449 }
450 catch (const std::exception& e)
451 {
452 lg2::error(
453 "An extension's create function threw an exception: {ERROR}",
454 "ERROR", e);
455 }
456 }
457 }
458
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const459 void Manager::processMetadata(const std::string& /*errorName*/,
460 const std::vector<std::string>& additionalData,
461 AssociationList& objects) const
462 {
463 // additionalData is a list of "metadata=value"
464 constexpr auto separator = '=';
465 for (const auto& entryItem : additionalData)
466 {
467 auto found = entryItem.find(separator);
468 if (std::string::npos != found)
469 {
470 auto metadata = entryItem.substr(0, found);
471 auto iter = meta.find(metadata);
472 if (meta.end() != iter)
473 {
474 (iter->second)(metadata, additionalData, objects);
475 }
476 }
477 }
478 }
479
checkAndRemoveBlockingError(uint32_t entryId)480 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
481 {
482 // First look for blocking object and remove
483 auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
484 [&](const std::unique_ptr<Block>& obj) {
485 return obj->entryId == entryId;
486 });
487 if (it != blockingErrors.end())
488 {
489 blockingErrors.erase(it);
490 }
491
492 // Now remove the callback looking for the error to be resolved
493 auto resolveFind = propChangedEntryCallback.find(entryId);
494 if (resolveFind != propChangedEntryCallback.end())
495 {
496 propChangedEntryCallback.erase(resolveFind);
497 }
498
499 return;
500 }
501
eraseAll()502 size_t Manager::eraseAll()
503 {
504 std::vector<uint32_t> logIDWithHwIsolation;
505 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
506 {
507 try
508 {
509 func(logIDWithHwIsolation);
510 }
511 catch (const std::exception& e)
512 {
513 lg2::error("An extension's LogIDWithHwIsolation function threw an "
514 "exception: {ERROR}",
515 "ERROR", e);
516 }
517 }
518 size_t entriesSize = entries.size();
519 auto iter = entries.begin();
520 if (logIDWithHwIsolation.empty())
521 {
522 while (iter != entries.end())
523 {
524 auto e = iter->first;
525 ++iter;
526 erase(e);
527 }
528 entryId = 0;
529 }
530 else
531 {
532 while (iter != entries.end())
533 {
534 auto e = iter->first;
535 ++iter;
536 try
537 {
538 if (!std::ranges::contains(logIDWithHwIsolation, e))
539 {
540 erase(e);
541 }
542 else
543 {
544 entriesSize--;
545 }
546 }
547 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
548 Unavailable& e)
549 {
550 entriesSize--;
551 }
552 }
553 if (!entries.empty())
554 {
555 entryId = std::ranges::max_element(entries, [](const auto& a,
556 const auto& b) {
557 return a.first < b.first;
558 })->first;
559 }
560 else
561 {
562 entryId = 0;
563 }
564 }
565 return entriesSize;
566 }
567
erase(uint32_t entryId)568 void Manager::erase(uint32_t entryId)
569 {
570 auto entryFound = entries.find(entryId);
571 if (entries.end() != entryFound)
572 {
573 for (auto& func : Extensions::getDeleteProhibitedFunctions())
574 {
575 try
576 {
577 bool prohibited = false;
578 func(entryId, prohibited);
579 if (prohibited)
580 {
581 throw sdbusplus::xyz::openbmc_project::Common::Error::
582 Unavailable();
583 }
584 }
585 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
586 Unavailable& e)
587 {
588 throw;
589 }
590 catch (const std::exception& e)
591 {
592 lg2::error("An extension's deleteProhibited function threw an "
593 "exception: {ERROR}",
594 "ERROR", e);
595 }
596 }
597
598 // Delete the persistent representation of this error.
599 fs::path errorPath(paths::error());
600 errorPath /= std::to_string(entryId);
601 fs::remove(errorPath);
602
603 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
604 auto it = std::find(ids.begin(), ids.end(), id);
605 if (it != ids.end())
606 {
607 ids.erase(it);
608 }
609 };
610 if (entryFound->second->severity() >= Entry::sevLowerLimit)
611 {
612 removeId(infoErrors, entryId);
613 }
614 else
615 {
616 removeId(realErrors, entryId);
617 }
618 entries.erase(entryFound);
619
620 checkAndRemoveBlockingError(entryId);
621
622 for (auto& remove : Extensions::getDeleteFunctions())
623 {
624 try
625 {
626 remove(entryId);
627 }
628 catch (const std::exception& e)
629 {
630 lg2::error("An extension's delete function threw an exception: "
631 "{ERROR}",
632 "ERROR", e);
633 }
634 }
635 }
636 else
637 {
638 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
639 }
640 }
641
restore()642 void Manager::restore()
643 {
644 auto sanity = [](const auto& id, const auto& restoredId) {
645 return id == restoredId;
646 };
647
648 fs::path dir(paths::error());
649 if (!fs::exists(dir) || fs::is_empty(dir))
650 {
651 return;
652 }
653
654 for (auto& file : fs::directory_iterator(dir))
655 {
656 auto id = file.path().filename().c_str();
657 auto idNum = std::stol(id);
658 auto e = std::make_unique<Entry>(
659 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
660 if (deserialize(file.path(), *e))
661 {
662 // validate the restored error entry id
663 if (sanity(static_cast<uint32_t>(idNum), e->id()))
664 {
665 e->path(file.path(), true);
666 if (e->severity() >= Entry::sevLowerLimit)
667 {
668 infoErrors.push_back(idNum);
669 }
670 else
671 {
672 realErrors.push_back(idNum);
673 }
674
675 entries.insert(std::make_pair(idNum, std::move(e)));
676 }
677 else
678 {
679 lg2::error(
680 "Failed in sanity check while restoring error entry. "
681 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
682 "ID_NUM", idNum, "ENTRY_ID", e->id());
683 }
684 }
685 }
686
687 if (!entries.empty())
688 {
689 entryId = entries.rbegin()->first;
690 }
691 }
692
readFWVersion()693 std::string Manager::readFWVersion()
694 {
695 auto version = util::getOSReleaseValue("VERSION_ID");
696
697 if (!version)
698 {
699 lg2::error("Unable to read BMC firmware version");
700 }
701
702 return version.value_or("");
703 }
704
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)705 auto Manager::create(const std::string& message, Entry::Level severity,
706 const std::map<std::string, std::string>& additionalData,
707 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
708 {
709 // Convert the map into a vector of "key=value" strings
710 auto ad = util::additional_data::combine(additionalData);
711
712 return createEntry(message, severity, ad, ffdc);
713 }
714
715 } // namespace internal
716 } // namespace logging
717 } // namespace phosphor
718