1 #include "config.h"
2
3 #include "log_manager.hpp"
4
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "paths.hpp"
10 #include "util.hpp"
11
12 #include <systemd/sd-bus.h>
13 #include <systemd/sd-journal.h>
14 #include <unistd.h>
15
16 #include <phosphor-logging/lg2.hpp>
17 #include <sdbusplus/vtable.hpp>
18 #include <xyz/openbmc_project/State/Host/server.hpp>
19
20 #include <cassert>
21 #include <chrono>
22 #include <cstdio>
23 #include <cstring>
24 #include <fstream>
25 #include <functional>
26 #include <future>
27 #include <iostream>
28 #include <map>
29 #include <set>
30 #include <string>
31 #include <string_view>
32 #include <vector>
33
34 using namespace std::chrono;
35 extern const std::map<
36 phosphor::logging::metadata::Metadata,
37 std::function<phosphor::logging::metadata::associations::Type>>
38 meta;
39
40 namespace phosphor
41 {
42 namespace logging
43 {
44 namespace internal
45 {
getLevel(const std::string & errMsg)46 inline auto getLevel(const std::string& errMsg)
47 {
48 auto reqLevel = Entry::Level::Error; // Default to Error
49
50 auto levelmap = g_errLevelMap.find(errMsg);
51 if (levelmap != g_errLevelMap.end())
52 {
53 reqLevel = static_cast<Entry::Level>(levelmap->second);
54 }
55
56 return reqLevel;
57 }
58
getRealErrSize()59 int Manager::getRealErrSize()
60 {
61 return realErrors.size();
62 }
63
getInfoErrSize()64 int Manager::getInfoErrSize()
65 {
66 return infoErrors.size();
67 }
68
commit(uint64_t transactionId,std::string errMsg)69 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
70 {
71 auto level = getLevel(errMsg);
72 _commit(transactionId, std::move(errMsg), level);
73 return entryId;
74 }
75
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)76 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
77 uint32_t errLvl)
78 {
79 _commit(transactionId, std::move(errMsg),
80 static_cast<Entry::Level>(errLvl));
81 return entryId;
82 }
83
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)84 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
85 std::string&& errMsg, Entry::Level errLvl)
86 {
87 std::vector<std::string> additionalData{};
88
89 // When running as a test-case, the system may have a LOT of journal
90 // data and we may not have permissions to do some of the journal sync
91 // operations. Just skip over them.
92 if (!IS_UNIT_TEST)
93 {
94 static constexpr auto transactionIdVar =
95 std::string_view{"TRANSACTION_ID"};
96 // Length of 'TRANSACTION_ID' string.
97 static constexpr auto transactionIdVarSize = transactionIdVar.size();
98 // Length of 'TRANSACTION_ID=' string.
99 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
100
101 // Flush all the pending log messages into the journal
102 util::journalSync();
103
104 sd_journal* j = nullptr;
105 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
106 if (rc < 0)
107 {
108 lg2::error("Failed to open journal: {ERROR}", "ERROR",
109 strerror(-rc));
110 return;
111 }
112
113 std::string transactionIdStr = std::to_string(transactionId);
114 std::set<std::string> metalist;
115 auto metamap = g_errMetaMap.find(errMsg);
116 if (metamap != g_errMetaMap.end())
117 {
118 metalist.insert(metamap->second.begin(), metamap->second.end());
119 }
120
121 // Add _PID field information in AdditionalData.
122 metalist.insert("_PID");
123
124 // Read the journal from the end to get the most recent entry first.
125 // The result from the sd_journal_get_data() is of the form
126 // VARIABLE=value.
127 SD_JOURNAL_FOREACH_BACKWARDS(j)
128 {
129 const char* data = nullptr;
130 size_t length = 0;
131
132 // Look for the transaction id metadata variable
133 rc = sd_journal_get_data(j, transactionIdVar.data(),
134 (const void**)&data, &length);
135 if (rc < 0)
136 {
137 // This journal entry does not have the TRANSACTION_ID
138 // metadata variable.
139 continue;
140 }
141
142 // journald does not guarantee that sd_journal_get_data() returns
143 // NULL terminated strings, so need to specify the size to use to
144 // compare, use the returned length instead of anything that relies
145 // on NULL terminators like strlen(). The data variable is in the
146 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
147 // characters plus the (=) sign to do the comparison. 'data +
148 // transactionIdVarOffset' will be in the form of '1234'. 'length -
149 // transactionIdVarOffset' will be the length of '1234'.
150 if ((length <= (transactionIdVarOffset)) ||
151 (transactionIdStr.compare(
152 0, transactionIdStr.size(), data + transactionIdVarOffset,
153 length - transactionIdVarOffset) != 0))
154 {
155 // The value of the TRANSACTION_ID metadata is not the requested
156 // transaction id number.
157 continue;
158 }
159
160 // Search for all metadata variables in the current journal entry.
161 for (auto i = metalist.cbegin(); i != metalist.cend();)
162 {
163 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
164 &length);
165 if (rc < 0)
166 {
167 // Metadata variable not found, check next metadata
168 // variable.
169 i++;
170 continue;
171 }
172
173 // Metadata variable found, save it and remove it from the set.
174 additionalData.emplace_back(data, length);
175 i = metalist.erase(i);
176 }
177 if (metalist.empty())
178 {
179 // All metadata variables found, break out of journal loop.
180 break;
181 }
182 }
183 if (!metalist.empty())
184 {
185 // Not all the metadata variables were found in the journal.
186 for (auto& metaVarStr : metalist)
187 {
188 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
189 metaVarStr);
190 }
191 }
192
193 sd_journal_close(j);
194 }
195 createEntry(errMsg, errLvl, additionalData);
196 }
197
createEntry(std::string errMsg,Entry::Level errLvl,std::vector<std::string> additionalData,const FFDCEntries & ffdc)198 auto Manager::createEntry(
199 std::string errMsg, Entry::Level errLvl,
200 std::vector<std::string> additionalData,
201 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
202 {
203 if (!Extensions::disableDefaultLogCaps())
204 {
205 if (errLvl < Entry::sevLowerLimit)
206 {
207 if (realErrors.size() >= ERROR_CAP)
208 {
209 erase(realErrors.front());
210 }
211 }
212 else
213 {
214 if (infoErrors.size() >= ERROR_INFO_CAP)
215 {
216 erase(infoErrors.front());
217 }
218 }
219 }
220
221 entryId++;
222 if (errLvl >= Entry::sevLowerLimit)
223 {
224 infoErrors.push_back(entryId);
225 }
226 else
227 {
228 realErrors.push_back(entryId);
229 }
230 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
231 std::chrono::system_clock::now().time_since_epoch())
232 .count();
233 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
234
235 AssociationList objects{};
236 processMetadata(errMsg, additionalData, objects);
237
238 auto e = std::make_unique<Entry>(
239 busLog, objPath, entryId,
240 ms, // Milliseconds since 1970
241 errLvl, std::move(errMsg), std::move(additionalData),
242 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
243
244 serialize(*e);
245
246 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
247 isCalloutPresent(*e))
248 {
249 quiesceOnError(entryId);
250 }
251
252 // Add entry before calling the extensions so that they have access to it
253 entries.insert(std::make_pair(entryId, std::move(e)));
254
255 doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
256
257 // Note: No need to close the file descriptors in the FFDC.
258
259 return objPath;
260 }
261
isQuiesceOnErrorEnabled()262 bool Manager::isQuiesceOnErrorEnabled()
263 {
264 // When running under tests, the Logging.Settings service will not be
265 // present. Assume false.
266 if (IS_UNIT_TEST)
267 {
268 return false;
269 }
270
271 std::variant<bool> property;
272
273 auto method = this->busLog.new_method_call(
274 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
275 "org.freedesktop.DBus.Properties", "Get");
276
277 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
278
279 try
280 {
281 auto reply = this->busLog.call(method);
282 reply.read(property);
283 }
284 catch (const sdbusplus::exception_t& e)
285 {
286 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
287 e);
288 return false;
289 }
290
291 return std::get<bool>(property);
292 }
293
isCalloutPresent(const Entry & entry)294 bool Manager::isCalloutPresent(const Entry& entry)
295 {
296 for (const auto& c : entry.additionalData())
297 {
298 if (c.find("CALLOUT_") != std::string::npos)
299 {
300 return true;
301 }
302 }
303
304 return false;
305 }
306
findAndRemoveResolvedBlocks()307 void Manager::findAndRemoveResolvedBlocks()
308 {
309 for (auto& entry : entries)
310 {
311 if (entry.second->resolved())
312 {
313 checkAndRemoveBlockingError(entry.first);
314 }
315 }
316 }
317
onEntryResolve(sdbusplus::message_t & msg)318 void Manager::onEntryResolve(sdbusplus::message_t& msg)
319 {
320 using Interface = std::string;
321 using Property = std::string;
322 using Value = std::string;
323 using Properties = std::map<Property, std::variant<Value>>;
324
325 Interface interface;
326 Properties properties;
327
328 msg.read(interface, properties);
329
330 for (const auto& p : properties)
331 {
332 if (p.first == "Resolved")
333 {
334 findAndRemoveResolvedBlocks();
335 return;
336 }
337 }
338 }
339
checkAndQuiesceHost()340 void Manager::checkAndQuiesceHost()
341 {
342 using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
343
344 // First check host state
345 std::variant<Host::HostState> property;
346
347 auto method = this->busLog.new_method_call(
348 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
349 "org.freedesktop.DBus.Properties", "Get");
350
351 method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
352
353 try
354 {
355 auto reply = this->busLog.call(method);
356 reply.read(property);
357 }
358 catch (const sdbusplus::exception_t& e)
359 {
360 // Quiescing the host is a "best effort" type function. If unable to
361 // read the host state or it comes back empty, just return.
362 // The boot block object will still be created and the associations to
363 // find the log will be present. Don't want a dependency with
364 // phosphor-state-manager service
365 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
366 e);
367 return;
368 }
369
370 auto hostState = std::get<Host::HostState>(property);
371 if (hostState != Host::HostState::Running)
372 {
373 return;
374 }
375
376 auto quiesce = this->busLog.new_method_call(
377 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
378 "org.freedesktop.systemd1.Manager", "StartUnit");
379
380 quiesce.append("obmc-host-graceful-quiesce@0.target");
381 quiesce.append("replace");
382
383 this->busLog.call_noreply(quiesce);
384 }
385
quiesceOnError(const uint32_t entryId)386 void Manager::quiesceOnError(const uint32_t entryId)
387 {
388 // Verify we don't already have this entry blocking
389 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
390 [&](const std::unique_ptr<Block>& obj) {
391 return obj->entryId == entryId;
392 });
393 if (it != this->blockingErrors.end())
394 {
395 // Already recorded so just return
396 lg2::debug(
397 "QuiesceOnError set and callout present but entry already logged");
398 return;
399 }
400
401 lg2::info("QuiesceOnError set and callout present");
402
403 auto blockPath =
404 std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
405 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
406 this->blockingErrors.push_back(std::move(blockObj));
407
408 // Register call back if log is resolved
409 using namespace sdbusplus::bus::match::rules;
410 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
411 auto callback = std::make_unique<sdbusplus::bus::match_t>(
412 this->busLog,
413 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
414 std::bind(std::mem_fn(&Manager::onEntryResolve), this,
415 std::placeholders::_1));
416
417 propChangedEntryCallback.insert(
418 std::make_pair(entryId, std::move(callback)));
419
420 checkAndQuiesceHost();
421 }
422
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)423 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
424 {
425 // Make the association <endpointpath>/<endpointtype> paths
426 std::vector<std::string> assocs;
427 for (const auto& [forwardType, reverseType, endpoint] :
428 entry.associations())
429 {
430 std::string e{endpoint};
431 e += '/' + reverseType;
432 assocs.push_back(e);
433 }
434
435 for (auto& create : Extensions::getCreateFunctions())
436 {
437 try
438 {
439 create(entry.message(), entry.id(), entry.timestamp(),
440 entry.severity(), entry.additionalData(), assocs, ffdc);
441 }
442 catch (const std::exception& e)
443 {
444 lg2::error(
445 "An extension's create function threw an exception: {ERROR}",
446 "ERROR", e);
447 }
448 }
449 }
450
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const451 void Manager::processMetadata(const std::string& /*errorName*/,
452 const std::vector<std::string>& additionalData,
453 AssociationList& objects) const
454 {
455 // additionalData is a list of "metadata=value"
456 constexpr auto separator = '=';
457 for (const auto& entryItem : additionalData)
458 {
459 auto found = entryItem.find(separator);
460 if (std::string::npos != found)
461 {
462 auto metadata = entryItem.substr(0, found);
463 auto iter = meta.find(metadata);
464 if (meta.end() != iter)
465 {
466 (iter->second)(metadata, additionalData, objects);
467 }
468 }
469 }
470 }
471
checkAndRemoveBlockingError(uint32_t entryId)472 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
473 {
474 // First look for blocking object and remove
475 auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
476 [&](const std::unique_ptr<Block>& obj) {
477 return obj->entryId == entryId;
478 });
479 if (it != blockingErrors.end())
480 {
481 blockingErrors.erase(it);
482 }
483
484 // Now remove the callback looking for the error to be resolved
485 auto resolveFind = propChangedEntryCallback.find(entryId);
486 if (resolveFind != propChangedEntryCallback.end())
487 {
488 propChangedEntryCallback.erase(resolveFind);
489 }
490
491 return;
492 }
493
eraseAll()494 size_t Manager::eraseAll()
495 {
496 std::vector<uint32_t> logIDWithHwIsolation;
497 for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
498 {
499 try
500 {
501 func(logIDWithHwIsolation);
502 }
503 catch (const std::exception& e)
504 {
505 lg2::error("An extension's LogIDWithHwIsolation function threw an "
506 "exception: {ERROR}",
507 "ERROR", e);
508 }
509 }
510 size_t entriesSize = entries.size();
511 auto iter = entries.begin();
512 if (logIDWithHwIsolation.empty())
513 {
514 while (iter != entries.end())
515 {
516 auto e = iter->first;
517 ++iter;
518 erase(e);
519 }
520 entryId = 0;
521 }
522 else
523 {
524 while (iter != entries.end())
525 {
526 auto e = iter->first;
527 ++iter;
528 try
529 {
530 if (!std::ranges::contains(logIDWithHwIsolation, e))
531 {
532 erase(e);
533 }
534 else
535 {
536 entriesSize--;
537 }
538 }
539 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
540 Unavailable& e)
541 {
542 entriesSize--;
543 }
544 }
545 if (!entries.empty())
546 {
547 entryId = std::ranges::max_element(entries, [](const auto& a,
548 const auto& b) {
549 return a.first < b.first;
550 })->first;
551 }
552 else
553 {
554 entryId = 0;
555 }
556 }
557 return entriesSize;
558 }
559
erase(uint32_t entryId)560 void Manager::erase(uint32_t entryId)
561 {
562 auto entryFound = entries.find(entryId);
563 if (entries.end() != entryFound)
564 {
565 for (auto& func : Extensions::getDeleteProhibitedFunctions())
566 {
567 try
568 {
569 bool prohibited = false;
570 func(entryId, prohibited);
571 if (prohibited)
572 {
573 throw sdbusplus::xyz::openbmc_project::Common::Error::
574 Unavailable();
575 }
576 }
577 catch (const sdbusplus::xyz::openbmc_project::Common::Error::
578 Unavailable& e)
579 {
580 throw;
581 }
582 catch (const std::exception& e)
583 {
584 lg2::error("An extension's deleteProhibited function threw an "
585 "exception: {ERROR}",
586 "ERROR", e);
587 }
588 }
589
590 // Delete the persistent representation of this error.
591 fs::path errorPath(paths::error());
592 errorPath /= std::to_string(entryId);
593 fs::remove(errorPath);
594
595 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
596 auto it = std::find(ids.begin(), ids.end(), id);
597 if (it != ids.end())
598 {
599 ids.erase(it);
600 }
601 };
602 if (entryFound->second->severity() >= Entry::sevLowerLimit)
603 {
604 removeId(infoErrors, entryId);
605 }
606 else
607 {
608 removeId(realErrors, entryId);
609 }
610 entries.erase(entryFound);
611
612 checkAndRemoveBlockingError(entryId);
613
614 for (auto& remove : Extensions::getDeleteFunctions())
615 {
616 try
617 {
618 remove(entryId);
619 }
620 catch (const std::exception& e)
621 {
622 lg2::error("An extension's delete function threw an exception: "
623 "{ERROR}",
624 "ERROR", e);
625 }
626 }
627 }
628 else
629 {
630 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
631 }
632 }
633
restore()634 void Manager::restore()
635 {
636 auto sanity = [](const auto& id, const auto& restoredId) {
637 return id == restoredId;
638 };
639
640 fs::path dir(paths::error());
641 if (!fs::exists(dir) || fs::is_empty(dir))
642 {
643 return;
644 }
645
646 for (auto& file : fs::directory_iterator(dir))
647 {
648 auto id = file.path().filename().c_str();
649 auto idNum = std::stol(id);
650 auto e = std::make_unique<Entry>(
651 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
652 if (deserialize(file.path(), *e))
653 {
654 // validate the restored error entry id
655 if (sanity(static_cast<uint32_t>(idNum), e->id()))
656 {
657 e->path(file.path(), true);
658 if (e->severity() >= Entry::sevLowerLimit)
659 {
660 infoErrors.push_back(idNum);
661 }
662 else
663 {
664 realErrors.push_back(idNum);
665 }
666
667 entries.insert(std::make_pair(idNum, std::move(e)));
668 }
669 else
670 {
671 lg2::error(
672 "Failed in sanity check while restoring error entry. "
673 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
674 "ID_NUM", idNum, "ENTRY_ID", e->id());
675 }
676 }
677 }
678
679 if (!entries.empty())
680 {
681 entryId = entries.rbegin()->first;
682 }
683 }
684
readFWVersion()685 std::string Manager::readFWVersion()
686 {
687 auto version = util::getOSReleaseValue("VERSION_ID");
688
689 if (!version)
690 {
691 lg2::error("Unable to read BMC firmware version");
692 }
693
694 return version.value_or("");
695 }
696
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)697 auto Manager::create(const std::string& message, Entry::Level severity,
698 const std::map<std::string, std::string>& additionalData,
699 const FFDCEntries& ffdc) -> sdbusplus::message::object_path
700 {
701 // Convert the map into a vector of "key=value" strings
702 std::vector<std::string> ad;
703 metadata::associations::combine(additionalData, ad);
704
705 return createEntry(message, severity, ad, ffdc);
706 }
707
708 } // namespace internal
709 } // namespace logging
710 } // namespace phosphor
711