xref: /openbmc/phosphor-logging/log_manager.cpp (revision 9cdb955d67edd3b1c2d373ebe68df5f0b0cd74b6)
1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12 
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16 
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20 
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <functional>
26 #include <map>
27 #include <ranges>
28 #include <set>
29 #include <string>
30 #include <string_view>
31 #include <vector>
32 
33 using namespace std::chrono;
34 extern const std::map<
35     phosphor::logging::metadata::Metadata,
36     std::function<phosphor::logging::metadata::associations::Type>>
37     meta;
38 
39 namespace phosphor
40 {
41 namespace logging
42 {
43 namespace internal
44 {
getLevel(const std::string & errMsg)45 inline auto getLevel(const std::string& errMsg)
46 {
47     auto reqLevel = Entry::Level::Error; // Default to Error
48 
49     auto levelmap = g_errLevelMap.find(errMsg);
50     if (levelmap != g_errLevelMap.end())
51     {
52         reqLevel = static_cast<Entry::Level>(levelmap->second);
53     }
54 
55     return reqLevel;
56 }
57 
getRealErrSize()58 int Manager::getRealErrSize()
59 {
60     return realErrors.size();
61 }
62 
getInfoErrSize()63 int Manager::getInfoErrSize()
64 {
65     return infoErrors.size();
66 }
67 
commit(uint64_t transactionId,std::string errMsg)68 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
69 {
70     auto level = getLevel(errMsg);
71     _commit(transactionId, std::move(errMsg), level);
72     return entryId;
73 }
74 
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)75 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
76                                 uint32_t errLvl)
77 {
78     _commit(transactionId, std::move(errMsg),
79             static_cast<Entry::Level>(errLvl));
80     return entryId;
81 }
82 
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)83 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
84                       std::string&& errMsg, Entry::Level errLvl)
85 {
86     std::map<std::string, std::string> additionalData{};
87 
88     // When running as a test-case, the system may have a LOT of journal
89     // data and we may not have permissions to do some of the journal sync
90     // operations.  Just skip over them.
91     if (!IS_UNIT_TEST)
92     {
93         static constexpr auto transactionIdVar =
94             std::string_view{"TRANSACTION_ID"};
95         // Length of 'TRANSACTION_ID' string.
96         static constexpr auto transactionIdVarSize = transactionIdVar.size();
97         // Length of 'TRANSACTION_ID=' string.
98         static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
99 
100         // Flush all the pending log messages into the journal
101         util::journalSync();
102 
103         sd_journal* j = nullptr;
104         int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
105         if (rc < 0)
106         {
107             lg2::error("Failed to open journal: {ERROR}", "ERROR",
108                        strerror(-rc));
109             return;
110         }
111 
112         std::string transactionIdStr = std::to_string(transactionId);
113         std::set<std::string> metalist;
114         auto metamap = g_errMetaMap.find(errMsg);
115         if (metamap != g_errMetaMap.end())
116         {
117             metalist.insert(metamap->second.begin(), metamap->second.end());
118         }
119 
120         // Add _PID field information in AdditionalData.
121         metalist.insert("_PID");
122 
123         // Read the journal from the end to get the most recent entry first.
124         // The result from the sd_journal_get_data() is of the form
125         // VARIABLE=value.
126         SD_JOURNAL_FOREACH_BACKWARDS(j)
127         {
128             const char* data = nullptr;
129             size_t length = 0;
130 
131             // Look for the transaction id metadata variable
132             rc = sd_journal_get_data(j, transactionIdVar.data(),
133                                      (const void**)&data, &length);
134             if (rc < 0)
135             {
136                 // This journal entry does not have the TRANSACTION_ID
137                 // metadata variable.
138                 continue;
139             }
140 
141             // journald does not guarantee that sd_journal_get_data() returns
142             // NULL terminated strings, so need to specify the size to use to
143             // compare, use the returned length instead of anything that relies
144             // on NULL terminators like strlen(). The data variable is in the
145             // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
146             // characters plus the (=) sign to do the comparison. 'data +
147             // transactionIdVarOffset' will be in the form of '1234'. 'length -
148             // transactionIdVarOffset' will be the length of '1234'.
149             if ((length <= (transactionIdVarOffset)) ||
150                 (transactionIdStr.compare(
151                      0, transactionIdStr.size(), data + transactionIdVarOffset,
152                      length - transactionIdVarOffset) != 0))
153             {
154                 // The value of the TRANSACTION_ID metadata is not the requested
155                 // transaction id number.
156                 continue;
157             }
158 
159             // Search for all metadata variables in the current journal entry.
160             for (auto i = metalist.cbegin(); i != metalist.cend();)
161             {
162                 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
163                                          &length);
164                 if (rc < 0)
165                 {
166                     // Metadata variable not found, check next metadata
167                     // variable.
168                     i++;
169                     continue;
170                 }
171 
172                 // Metadata variable found, save it and remove it from the set.
173                 std::string metadata(data, length);
174                 if (auto pos = metadata.find('='); pos != std::string::npos)
175                 {
176                     auto key = metadata.substr(0, pos);
177                     auto value = metadata.substr(pos + 1);
178                     additionalData.emplace(std::move(key), std::move(value));
179                 }
180                 i = metalist.erase(i);
181             }
182             if (metalist.empty())
183             {
184                 // All metadata variables found, break out of journal loop.
185                 break;
186             }
187         }
188         if (!metalist.empty())
189         {
190             // Not all the metadata variables were found in the journal.
191             for (auto& metaVarStr : metalist)
192             {
193                 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
194                           metaVarStr);
195             }
196         }
197 
198         sd_journal_close(j);
199     }
200     createEntry(errMsg, errLvl, additionalData);
201 }
202 
createEntry(std::string errMsg,Entry::Level errLvl,std::map<std::string,std::string> additionalData,const FFDCEntries & ffdc)203 auto Manager::createEntry(std::string errMsg, Entry::Level errLvl,
204                           std::map<std::string, std::string> additionalData,
205                           const FFDCEntries& ffdc)
206     -> sdbusplus::message::object_path
207 {
208     if (!Extensions::disableDefaultLogCaps())
209     {
210         if (errLvl < Entry::sevLowerLimit)
211         {
212             if (realErrors.size() >= ERROR_CAP)
213             {
214                 erase(realErrors.front());
215             }
216         }
217         else
218         {
219             if (infoErrors.size() >= ERROR_INFO_CAP)
220             {
221                 erase(infoErrors.front());
222             }
223         }
224     }
225 
226     entryId++;
227     if (errLvl >= Entry::sevLowerLimit)
228     {
229         infoErrors.push_back(entryId);
230     }
231     else
232     {
233         realErrors.push_back(entryId);
234     }
235     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
236                   std::chrono::system_clock::now().time_since_epoch())
237                   .count();
238     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
239 
240     AssociationList objects{};
241     auto additionalDataVec = util::additional_data::combine(additionalData);
242     processMetadata(errMsg, additionalDataVec, objects);
243 
244     auto e = std::make_unique<Entry>(
245         busLog, objPath, entryId,
246         ms, // Milliseconds since 1970
247         errLvl, std::move(errMsg), std::move(additionalData),
248         std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
249 
250     serialize(*e);
251 
252     if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
253         isCalloutPresent(*e))
254     {
255         quiesceOnError(entryId);
256     }
257 
258     // Add entry before calling the extensions so that they have access to it
259     entries.insert(std::make_pair(entryId, std::move(e)));
260 
261     doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
262 
263     // Note: No need to close the file descriptors in the FFDC.
264 
265     return objPath;
266 }
267 
createFromEvent(sdbusplus::exception::generated_event_base && event)268 auto Manager::createFromEvent(
269     sdbusplus::exception::generated_event_base&& event)
270     -> sdbusplus::message::object_path
271 {
272     auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
273     return this->createEntry(msg, level, std::move(data));
274 }
275 
isQuiesceOnErrorEnabled()276 bool Manager::isQuiesceOnErrorEnabled()
277 {
278     // When running under tests, the Logging.Settings service will not be
279     // present.  Assume false.
280     if (IS_UNIT_TEST)
281     {
282         return false;
283     }
284 
285     std::variant<bool> property;
286 
287     auto method = this->busLog.new_method_call(
288         "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
289         "org.freedesktop.DBus.Properties", "Get");
290 
291     method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
292 
293     try
294     {
295         auto reply = this->busLog.call(method);
296         reply.read(property);
297     }
298     catch (const sdbusplus::exception_t& e)
299     {
300         lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
301                    e);
302         return false;
303     }
304 
305     return std::get<bool>(property);
306 }
307 
isCalloutPresent(const Entry & entry)308 bool Manager::isCalloutPresent(const Entry& entry)
309 {
310     for (const auto& c : std::views::keys(entry.additionalData()))
311     {
312         if (c.find("CALLOUT_") != std::string::npos)
313         {
314             return true;
315         }
316     }
317 
318     return false;
319 }
320 
findAndRemoveResolvedBlocks()321 void Manager::findAndRemoveResolvedBlocks()
322 {
323     for (auto& entry : entries)
324     {
325         if (entry.second->resolved())
326         {
327             checkAndRemoveBlockingError(entry.first);
328         }
329     }
330 }
331 
onEntryResolve(sdbusplus::message_t & msg)332 void Manager::onEntryResolve(sdbusplus::message_t& msg)
333 {
334     using Interface = std::string;
335     using Property = std::string;
336     using Value = std::string;
337     using Properties = std::map<Property, std::variant<Value>>;
338 
339     Interface interface;
340     Properties properties;
341 
342     msg.read(interface, properties);
343 
344     for (const auto& p : properties)
345     {
346         if (p.first == "Resolved")
347         {
348             findAndRemoveResolvedBlocks();
349             return;
350         }
351     }
352 }
353 
checkAndQuiesceHost()354 void Manager::checkAndQuiesceHost()
355 {
356     using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
357 
358     // First check host state
359     std::variant<Host::HostState> property;
360 
361     auto method = this->busLog.new_method_call(
362         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
363         "org.freedesktop.DBus.Properties", "Get");
364 
365     method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
366 
367     try
368     {
369         auto reply = this->busLog.call(method);
370         reply.read(property);
371     }
372     catch (const sdbusplus::exception_t& e)
373     {
374         // Quiescing the host is a "best effort" type function. If unable to
375         // read the host state or it comes back empty, just return.
376         // The boot block object will still be created and the associations to
377         // find the log will be present. Don't want a dependency with
378         // phosphor-state-manager service
379         lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
380                   e);
381         return;
382     }
383 
384     auto hostState = std::get<Host::HostState>(property);
385     if (hostState != Host::HostState::Running)
386     {
387         return;
388     }
389 
390     auto quiesce = this->busLog.new_method_call(
391         "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
392         "org.freedesktop.systemd1.Manager", "StartUnit");
393 
394     quiesce.append("obmc-host-graceful-quiesce@0.target");
395     quiesce.append("replace");
396 
397     this->busLog.call_noreply(quiesce);
398 }
399 
quiesceOnError(const uint32_t entryId)400 void Manager::quiesceOnError(const uint32_t entryId)
401 {
402     // Verify we don't already have this entry blocking
403     auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
404                       [&](const std::unique_ptr<Block>& obj) {
405                           return obj->entryId == entryId;
406                       });
407     if (it != this->blockingErrors.end())
408     {
409         // Already recorded so just return
410         lg2::debug(
411             "QuiesceOnError set and callout present but entry already logged");
412         return;
413     }
414 
415     lg2::info("QuiesceOnError set and callout present");
416 
417     auto blockPath =
418         std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
419     auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
420     this->blockingErrors.push_back(std::move(blockObj));
421 
422     // Register call back if log is resolved
423     using namespace sdbusplus::bus::match::rules;
424     auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
425     auto callback = std::make_unique<sdbusplus::bus::match_t>(
426         this->busLog,
427         propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
428         std::bind(std::mem_fn(&Manager::onEntryResolve), this,
429                   std::placeholders::_1));
430 
431     propChangedEntryCallback.insert(
432         std::make_pair(entryId, std::move(callback)));
433 
434     checkAndQuiesceHost();
435 }
436 
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)437 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
438 {
439     // Make the association <endpointpath>/<endpointtype> paths
440     std::vector<std::string> assocs;
441     for (const auto& [forwardType, reverseType, endpoint] :
442          entry.associations())
443     {
444         std::string e{endpoint};
445         e += '/' + reverseType;
446         assocs.push_back(e);
447     }
448 
449     for (auto& create : Extensions::getCreateFunctions())
450     {
451         try
452         {
453             create(entry.message(), entry.id(), entry.timestamp(),
454                    entry.severity(), entry.additionalData(), assocs, ffdc);
455         }
456         catch (const std::exception& e)
457         {
458             lg2::error(
459                 "An extension's create function threw an exception: {ERROR}",
460                 "ERROR", e);
461         }
462     }
463 }
464 
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const465 void Manager::processMetadata(const std::string& /*errorName*/,
466                               const std::vector<std::string>& additionalData,
467                               AssociationList& objects) const
468 {
469     // additionalData is a list of "metadata=value"
470     constexpr auto separator = '=';
471     for (const auto& entryItem : additionalData)
472     {
473         auto found = entryItem.find(separator);
474         if (std::string::npos != found)
475         {
476             auto metadata = entryItem.substr(0, found);
477             auto iter = meta.find(metadata);
478             if (meta.end() != iter)
479             {
480                 (iter->second)(metadata, additionalData, objects);
481             }
482         }
483     }
484 }
485 
checkAndRemoveBlockingError(uint32_t entryId)486 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
487 {
488     // First look for blocking object and remove
489     auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
490                       [&](const std::unique_ptr<Block>& obj) {
491                           return obj->entryId == entryId;
492                       });
493     if (it != blockingErrors.end())
494     {
495         blockingErrors.erase(it);
496     }
497 
498     // Now remove the callback looking for the error to be resolved
499     auto resolveFind = propChangedEntryCallback.find(entryId);
500     if (resolveFind != propChangedEntryCallback.end())
501     {
502         propChangedEntryCallback.erase(resolveFind);
503     }
504 
505     return;
506 }
507 
eraseAll()508 size_t Manager::eraseAll()
509 {
510     std::vector<uint32_t> logIDWithHwIsolation;
511     for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
512     {
513         try
514         {
515             func(logIDWithHwIsolation);
516         }
517         catch (const std::exception& e)
518         {
519             lg2::error("An extension's LogIDWithHwIsolation function threw an "
520                        "exception: {ERROR}",
521                        "ERROR", e);
522         }
523     }
524     size_t entriesSize = entries.size();
525     auto iter = entries.begin();
526     if (logIDWithHwIsolation.empty())
527     {
528         while (iter != entries.end())
529         {
530             auto e = iter->first;
531             ++iter;
532             erase(e);
533         }
534         entryId = 0;
535     }
536     else
537     {
538         while (iter != entries.end())
539         {
540             auto e = iter->first;
541             ++iter;
542             try
543             {
544                 if (!std::ranges::contains(logIDWithHwIsolation, e))
545                 {
546                     erase(e);
547                 }
548                 else
549                 {
550                     entriesSize--;
551                 }
552             }
553             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
554                        Unavailable& e)
555             {
556                 entriesSize--;
557             }
558         }
559         if (!entries.empty())
560         {
561             entryId = std::ranges::max_element(entries, [](const auto& a,
562                                                            const auto& b) {
563                           return a.first < b.first;
564                       })->first;
565         }
566         else
567         {
568             entryId = 0;
569         }
570     }
571     return entriesSize;
572 }
573 
erase(uint32_t entryId)574 void Manager::erase(uint32_t entryId)
575 {
576     auto entryFound = entries.find(entryId);
577     if (entries.end() != entryFound)
578     {
579         for (auto& func : Extensions::getDeleteProhibitedFunctions())
580         {
581             try
582             {
583                 bool prohibited = false;
584                 func(entryId, prohibited);
585                 if (prohibited)
586                 {
587                     throw sdbusplus::xyz::openbmc_project::Common::Error::
588                         Unavailable();
589                 }
590             }
591             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
592                        Unavailable& e)
593             {
594                 throw;
595             }
596             catch (const std::exception& e)
597             {
598                 lg2::error("An extension's deleteProhibited function threw an "
599                            "exception: {ERROR}",
600                            "ERROR", e);
601             }
602         }
603 
604         // Delete the persistent representation of this error.
605         fs::path errorPath(paths::error());
606         errorPath /= std::to_string(entryId);
607         fs::remove(errorPath);
608 
609         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
610             auto it = std::find(ids.begin(), ids.end(), id);
611             if (it != ids.end())
612             {
613                 ids.erase(it);
614             }
615         };
616         if (entryFound->second->severity() >= Entry::sevLowerLimit)
617         {
618             removeId(infoErrors, entryId);
619         }
620         else
621         {
622             removeId(realErrors, entryId);
623         }
624         entries.erase(entryFound);
625 
626         checkAndRemoveBlockingError(entryId);
627 
628         for (auto& remove : Extensions::getDeleteFunctions())
629         {
630             try
631             {
632                 remove(entryId);
633             }
634             catch (const std::exception& e)
635             {
636                 lg2::error("An extension's delete function threw an exception: "
637                            "{ERROR}",
638                            "ERROR", e);
639             }
640         }
641     }
642     else
643     {
644         lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
645     }
646 }
647 
restore()648 void Manager::restore()
649 {
650     auto sanity = [](const auto& id, const auto& restoredId) {
651         return id == restoredId;
652     };
653 
654     fs::path dir(paths::error());
655     if (!fs::exists(dir) || fs::is_empty(dir))
656     {
657         return;
658     }
659 
660     for (auto& file : fs::directory_iterator(dir))
661     {
662         auto id = file.path().filename().c_str();
663         auto idNum = std::stol(id);
664         auto e = std::make_unique<Entry>(
665             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
666         if (deserialize(file.path(), *e))
667         {
668             // validate the restored error entry id
669             if (sanity(static_cast<uint32_t>(idNum), e->id()))
670             {
671                 e->path(file.path(), true);
672                 if (e->severity() >= Entry::sevLowerLimit)
673                 {
674                     infoErrors.push_back(idNum);
675                 }
676                 else
677                 {
678                     realErrors.push_back(idNum);
679                 }
680 
681                 entries.insert(std::make_pair(idNum, std::move(e)));
682             }
683             else
684             {
685                 lg2::error(
686                     "Failed in sanity check while restoring error entry. "
687                     "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
688                     "ID_NUM", idNum, "ENTRY_ID", e->id());
689             }
690         }
691     }
692 
693     if (!entries.empty())
694     {
695         entryId = entries.rbegin()->first;
696     }
697 }
698 
readFWVersion()699 std::string Manager::readFWVersion()
700 {
701     auto version = util::getOSReleaseValue("VERSION_ID");
702 
703     if (!version)
704     {
705         lg2::error("Unable to read BMC firmware version");
706     }
707 
708     return version.value_or("");
709 }
710 
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)711 auto Manager::create(const std::string& message, Entry::Level severity,
712                      const std::map<std::string, std::string>& additionalData,
713                      const FFDCEntries& ffdc) -> sdbusplus::message::object_path
714 {
715     return createEntry(message, severity, additionalData, ffdc);
716 }
717 
718 } // namespace internal
719 } // namespace logging
720 } // namespace phosphor
721