xref: /openbmc/phosphor-logging/log_manager.cpp (revision b6d3e2fc727801053925d99081486989fc320900)
1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12 
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16 
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20 
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <fstream>
26 #include <functional>
27 #include <future>
28 #include <iostream>
29 #include <map>
30 #include <ranges>
31 #include <set>
32 #include <string>
33 #include <string_view>
34 #include <vector>
35 
36 using namespace std::chrono;
37 extern const std::map<
38     phosphor::logging::metadata::Metadata,
39     std::function<phosphor::logging::metadata::associations::Type>>
40     meta;
41 
42 namespace phosphor
43 {
44 namespace logging
45 {
46 namespace internal
47 {
getLevel(const std::string & errMsg)48 inline auto getLevel(const std::string& errMsg)
49 {
50     auto reqLevel = Entry::Level::Error; // Default to Error
51 
52     auto levelmap = g_errLevelMap.find(errMsg);
53     if (levelmap != g_errLevelMap.end())
54     {
55         reqLevel = static_cast<Entry::Level>(levelmap->second);
56     }
57 
58     return reqLevel;
59 }
60 
getRealErrSize()61 int Manager::getRealErrSize()
62 {
63     return realErrors.size();
64 }
65 
getInfoErrSize()66 int Manager::getInfoErrSize()
67 {
68     return infoErrors.size();
69 }
70 
commit(uint64_t transactionId,std::string errMsg)71 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
72 {
73     auto level = getLevel(errMsg);
74     _commit(transactionId, std::move(errMsg), level);
75     return entryId;
76 }
77 
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)78 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
79                                 uint32_t errLvl)
80 {
81     _commit(transactionId, std::move(errMsg),
82             static_cast<Entry::Level>(errLvl));
83     return entryId;
84 }
85 
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)86 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
87                       std::string&& errMsg, Entry::Level errLvl)
88 {
89     std::map<std::string, std::string> additionalData{};
90 
91     // When running as a test-case, the system may have a LOT of journal
92     // data and we may not have permissions to do some of the journal sync
93     // operations.  Just skip over them.
94     if (!IS_UNIT_TEST)
95     {
96         static constexpr auto transactionIdVar =
97             std::string_view{"TRANSACTION_ID"};
98         // Length of 'TRANSACTION_ID' string.
99         static constexpr auto transactionIdVarSize = transactionIdVar.size();
100         // Length of 'TRANSACTION_ID=' string.
101         static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
102 
103         // Flush all the pending log messages into the journal
104         util::journalSync();
105 
106         sd_journal* j = nullptr;
107         int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
108         if (rc < 0)
109         {
110             lg2::error("Failed to open journal: {ERROR}", "ERROR",
111                        strerror(-rc));
112             return;
113         }
114 
115         std::string transactionIdStr = std::to_string(transactionId);
116         std::set<std::string> metalist;
117         auto metamap = g_errMetaMap.find(errMsg);
118         if (metamap != g_errMetaMap.end())
119         {
120             metalist.insert(metamap->second.begin(), metamap->second.end());
121         }
122 
123         // Add _PID field information in AdditionalData.
124         metalist.insert("_PID");
125 
126         // Read the journal from the end to get the most recent entry first.
127         // The result from the sd_journal_get_data() is of the form
128         // VARIABLE=value.
129         SD_JOURNAL_FOREACH_BACKWARDS(j)
130         {
131             const char* data = nullptr;
132             size_t length = 0;
133 
134             // Look for the transaction id metadata variable
135             rc = sd_journal_get_data(j, transactionIdVar.data(),
136                                      (const void**)&data, &length);
137             if (rc < 0)
138             {
139                 // This journal entry does not have the TRANSACTION_ID
140                 // metadata variable.
141                 continue;
142             }
143 
144             // journald does not guarantee that sd_journal_get_data() returns
145             // NULL terminated strings, so need to specify the size to use to
146             // compare, use the returned length instead of anything that relies
147             // on NULL terminators like strlen(). The data variable is in the
148             // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
149             // characters plus the (=) sign to do the comparison. 'data +
150             // transactionIdVarOffset' will be in the form of '1234'. 'length -
151             // transactionIdVarOffset' will be the length of '1234'.
152             if ((length <= (transactionIdVarOffset)) ||
153                 (transactionIdStr.compare(
154                      0, transactionIdStr.size(), data + transactionIdVarOffset,
155                      length - transactionIdVarOffset) != 0))
156             {
157                 // The value of the TRANSACTION_ID metadata is not the requested
158                 // transaction id number.
159                 continue;
160             }
161 
162             // Search for all metadata variables in the current journal entry.
163             for (auto i = metalist.cbegin(); i != metalist.cend();)
164             {
165                 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
166                                          &length);
167                 if (rc < 0)
168                 {
169                     // Metadata variable not found, check next metadata
170                     // variable.
171                     i++;
172                     continue;
173                 }
174 
175                 // Metadata variable found, save it and remove it from the set.
176                 std::string metadata(data, length);
177                 if (auto pos = metadata.find('='); pos != std::string::npos)
178                 {
179                     auto key = metadata.substr(0, pos);
180                     auto value = metadata.substr(pos + 1);
181                     additionalData.emplace(std::move(key), std::move(value));
182                 }
183                 i = metalist.erase(i);
184             }
185             if (metalist.empty())
186             {
187                 // All metadata variables found, break out of journal loop.
188                 break;
189             }
190         }
191         if (!metalist.empty())
192         {
193             // Not all the metadata variables were found in the journal.
194             for (auto& metaVarStr : metalist)
195             {
196                 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
197                           metaVarStr);
198             }
199         }
200 
201         sd_journal_close(j);
202     }
203     createEntry(errMsg, errLvl, additionalData);
204 }
205 
createEntry(std::string errMsg,Entry::Level errLvl,std::map<std::string,std::string> additionalData,const FFDCEntries & ffdc)206 auto Manager::createEntry(std::string errMsg, Entry::Level errLvl,
207                           std::map<std::string, std::string> additionalData,
208                           const FFDCEntries& ffdc)
209     -> sdbusplus::message::object_path
210 {
211     if (!Extensions::disableDefaultLogCaps())
212     {
213         if (errLvl < Entry::sevLowerLimit)
214         {
215             if (realErrors.size() >= ERROR_CAP)
216             {
217                 erase(realErrors.front());
218             }
219         }
220         else
221         {
222             if (infoErrors.size() >= ERROR_INFO_CAP)
223             {
224                 erase(infoErrors.front());
225             }
226         }
227     }
228 
229     entryId++;
230     if (errLvl >= Entry::sevLowerLimit)
231     {
232         infoErrors.push_back(entryId);
233     }
234     else
235     {
236         realErrors.push_back(entryId);
237     }
238     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
239                   std::chrono::system_clock::now().time_since_epoch())
240                   .count();
241     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
242 
243     AssociationList objects{};
244     auto additionalDataVec = util::additional_data::combine(additionalData);
245     processMetadata(errMsg, additionalDataVec, objects);
246 
247     auto e = std::make_unique<Entry>(
248         busLog, objPath, entryId,
249         ms, // Milliseconds since 1970
250         errLvl, std::move(errMsg), std::move(additionalData),
251         std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
252 
253     serialize(*e);
254 
255     if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
256         isCalloutPresent(*e))
257     {
258         quiesceOnError(entryId);
259     }
260 
261     // Add entry before calling the extensions so that they have access to it
262     entries.insert(std::make_pair(entryId, std::move(e)));
263 
264     doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
265 
266     // Note: No need to close the file descriptors in the FFDC.
267 
268     return objPath;
269 }
270 
271 auto
createFromEvent(sdbusplus::exception::generated_event_base && event)272     Manager::createFromEvent(sdbusplus::exception::generated_event_base&& event)
273         -> sdbusplus::message::object_path
274 {
275     auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
276     return this->createEntry(msg, level, std::move(data));
277 }
278 
isQuiesceOnErrorEnabled()279 bool Manager::isQuiesceOnErrorEnabled()
280 {
281     // When running under tests, the Logging.Settings service will not be
282     // present.  Assume false.
283     if (IS_UNIT_TEST)
284     {
285         return false;
286     }
287 
288     std::variant<bool> property;
289 
290     auto method = this->busLog.new_method_call(
291         "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
292         "org.freedesktop.DBus.Properties", "Get");
293 
294     method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
295 
296     try
297     {
298         auto reply = this->busLog.call(method);
299         reply.read(property);
300     }
301     catch (const sdbusplus::exception_t& e)
302     {
303         lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
304                    e);
305         return false;
306     }
307 
308     return std::get<bool>(property);
309 }
310 
isCalloutPresent(const Entry & entry)311 bool Manager::isCalloutPresent(const Entry& entry)
312 {
313     for (const auto& c : std::views::keys(entry.additionalData()))
314     {
315         if (c.find("CALLOUT_") != std::string::npos)
316         {
317             return true;
318         }
319     }
320 
321     return false;
322 }
323 
findAndRemoveResolvedBlocks()324 void Manager::findAndRemoveResolvedBlocks()
325 {
326     for (auto& entry : entries)
327     {
328         if (entry.second->resolved())
329         {
330             checkAndRemoveBlockingError(entry.first);
331         }
332     }
333 }
334 
onEntryResolve(sdbusplus::message_t & msg)335 void Manager::onEntryResolve(sdbusplus::message_t& msg)
336 {
337     using Interface = std::string;
338     using Property = std::string;
339     using Value = std::string;
340     using Properties = std::map<Property, std::variant<Value>>;
341 
342     Interface interface;
343     Properties properties;
344 
345     msg.read(interface, properties);
346 
347     for (const auto& p : properties)
348     {
349         if (p.first == "Resolved")
350         {
351             findAndRemoveResolvedBlocks();
352             return;
353         }
354     }
355 }
356 
checkAndQuiesceHost()357 void Manager::checkAndQuiesceHost()
358 {
359     using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
360 
361     // First check host state
362     std::variant<Host::HostState> property;
363 
364     auto method = this->busLog.new_method_call(
365         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
366         "org.freedesktop.DBus.Properties", "Get");
367 
368     method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
369 
370     try
371     {
372         auto reply = this->busLog.call(method);
373         reply.read(property);
374     }
375     catch (const sdbusplus::exception_t& e)
376     {
377         // Quiescing the host is a "best effort" type function. If unable to
378         // read the host state or it comes back empty, just return.
379         // The boot block object will still be created and the associations to
380         // find the log will be present. Don't want a dependency with
381         // phosphor-state-manager service
382         lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
383                   e);
384         return;
385     }
386 
387     auto hostState = std::get<Host::HostState>(property);
388     if (hostState != Host::HostState::Running)
389     {
390         return;
391     }
392 
393     auto quiesce = this->busLog.new_method_call(
394         "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
395         "org.freedesktop.systemd1.Manager", "StartUnit");
396 
397     quiesce.append("obmc-host-graceful-quiesce@0.target");
398     quiesce.append("replace");
399 
400     this->busLog.call_noreply(quiesce);
401 }
402 
quiesceOnError(const uint32_t entryId)403 void Manager::quiesceOnError(const uint32_t entryId)
404 {
405     // Verify we don't already have this entry blocking
406     auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
407                       [&](const std::unique_ptr<Block>& obj) {
408                           return obj->entryId == entryId;
409                       });
410     if (it != this->blockingErrors.end())
411     {
412         // Already recorded so just return
413         lg2::debug(
414             "QuiesceOnError set and callout present but entry already logged");
415         return;
416     }
417 
418     lg2::info("QuiesceOnError set and callout present");
419 
420     auto blockPath =
421         std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
422     auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
423     this->blockingErrors.push_back(std::move(blockObj));
424 
425     // Register call back if log is resolved
426     using namespace sdbusplus::bus::match::rules;
427     auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
428     auto callback = std::make_unique<sdbusplus::bus::match_t>(
429         this->busLog,
430         propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
431         std::bind(std::mem_fn(&Manager::onEntryResolve), this,
432                   std::placeholders::_1));
433 
434     propChangedEntryCallback.insert(
435         std::make_pair(entryId, std::move(callback)));
436 
437     checkAndQuiesceHost();
438 }
439 
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)440 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
441 {
442     // Make the association <endpointpath>/<endpointtype> paths
443     std::vector<std::string> assocs;
444     for (const auto& [forwardType, reverseType, endpoint] :
445          entry.associations())
446     {
447         std::string e{endpoint};
448         e += '/' + reverseType;
449         assocs.push_back(e);
450     }
451 
452     for (auto& create : Extensions::getCreateFunctions())
453     {
454         try
455         {
456             create(entry.message(), entry.id(), entry.timestamp(),
457                    entry.severity(), entry.additionalData2(), assocs, ffdc);
458         }
459         catch (const std::exception& e)
460         {
461             lg2::error(
462                 "An extension's create function threw an exception: {ERROR}",
463                 "ERROR", e);
464         }
465     }
466 }
467 
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const468 void Manager::processMetadata(const std::string& /*errorName*/,
469                               const std::vector<std::string>& additionalData,
470                               AssociationList& objects) const
471 {
472     // additionalData is a list of "metadata=value"
473     constexpr auto separator = '=';
474     for (const auto& entryItem : additionalData)
475     {
476         auto found = entryItem.find(separator);
477         if (std::string::npos != found)
478         {
479             auto metadata = entryItem.substr(0, found);
480             auto iter = meta.find(metadata);
481             if (meta.end() != iter)
482             {
483                 (iter->second)(metadata, additionalData, objects);
484             }
485         }
486     }
487 }
488 
checkAndRemoveBlockingError(uint32_t entryId)489 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
490 {
491     // First look for blocking object and remove
492     auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
493                       [&](const std::unique_ptr<Block>& obj) {
494                           return obj->entryId == entryId;
495                       });
496     if (it != blockingErrors.end())
497     {
498         blockingErrors.erase(it);
499     }
500 
501     // Now remove the callback looking for the error to be resolved
502     auto resolveFind = propChangedEntryCallback.find(entryId);
503     if (resolveFind != propChangedEntryCallback.end())
504     {
505         propChangedEntryCallback.erase(resolveFind);
506     }
507 
508     return;
509 }
510 
eraseAll()511 size_t Manager::eraseAll()
512 {
513     std::vector<uint32_t> logIDWithHwIsolation;
514     for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
515     {
516         try
517         {
518             func(logIDWithHwIsolation);
519         }
520         catch (const std::exception& e)
521         {
522             lg2::error("An extension's LogIDWithHwIsolation function threw an "
523                        "exception: {ERROR}",
524                        "ERROR", e);
525         }
526     }
527     size_t entriesSize = entries.size();
528     auto iter = entries.begin();
529     if (logIDWithHwIsolation.empty())
530     {
531         while (iter != entries.end())
532         {
533             auto e = iter->first;
534             ++iter;
535             erase(e);
536         }
537         entryId = 0;
538     }
539     else
540     {
541         while (iter != entries.end())
542         {
543             auto e = iter->first;
544             ++iter;
545             try
546             {
547                 if (!std::ranges::contains(logIDWithHwIsolation, e))
548                 {
549                     erase(e);
550                 }
551                 else
552                 {
553                     entriesSize--;
554                 }
555             }
556             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
557                        Unavailable& e)
558             {
559                 entriesSize--;
560             }
561         }
562         if (!entries.empty())
563         {
564             entryId = std::ranges::max_element(entries, [](const auto& a,
565                                                            const auto& b) {
566                           return a.first < b.first;
567                       })->first;
568         }
569         else
570         {
571             entryId = 0;
572         }
573     }
574     return entriesSize;
575 }
576 
erase(uint32_t entryId)577 void Manager::erase(uint32_t entryId)
578 {
579     auto entryFound = entries.find(entryId);
580     if (entries.end() != entryFound)
581     {
582         for (auto& func : Extensions::getDeleteProhibitedFunctions())
583         {
584             try
585             {
586                 bool prohibited = false;
587                 func(entryId, prohibited);
588                 if (prohibited)
589                 {
590                     throw sdbusplus::xyz::openbmc_project::Common::Error::
591                         Unavailable();
592                 }
593             }
594             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
595                        Unavailable& e)
596             {
597                 throw;
598             }
599             catch (const std::exception& e)
600             {
601                 lg2::error("An extension's deleteProhibited function threw an "
602                            "exception: {ERROR}",
603                            "ERROR", e);
604             }
605         }
606 
607         // Delete the persistent representation of this error.
608         fs::path errorPath(paths::error());
609         errorPath /= std::to_string(entryId);
610         fs::remove(errorPath);
611 
612         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
613             auto it = std::find(ids.begin(), ids.end(), id);
614             if (it != ids.end())
615             {
616                 ids.erase(it);
617             }
618         };
619         if (entryFound->second->severity() >= Entry::sevLowerLimit)
620         {
621             removeId(infoErrors, entryId);
622         }
623         else
624         {
625             removeId(realErrors, entryId);
626         }
627         entries.erase(entryFound);
628 
629         checkAndRemoveBlockingError(entryId);
630 
631         for (auto& remove : Extensions::getDeleteFunctions())
632         {
633             try
634             {
635                 remove(entryId);
636             }
637             catch (const std::exception& e)
638             {
639                 lg2::error("An extension's delete function threw an exception: "
640                            "{ERROR}",
641                            "ERROR", e);
642             }
643         }
644     }
645     else
646     {
647         lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
648     }
649 }
650 
restore()651 void Manager::restore()
652 {
653     auto sanity = [](const auto& id, const auto& restoredId) {
654         return id == restoredId;
655     };
656 
657     fs::path dir(paths::error());
658     if (!fs::exists(dir) || fs::is_empty(dir))
659     {
660         return;
661     }
662 
663     for (auto& file : fs::directory_iterator(dir))
664     {
665         auto id = file.path().filename().c_str();
666         auto idNum = std::stol(id);
667         auto e = std::make_unique<Entry>(
668             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
669         if (deserialize(file.path(), *e))
670         {
671             // validate the restored error entry id
672             if (sanity(static_cast<uint32_t>(idNum), e->id()))
673             {
674                 e->path(file.path(), true);
675                 if (e->severity() >= Entry::sevLowerLimit)
676                 {
677                     infoErrors.push_back(idNum);
678                 }
679                 else
680                 {
681                     realErrors.push_back(idNum);
682                 }
683 
684                 entries.insert(std::make_pair(idNum, std::move(e)));
685             }
686             else
687             {
688                 lg2::error(
689                     "Failed in sanity check while restoring error entry. "
690                     "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
691                     "ID_NUM", idNum, "ENTRY_ID", e->id());
692             }
693         }
694     }
695 
696     if (!entries.empty())
697     {
698         entryId = entries.rbegin()->first;
699     }
700 }
701 
readFWVersion()702 std::string Manager::readFWVersion()
703 {
704     auto version = util::getOSReleaseValue("VERSION_ID");
705 
706     if (!version)
707     {
708         lg2::error("Unable to read BMC firmware version");
709     }
710 
711     return version.value_or("");
712 }
713 
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)714 auto Manager::create(const std::string& message, Entry::Level severity,
715                      const std::map<std::string, std::string>& additionalData,
716                      const FFDCEntries& ffdc) -> sdbusplus::message::object_path
717 {
718     return createEntry(message, severity, additionalData, ffdc);
719 }
720 
721 } // namespace internal
722 } // namespace logging
723 } // namespace phosphor
724