1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12 
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16 
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20 
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <fstream>
26 #include <functional>
27 #include <future>
28 #include <iostream>
29 #include <map>
30 #include <ranges>
31 #include <set>
32 #include <string>
33 #include <string_view>
34 #include <vector>
35 
36 using namespace std::chrono;
37 extern const std::map<
38     phosphor::logging::metadata::Metadata,
39     std::function<phosphor::logging::metadata::associations::Type>>
40     meta;
41 
42 namespace phosphor
43 {
44 namespace logging
45 {
46 namespace internal
47 {
getLevel(const std::string & errMsg)48 inline auto getLevel(const std::string& errMsg)
49 {
50     auto reqLevel = Entry::Level::Error; // Default to Error
51 
52     auto levelmap = g_errLevelMap.find(errMsg);
53     if (levelmap != g_errLevelMap.end())
54     {
55         reqLevel = static_cast<Entry::Level>(levelmap->second);
56     }
57 
58     return reqLevel;
59 }
60 
getRealErrSize()61 int Manager::getRealErrSize()
62 {
63     return realErrors.size();
64 }
65 
getInfoErrSize()66 int Manager::getInfoErrSize()
67 {
68     return infoErrors.size();
69 }
70 
commit(uint64_t transactionId,std::string errMsg)71 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
72 {
73     auto level = getLevel(errMsg);
74     _commit(transactionId, std::move(errMsg), level);
75     return entryId;
76 }
77 
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)78 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
79                                 uint32_t errLvl)
80 {
81     _commit(transactionId, std::move(errMsg),
82             static_cast<Entry::Level>(errLvl));
83     return entryId;
84 }
85 
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)86 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
87                       std::string&& errMsg, Entry::Level errLvl)
88 {
89     std::map<std::string, std::string> additionalData{};
90 
91     // When running as a test-case, the system may have a LOT of journal
92     // data and we may not have permissions to do some of the journal sync
93     // operations.  Just skip over them.
94     if (!IS_UNIT_TEST)
95     {
96         static constexpr auto transactionIdVar =
97             std::string_view{"TRANSACTION_ID"};
98         // Length of 'TRANSACTION_ID' string.
99         static constexpr auto transactionIdVarSize = transactionIdVar.size();
100         // Length of 'TRANSACTION_ID=' string.
101         static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
102 
103         // Flush all the pending log messages into the journal
104         util::journalSync();
105 
106         sd_journal* j = nullptr;
107         int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
108         if (rc < 0)
109         {
110             lg2::error("Failed to open journal: {ERROR}", "ERROR",
111                        strerror(-rc));
112             return;
113         }
114 
115         std::string transactionIdStr = std::to_string(transactionId);
116         std::set<std::string> metalist;
117         auto metamap = g_errMetaMap.find(errMsg);
118         if (metamap != g_errMetaMap.end())
119         {
120             metalist.insert(metamap->second.begin(), metamap->second.end());
121         }
122 
123         // Add _PID field information in AdditionalData.
124         metalist.insert("_PID");
125 
126         // Read the journal from the end to get the most recent entry first.
127         // The result from the sd_journal_get_data() is of the form
128         // VARIABLE=value.
129         SD_JOURNAL_FOREACH_BACKWARDS(j)
130         {
131             const char* data = nullptr;
132             size_t length = 0;
133 
134             // Look for the transaction id metadata variable
135             rc = sd_journal_get_data(j, transactionIdVar.data(),
136                                      (const void**)&data, &length);
137             if (rc < 0)
138             {
139                 // This journal entry does not have the TRANSACTION_ID
140                 // metadata variable.
141                 continue;
142             }
143 
144             // journald does not guarantee that sd_journal_get_data() returns
145             // NULL terminated strings, so need to specify the size to use to
146             // compare, use the returned length instead of anything that relies
147             // on NULL terminators like strlen(). The data variable is in the
148             // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
149             // characters plus the (=) sign to do the comparison. 'data +
150             // transactionIdVarOffset' will be in the form of '1234'. 'length -
151             // transactionIdVarOffset' will be the length of '1234'.
152             if ((length <= (transactionIdVarOffset)) ||
153                 (transactionIdStr.compare(
154                      0, transactionIdStr.size(), data + transactionIdVarOffset,
155                      length - transactionIdVarOffset) != 0))
156             {
157                 // The value of the TRANSACTION_ID metadata is not the requested
158                 // transaction id number.
159                 continue;
160             }
161 
162             // Search for all metadata variables in the current journal entry.
163             for (auto i = metalist.cbegin(); i != metalist.cend();)
164             {
165                 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
166                                          &length);
167                 if (rc < 0)
168                 {
169                     // Metadata variable not found, check next metadata
170                     // variable.
171                     i++;
172                     continue;
173                 }
174 
175                 // Metadata variable found, save it and remove it from the set.
176                 std::string metadata(data, length);
177                 if (auto pos = metadata.find('='); pos != std::string::npos)
178                 {
179                     auto key = metadata.substr(0, pos);
180                     auto value = metadata.substr(pos + 1);
181                     additionalData.emplace(std::move(key), std::move(value));
182                 }
183                 i = metalist.erase(i);
184             }
185             if (metalist.empty())
186             {
187                 // All metadata variables found, break out of journal loop.
188                 break;
189             }
190         }
191         if (!metalist.empty())
192         {
193             // Not all the metadata variables were found in the journal.
194             for (auto& metaVarStr : metalist)
195             {
196                 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
197                           metaVarStr);
198             }
199         }
200 
201         sd_journal_close(j);
202     }
203     createEntry(errMsg, errLvl, additionalData);
204 }
205 
createEntry(std::string errMsg,Entry::Level errLvl,std::map<std::string,std::string> additionalData,const FFDCEntries & ffdc)206 auto Manager::createEntry(
207     std::string errMsg, Entry::Level errLvl,
208     std::map<std::string, std::string> additionalData,
209     const FFDCEntries& ffdc) -> sdbusplus::message::object_path
210 {
211     if (!Extensions::disableDefaultLogCaps())
212     {
213         if (errLvl < Entry::sevLowerLimit)
214         {
215             if (realErrors.size() >= ERROR_CAP)
216             {
217                 erase(realErrors.front());
218             }
219         }
220         else
221         {
222             if (infoErrors.size() >= ERROR_INFO_CAP)
223             {
224                 erase(infoErrors.front());
225             }
226         }
227     }
228 
229     entryId++;
230     if (errLvl >= Entry::sevLowerLimit)
231     {
232         infoErrors.push_back(entryId);
233     }
234     else
235     {
236         realErrors.push_back(entryId);
237     }
238     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
239                   std::chrono::system_clock::now().time_since_epoch())
240                   .count();
241     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
242 
243     AssociationList objects{};
244     auto additionalDataVec = util::additional_data::combine(additionalData);
245     processMetadata(errMsg, additionalDataVec, objects);
246 
247     auto e = std::make_unique<Entry>(
248         busLog, objPath, entryId,
249         ms, // Milliseconds since 1970
250         errLvl, std::move(errMsg), std::move(additionalData),
251         std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
252 
253     serialize(*e);
254 
255     if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
256         isCalloutPresent(*e))
257     {
258         quiesceOnError(entryId);
259     }
260 
261     // Add entry before calling the extensions so that they have access to it
262     entries.insert(std::make_pair(entryId, std::move(e)));
263 
264     doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
265 
266     // Note: No need to close the file descriptors in the FFDC.
267 
268     return objPath;
269 }
270 
createFromEvent(sdbusplus::exception::generated_event_base && event)271 auto Manager::createFromEvent(sdbusplus::exception::generated_event_base&&
272                                   event) -> sdbusplus::message::object_path
273 {
274     auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
275     return this->createEntry(msg, level, std::move(data));
276 }
277 
isQuiesceOnErrorEnabled()278 bool Manager::isQuiesceOnErrorEnabled()
279 {
280     // When running under tests, the Logging.Settings service will not be
281     // present.  Assume false.
282     if (IS_UNIT_TEST)
283     {
284         return false;
285     }
286 
287     std::variant<bool> property;
288 
289     auto method = this->busLog.new_method_call(
290         "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
291         "org.freedesktop.DBus.Properties", "Get");
292 
293     method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
294 
295     try
296     {
297         auto reply = this->busLog.call(method);
298         reply.read(property);
299     }
300     catch (const sdbusplus::exception_t& e)
301     {
302         lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
303                    e);
304         return false;
305     }
306 
307     return std::get<bool>(property);
308 }
309 
isCalloutPresent(const Entry & entry)310 bool Manager::isCalloutPresent(const Entry& entry)
311 {
312     for (const auto& c : std::views::keys(entry.additionalData()))
313     {
314         if (c.find("CALLOUT_") != std::string::npos)
315         {
316             return true;
317         }
318     }
319 
320     return false;
321 }
322 
findAndRemoveResolvedBlocks()323 void Manager::findAndRemoveResolvedBlocks()
324 {
325     for (auto& entry : entries)
326     {
327         if (entry.second->resolved())
328         {
329             checkAndRemoveBlockingError(entry.first);
330         }
331     }
332 }
333 
onEntryResolve(sdbusplus::message_t & msg)334 void Manager::onEntryResolve(sdbusplus::message_t& msg)
335 {
336     using Interface = std::string;
337     using Property = std::string;
338     using Value = std::string;
339     using Properties = std::map<Property, std::variant<Value>>;
340 
341     Interface interface;
342     Properties properties;
343 
344     msg.read(interface, properties);
345 
346     for (const auto& p : properties)
347     {
348         if (p.first == "Resolved")
349         {
350             findAndRemoveResolvedBlocks();
351             return;
352         }
353     }
354 }
355 
checkAndQuiesceHost()356 void Manager::checkAndQuiesceHost()
357 {
358     using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
359 
360     // First check host state
361     std::variant<Host::HostState> property;
362 
363     auto method = this->busLog.new_method_call(
364         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
365         "org.freedesktop.DBus.Properties", "Get");
366 
367     method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
368 
369     try
370     {
371         auto reply = this->busLog.call(method);
372         reply.read(property);
373     }
374     catch (const sdbusplus::exception_t& e)
375     {
376         // Quiescing the host is a "best effort" type function. If unable to
377         // read the host state or it comes back empty, just return.
378         // The boot block object will still be created and the associations to
379         // find the log will be present. Don't want a dependency with
380         // phosphor-state-manager service
381         lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
382                   e);
383         return;
384     }
385 
386     auto hostState = std::get<Host::HostState>(property);
387     if (hostState != Host::HostState::Running)
388     {
389         return;
390     }
391 
392     auto quiesce = this->busLog.new_method_call(
393         "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
394         "org.freedesktop.systemd1.Manager", "StartUnit");
395 
396     quiesce.append("obmc-host-graceful-quiesce@0.target");
397     quiesce.append("replace");
398 
399     this->busLog.call_noreply(quiesce);
400 }
401 
quiesceOnError(const uint32_t entryId)402 void Manager::quiesceOnError(const uint32_t entryId)
403 {
404     // Verify we don't already have this entry blocking
405     auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
406                       [&](const std::unique_ptr<Block>& obj) {
407                           return obj->entryId == entryId;
408                       });
409     if (it != this->blockingErrors.end())
410     {
411         // Already recorded so just return
412         lg2::debug(
413             "QuiesceOnError set and callout present but entry already logged");
414         return;
415     }
416 
417     lg2::info("QuiesceOnError set and callout present");
418 
419     auto blockPath =
420         std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
421     auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
422     this->blockingErrors.push_back(std::move(blockObj));
423 
424     // Register call back if log is resolved
425     using namespace sdbusplus::bus::match::rules;
426     auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
427     auto callback = std::make_unique<sdbusplus::bus::match_t>(
428         this->busLog,
429         propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
430         std::bind(std::mem_fn(&Manager::onEntryResolve), this,
431                   std::placeholders::_1));
432 
433     propChangedEntryCallback.insert(
434         std::make_pair(entryId, std::move(callback)));
435 
436     checkAndQuiesceHost();
437 }
438 
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)439 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
440 {
441     // Make the association <endpointpath>/<endpointtype> paths
442     std::vector<std::string> assocs;
443     for (const auto& [forwardType, reverseType, endpoint] :
444          entry.associations())
445     {
446         std::string e{endpoint};
447         e += '/' + reverseType;
448         assocs.push_back(e);
449     }
450 
451     for (auto& create : Extensions::getCreateFunctions())
452     {
453         try
454         {
455             create(entry.message(), entry.id(), entry.timestamp(),
456                    entry.severity(), entry.additionalData2(), assocs, ffdc);
457         }
458         catch (const std::exception& e)
459         {
460             lg2::error(
461                 "An extension's create function threw an exception: {ERROR}",
462                 "ERROR", e);
463         }
464     }
465 }
466 
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const467 void Manager::processMetadata(const std::string& /*errorName*/,
468                               const std::vector<std::string>& additionalData,
469                               AssociationList& objects) const
470 {
471     // additionalData is a list of "metadata=value"
472     constexpr auto separator = '=';
473     for (const auto& entryItem : additionalData)
474     {
475         auto found = entryItem.find(separator);
476         if (std::string::npos != found)
477         {
478             auto metadata = entryItem.substr(0, found);
479             auto iter = meta.find(metadata);
480             if (meta.end() != iter)
481             {
482                 (iter->second)(metadata, additionalData, objects);
483             }
484         }
485     }
486 }
487 
checkAndRemoveBlockingError(uint32_t entryId)488 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
489 {
490     // First look for blocking object and remove
491     auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
492                       [&](const std::unique_ptr<Block>& obj) {
493                           return obj->entryId == entryId;
494                       });
495     if (it != blockingErrors.end())
496     {
497         blockingErrors.erase(it);
498     }
499 
500     // Now remove the callback looking for the error to be resolved
501     auto resolveFind = propChangedEntryCallback.find(entryId);
502     if (resolveFind != propChangedEntryCallback.end())
503     {
504         propChangedEntryCallback.erase(resolveFind);
505     }
506 
507     return;
508 }
509 
eraseAll()510 size_t Manager::eraseAll()
511 {
512     std::vector<uint32_t> logIDWithHwIsolation;
513     for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
514     {
515         try
516         {
517             func(logIDWithHwIsolation);
518         }
519         catch (const std::exception& e)
520         {
521             lg2::error("An extension's LogIDWithHwIsolation function threw an "
522                        "exception: {ERROR}",
523                        "ERROR", e);
524         }
525     }
526     size_t entriesSize = entries.size();
527     auto iter = entries.begin();
528     if (logIDWithHwIsolation.empty())
529     {
530         while (iter != entries.end())
531         {
532             auto e = iter->first;
533             ++iter;
534             erase(e);
535         }
536         entryId = 0;
537     }
538     else
539     {
540         while (iter != entries.end())
541         {
542             auto e = iter->first;
543             ++iter;
544             try
545             {
546                 if (!std::ranges::contains(logIDWithHwIsolation, e))
547                 {
548                     erase(e);
549                 }
550                 else
551                 {
552                     entriesSize--;
553                 }
554             }
555             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
556                        Unavailable& e)
557             {
558                 entriesSize--;
559             }
560         }
561         if (!entries.empty())
562         {
563             entryId = std::ranges::max_element(entries, [](const auto& a,
564                                                            const auto& b) {
565                           return a.first < b.first;
566                       })->first;
567         }
568         else
569         {
570             entryId = 0;
571         }
572     }
573     return entriesSize;
574 }
575 
erase(uint32_t entryId)576 void Manager::erase(uint32_t entryId)
577 {
578     auto entryFound = entries.find(entryId);
579     if (entries.end() != entryFound)
580     {
581         for (auto& func : Extensions::getDeleteProhibitedFunctions())
582         {
583             try
584             {
585                 bool prohibited = false;
586                 func(entryId, prohibited);
587                 if (prohibited)
588                 {
589                     throw sdbusplus::xyz::openbmc_project::Common::Error::
590                         Unavailable();
591                 }
592             }
593             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
594                        Unavailable& e)
595             {
596                 throw;
597             }
598             catch (const std::exception& e)
599             {
600                 lg2::error("An extension's deleteProhibited function threw an "
601                            "exception: {ERROR}",
602                            "ERROR", e);
603             }
604         }
605 
606         // Delete the persistent representation of this error.
607         fs::path errorPath(paths::error());
608         errorPath /= std::to_string(entryId);
609         fs::remove(errorPath);
610 
611         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
612             auto it = std::find(ids.begin(), ids.end(), id);
613             if (it != ids.end())
614             {
615                 ids.erase(it);
616             }
617         };
618         if (entryFound->second->severity() >= Entry::sevLowerLimit)
619         {
620             removeId(infoErrors, entryId);
621         }
622         else
623         {
624             removeId(realErrors, entryId);
625         }
626         entries.erase(entryFound);
627 
628         checkAndRemoveBlockingError(entryId);
629 
630         for (auto& remove : Extensions::getDeleteFunctions())
631         {
632             try
633             {
634                 remove(entryId);
635             }
636             catch (const std::exception& e)
637             {
638                 lg2::error("An extension's delete function threw an exception: "
639                            "{ERROR}",
640                            "ERROR", e);
641             }
642         }
643     }
644     else
645     {
646         lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
647     }
648 }
649 
restore()650 void Manager::restore()
651 {
652     auto sanity = [](const auto& id, const auto& restoredId) {
653         return id == restoredId;
654     };
655 
656     fs::path dir(paths::error());
657     if (!fs::exists(dir) || fs::is_empty(dir))
658     {
659         return;
660     }
661 
662     for (auto& file : fs::directory_iterator(dir))
663     {
664         auto id = file.path().filename().c_str();
665         auto idNum = std::stol(id);
666         auto e = std::make_unique<Entry>(
667             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
668         if (deserialize(file.path(), *e))
669         {
670             // validate the restored error entry id
671             if (sanity(static_cast<uint32_t>(idNum), e->id()))
672             {
673                 e->path(file.path(), true);
674                 if (e->severity() >= Entry::sevLowerLimit)
675                 {
676                     infoErrors.push_back(idNum);
677                 }
678                 else
679                 {
680                     realErrors.push_back(idNum);
681                 }
682 
683                 entries.insert(std::make_pair(idNum, std::move(e)));
684             }
685             else
686             {
687                 lg2::error(
688                     "Failed in sanity check while restoring error entry. "
689                     "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
690                     "ID_NUM", idNum, "ENTRY_ID", e->id());
691             }
692         }
693     }
694 
695     if (!entries.empty())
696     {
697         entryId = entries.rbegin()->first;
698     }
699 }
700 
readFWVersion()701 std::string Manager::readFWVersion()
702 {
703     auto version = util::getOSReleaseValue("VERSION_ID");
704 
705     if (!version)
706     {
707         lg2::error("Unable to read BMC firmware version");
708     }
709 
710     return version.value_or("");
711 }
712 
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)713 auto Manager::create(const std::string& message, Entry::Level severity,
714                      const std::map<std::string, std::string>& additionalData,
715                      const FFDCEntries& ffdc) -> sdbusplus::message::object_path
716 {
717     return createEntry(message, severity, additionalData, ffdc);
718 }
719 
720 } // namespace internal
721 } // namespace logging
722 } // namespace phosphor
723