1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "paths.hpp"
10 #include "util.hpp"
11 
12 #include <systemd/sd-bus.h>
13 #include <systemd/sd-journal.h>
14 #include <unistd.h>
15 
16 #include <phosphor-logging/lg2.hpp>
17 #include <sdbusplus/vtable.hpp>
18 #include <xyz/openbmc_project/State/Host/server.hpp>
19 
20 #include <cassert>
21 #include <chrono>
22 #include <cstdio>
23 #include <cstring>
24 #include <fstream>
25 #include <functional>
26 #include <future>
27 #include <iostream>
28 #include <map>
29 #include <set>
30 #include <string>
31 #include <string_view>
32 #include <vector>
33 
34 using namespace std::chrono;
// Table keyed by metadata name whose values are handler callables. It is only
// declared here; the definition is not part of this file's visible code.
// Manager::processMetadata() looks up each additional-data key in this table
// and invokes the matching handler.
extern const std::map<
    phosphor::logging::metadata::Metadata,
    std::function<phosphor::logging::metadata::associations::Type>>
    meta;
39 
40 namespace phosphor
41 {
42 namespace logging
43 {
44 namespace internal
45 {
getLevel(const std::string & errMsg)46 inline auto getLevel(const std::string& errMsg)
47 {
48     auto reqLevel = Entry::Level::Error; // Default to Error
49 
50     auto levelmap = g_errLevelMap.find(errMsg);
51     if (levelmap != g_errLevelMap.end())
52     {
53         reqLevel = static_cast<Entry::Level>(levelmap->second);
54     }
55 
56     return reqLevel;
57 }
58 
getRealErrSize()59 int Manager::getRealErrSize()
60 {
61     return realErrors.size();
62 }
63 
getInfoErrSize()64 int Manager::getInfoErrSize()
65 {
66     return infoErrors.size();
67 }
68 
commit(uint64_t transactionId,std::string errMsg)69 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
70 {
71     auto level = getLevel(errMsg);
72     _commit(transactionId, std::move(errMsg), level);
73     return entryId;
74 }
75 
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)76 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
77                                 uint32_t errLvl)
78 {
79     _commit(transactionId, std::move(errMsg),
80             static_cast<Entry::Level>(errLvl));
81     return entryId;
82 }
83 
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)84 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
85                       std::string&& errMsg, Entry::Level errLvl)
86 {
87     std::vector<std::string> additionalData{};
88 
89     // When running as a test-case, the system may have a LOT of journal
90     // data and we may not have permissions to do some of the journal sync
91     // operations.  Just skip over them.
92     if (!IS_UNIT_TEST)
93     {
94         static constexpr auto transactionIdVar =
95             std::string_view{"TRANSACTION_ID"};
96         // Length of 'TRANSACTION_ID' string.
97         static constexpr auto transactionIdVarSize = transactionIdVar.size();
98         // Length of 'TRANSACTION_ID=' string.
99         static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
100 
101         // Flush all the pending log messages into the journal
102         util::journalSync();
103 
104         sd_journal* j = nullptr;
105         int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
106         if (rc < 0)
107         {
108             lg2::error("Failed to open journal: {ERROR}", "ERROR",
109                        strerror(-rc));
110             return;
111         }
112 
113         std::string transactionIdStr = std::to_string(transactionId);
114         std::set<std::string> metalist;
115         auto metamap = g_errMetaMap.find(errMsg);
116         if (metamap != g_errMetaMap.end())
117         {
118             metalist.insert(metamap->second.begin(), metamap->second.end());
119         }
120 
121         // Add _PID field information in AdditionalData.
122         metalist.insert("_PID");
123 
124         // Read the journal from the end to get the most recent entry first.
125         // The result from the sd_journal_get_data() is of the form
126         // VARIABLE=value.
127         SD_JOURNAL_FOREACH_BACKWARDS(j)
128         {
129             const char* data = nullptr;
130             size_t length = 0;
131 
132             // Look for the transaction id metadata variable
133             rc = sd_journal_get_data(j, transactionIdVar.data(),
134                                      (const void**)&data, &length);
135             if (rc < 0)
136             {
137                 // This journal entry does not have the TRANSACTION_ID
138                 // metadata variable.
139                 continue;
140             }
141 
142             // journald does not guarantee that sd_journal_get_data() returns
143             // NULL terminated strings, so need to specify the size to use to
144             // compare, use the returned length instead of anything that relies
145             // on NULL terminators like strlen(). The data variable is in the
146             // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
147             // characters plus the (=) sign to do the comparison. 'data +
148             // transactionIdVarOffset' will be in the form of '1234'. 'length -
149             // transactionIdVarOffset' will be the length of '1234'.
150             if ((length <= (transactionIdVarOffset)) ||
151                 (transactionIdStr.compare(
152                      0, transactionIdStr.size(), data + transactionIdVarOffset,
153                      length - transactionIdVarOffset) != 0))
154             {
155                 // The value of the TRANSACTION_ID metadata is not the requested
156                 // transaction id number.
157                 continue;
158             }
159 
160             // Search for all metadata variables in the current journal entry.
161             for (auto i = metalist.cbegin(); i != metalist.cend();)
162             {
163                 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
164                                          &length);
165                 if (rc < 0)
166                 {
167                     // Metadata variable not found, check next metadata
168                     // variable.
169                     i++;
170                     continue;
171                 }
172 
173                 // Metadata variable found, save it and remove it from the set.
174                 additionalData.emplace_back(data, length);
175                 i = metalist.erase(i);
176             }
177             if (metalist.empty())
178             {
179                 // All metadata variables found, break out of journal loop.
180                 break;
181             }
182         }
183         if (!metalist.empty())
184         {
185             // Not all the metadata variables were found in the journal.
186             for (auto& metaVarStr : metalist)
187             {
188                 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
189                           metaVarStr);
190             }
191         }
192 
193         sd_journal_close(j);
194     }
195     createEntry(errMsg, errLvl, additionalData);
196 }
197 
createEntry(std::string errMsg,Entry::Level errLvl,std::vector<std::string> additionalData,const FFDCEntries & ffdc)198 auto Manager::createEntry(
199     std::string errMsg, Entry::Level errLvl,
200     std::vector<std::string> additionalData,
201     const FFDCEntries& ffdc) -> sdbusplus::message::object_path
202 {
203     if (!Extensions::disableDefaultLogCaps())
204     {
205         if (errLvl < Entry::sevLowerLimit)
206         {
207             if (realErrors.size() >= ERROR_CAP)
208             {
209                 erase(realErrors.front());
210             }
211         }
212         else
213         {
214             if (infoErrors.size() >= ERROR_INFO_CAP)
215             {
216                 erase(infoErrors.front());
217             }
218         }
219     }
220 
221     entryId++;
222     if (errLvl >= Entry::sevLowerLimit)
223     {
224         infoErrors.push_back(entryId);
225     }
226     else
227     {
228         realErrors.push_back(entryId);
229     }
230     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
231                   std::chrono::system_clock::now().time_since_epoch())
232                   .count();
233     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
234 
235     AssociationList objects{};
236     processMetadata(errMsg, additionalData, objects);
237 
238     auto e = std::make_unique<Entry>(
239         busLog, objPath, entryId,
240         ms, // Milliseconds since 1970
241         errLvl, std::move(errMsg), std::move(additionalData),
242         std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
243 
244     serialize(*e);
245 
246     if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
247         isCalloutPresent(*e))
248     {
249         quiesceOnError(entryId);
250     }
251 
252     // Add entry before calling the extensions so that they have access to it
253     entries.insert(std::make_pair(entryId, std::move(e)));
254 
255     doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
256 
257     // Note: No need to close the file descriptors in the FFDC.
258 
259     return objPath;
260 }
261 
isQuiesceOnErrorEnabled()262 bool Manager::isQuiesceOnErrorEnabled()
263 {
264     // When running under tests, the Logging.Settings service will not be
265     // present.  Assume false.
266     if (IS_UNIT_TEST)
267     {
268         return false;
269     }
270 
271     std::variant<bool> property;
272 
273     auto method = this->busLog.new_method_call(
274         "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
275         "org.freedesktop.DBus.Properties", "Get");
276 
277     method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
278 
279     try
280     {
281         auto reply = this->busLog.call(method);
282         reply.read(property);
283     }
284     catch (const sdbusplus::exception_t& e)
285     {
286         lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
287                    e);
288         return false;
289     }
290 
291     return std::get<bool>(property);
292 }
293 
isCalloutPresent(const Entry & entry)294 bool Manager::isCalloutPresent(const Entry& entry)
295 {
296     for (const auto& c : entry.additionalData())
297     {
298         if (c.find("CALLOUT_") != std::string::npos)
299         {
300             return true;
301         }
302     }
303 
304     return false;
305 }
306 
findAndRemoveResolvedBlocks()307 void Manager::findAndRemoveResolvedBlocks()
308 {
309     for (auto& entry : entries)
310     {
311         if (entry.second->resolved())
312         {
313             checkAndRemoveBlockingError(entry.first);
314         }
315     }
316 }
317 
onEntryResolve(sdbusplus::message_t & msg)318 void Manager::onEntryResolve(sdbusplus::message_t& msg)
319 {
320     using Interface = std::string;
321     using Property = std::string;
322     using Value = std::string;
323     using Properties = std::map<Property, std::variant<Value>>;
324 
325     Interface interface;
326     Properties properties;
327 
328     msg.read(interface, properties);
329 
330     for (const auto& p : properties)
331     {
332         if (p.first == "Resolved")
333         {
334             findAndRemoveResolvedBlocks();
335             return;
336         }
337     }
338 }
339 
checkAndQuiesceHost()340 void Manager::checkAndQuiesceHost()
341 {
342     using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
343 
344     // First check host state
345     std::variant<Host::HostState> property;
346 
347     auto method = this->busLog.new_method_call(
348         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
349         "org.freedesktop.DBus.Properties", "Get");
350 
351     method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
352 
353     try
354     {
355         auto reply = this->busLog.call(method);
356         reply.read(property);
357     }
358     catch (const sdbusplus::exception_t& e)
359     {
360         // Quiescing the host is a "best effort" type function. If unable to
361         // read the host state or it comes back empty, just return.
362         // The boot block object will still be created and the associations to
363         // find the log will be present. Don't want a dependency with
364         // phosphor-state-manager service
365         lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
366                   e);
367         return;
368     }
369 
370     auto hostState = std::get<Host::HostState>(property);
371     if (hostState != Host::HostState::Running)
372     {
373         return;
374     }
375 
376     auto quiesce = this->busLog.new_method_call(
377         "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
378         "org.freedesktop.systemd1.Manager", "StartUnit");
379 
380     quiesce.append("obmc-host-graceful-quiesce@0.target");
381     quiesce.append("replace");
382 
383     this->busLog.call_noreply(quiesce);
384 }
385 
quiesceOnError(const uint32_t entryId)386 void Manager::quiesceOnError(const uint32_t entryId)
387 {
388     // Verify we don't already have this entry blocking
389     auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
390                       [&](const std::unique_ptr<Block>& obj) {
391                           return obj->entryId == entryId;
392                       });
393     if (it != this->blockingErrors.end())
394     {
395         // Already recorded so just return
396         lg2::debug(
397             "QuiesceOnError set and callout present but entry already logged");
398         return;
399     }
400 
401     lg2::info("QuiesceOnError set and callout present");
402 
403     auto blockPath =
404         std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
405     auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
406     this->blockingErrors.push_back(std::move(blockObj));
407 
408     // Register call back if log is resolved
409     using namespace sdbusplus::bus::match::rules;
410     auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
411     auto callback = std::make_unique<sdbusplus::bus::match_t>(
412         this->busLog,
413         propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
414         std::bind(std::mem_fn(&Manager::onEntryResolve), this,
415                   std::placeholders::_1));
416 
417     propChangedEntryCallback.insert(
418         std::make_pair(entryId, std::move(callback)));
419 
420     checkAndQuiesceHost();
421 }
422 
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)423 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
424 {
425     // Make the association <endpointpath>/<endpointtype> paths
426     std::vector<std::string> assocs;
427     for (const auto& [forwardType, reverseType, endpoint] :
428          entry.associations())
429     {
430         std::string e{endpoint};
431         e += '/' + reverseType;
432         assocs.push_back(e);
433     }
434 
435     for (auto& create : Extensions::getCreateFunctions())
436     {
437         try
438         {
439             create(entry.message(), entry.id(), entry.timestamp(),
440                    entry.severity(), entry.additionalData(), assocs, ffdc);
441         }
442         catch (const std::exception& e)
443         {
444             lg2::error(
445                 "An extension's create function threw an exception: {ERROR}",
446                 "ERROR", e);
447         }
448     }
449 }
450 
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const451 void Manager::processMetadata(const std::string& /*errorName*/,
452                               const std::vector<std::string>& additionalData,
453                               AssociationList& objects) const
454 {
455     // additionalData is a list of "metadata=value"
456     constexpr auto separator = '=';
457     for (const auto& entryItem : additionalData)
458     {
459         auto found = entryItem.find(separator);
460         if (std::string::npos != found)
461         {
462             auto metadata = entryItem.substr(0, found);
463             auto iter = meta.find(metadata);
464             if (meta.end() != iter)
465             {
466                 (iter->second)(metadata, additionalData, objects);
467             }
468         }
469     }
470 }
471 
checkAndRemoveBlockingError(uint32_t entryId)472 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
473 {
474     // First look for blocking object and remove
475     auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
476                       [&](const std::unique_ptr<Block>& obj) {
477                           return obj->entryId == entryId;
478                       });
479     if (it != blockingErrors.end())
480     {
481         blockingErrors.erase(it);
482     }
483 
484     // Now remove the callback looking for the error to be resolved
485     auto resolveFind = propChangedEntryCallback.find(entryId);
486     if (resolveFind != propChangedEntryCallback.end())
487     {
488         propChangedEntryCallback.erase(resolveFind);
489     }
490 
491     return;
492 }
493 
eraseAll()494 size_t Manager::eraseAll()
495 {
496     std::vector<uint32_t> logIDWithHwIsolation;
497     for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
498     {
499         try
500         {
501             func(logIDWithHwIsolation);
502         }
503         catch (const std::exception& e)
504         {
505             lg2::error("An extension's LogIDWithHwIsolation function threw an "
506                        "exception: {ERROR}",
507                        "ERROR", e);
508         }
509     }
510     size_t entriesSize = entries.size();
511     auto iter = entries.begin();
512     if (logIDWithHwIsolation.empty())
513     {
514         while (iter != entries.end())
515         {
516             auto e = iter->first;
517             ++iter;
518             erase(e);
519         }
520         entryId = 0;
521     }
522     else
523     {
524         while (iter != entries.end())
525         {
526             auto e = iter->first;
527             ++iter;
528             try
529             {
530                 if (!std::ranges::contains(logIDWithHwIsolation, e))
531                 {
532                     erase(e);
533                 }
534                 else
535                 {
536                     entriesSize--;
537                 }
538             }
539             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
540                        Unavailable& e)
541             {
542                 entriesSize--;
543             }
544         }
545         if (!entries.empty())
546         {
547             entryId = std::ranges::max_element(entries, [](const auto& a,
548                                                            const auto& b) {
549                           return a.first < b.first;
550                       })->first;
551         }
552         else
553         {
554             entryId = 0;
555         }
556     }
557     return entriesSize;
558 }
559 
erase(uint32_t entryId)560 void Manager::erase(uint32_t entryId)
561 {
562     auto entryFound = entries.find(entryId);
563     if (entries.end() != entryFound)
564     {
565         for (auto& func : Extensions::getDeleteProhibitedFunctions())
566         {
567             try
568             {
569                 bool prohibited = false;
570                 func(entryId, prohibited);
571                 if (prohibited)
572                 {
573                     throw sdbusplus::xyz::openbmc_project::Common::Error::
574                         Unavailable();
575                 }
576             }
577             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
578                        Unavailable& e)
579             {
580                 throw;
581             }
582             catch (const std::exception& e)
583             {
584                 lg2::error("An extension's deleteProhibited function threw an "
585                            "exception: {ERROR}",
586                            "ERROR", e);
587             }
588         }
589 
590         // Delete the persistent representation of this error.
591         fs::path errorPath(paths::error());
592         errorPath /= std::to_string(entryId);
593         fs::remove(errorPath);
594 
595         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
596             auto it = std::find(ids.begin(), ids.end(), id);
597             if (it != ids.end())
598             {
599                 ids.erase(it);
600             }
601         };
602         if (entryFound->second->severity() >= Entry::sevLowerLimit)
603         {
604             removeId(infoErrors, entryId);
605         }
606         else
607         {
608             removeId(realErrors, entryId);
609         }
610         entries.erase(entryFound);
611 
612         checkAndRemoveBlockingError(entryId);
613 
614         for (auto& remove : Extensions::getDeleteFunctions())
615         {
616             try
617             {
618                 remove(entryId);
619             }
620             catch (const std::exception& e)
621             {
622                 lg2::error("An extension's delete function threw an exception: "
623                            "{ERROR}",
624                            "ERROR", e);
625             }
626         }
627     }
628     else
629     {
630         lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
631     }
632 }
633 
restore()634 void Manager::restore()
635 {
636     auto sanity = [](const auto& id, const auto& restoredId) {
637         return id == restoredId;
638     };
639 
640     fs::path dir(paths::error());
641     if (!fs::exists(dir) || fs::is_empty(dir))
642     {
643         return;
644     }
645 
646     for (auto& file : fs::directory_iterator(dir))
647     {
648         auto id = file.path().filename().c_str();
649         auto idNum = std::stol(id);
650         auto e = std::make_unique<Entry>(
651             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
652         if (deserialize(file.path(), *e))
653         {
654             // validate the restored error entry id
655             if (sanity(static_cast<uint32_t>(idNum), e->id()))
656             {
657                 e->path(file.path(), true);
658                 if (e->severity() >= Entry::sevLowerLimit)
659                 {
660                     infoErrors.push_back(idNum);
661                 }
662                 else
663                 {
664                     realErrors.push_back(idNum);
665                 }
666 
667                 entries.insert(std::make_pair(idNum, std::move(e)));
668             }
669             else
670             {
671                 lg2::error(
672                     "Failed in sanity check while restoring error entry. "
673                     "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
674                     "ID_NUM", idNum, "ENTRY_ID", e->id());
675             }
676         }
677     }
678 
679     if (!entries.empty())
680     {
681         entryId = entries.rbegin()->first;
682     }
683 }
684 
readFWVersion()685 std::string Manager::readFWVersion()
686 {
687     auto version = util::getOSReleaseValue("VERSION_ID");
688 
689     if (!version)
690     {
691         lg2::error("Unable to read BMC firmware version");
692     }
693 
694     return version.value_or("");
695 }
696 
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)697 auto Manager::create(const std::string& message, Entry::Level severity,
698                      const std::map<std::string, std::string>& additionalData,
699                      const FFDCEntries& ffdc) -> sdbusplus::message::object_path
700 {
701     // Convert the map into a vector of "key=value" strings
702     std::vector<std::string> ad;
703     metadata::associations::combine(additionalData, ad);
704 
705     return createEntry(message, severity, ad, ffdc);
706 }
707 
708 } // namespace internal
709 } // namespace logging
710 } // namespace phosphor
711