xref: /openbmc/phosphor-logging/log_manager.cpp (revision 9ca4d137ad0f0dd03074652c55186a035fba27d1)
1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "lib/lg2_commit.hpp"
10 #include "paths.hpp"
11 #include "util.hpp"
12 
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16 
17 #include <phosphor-logging/lg2.hpp>
18 #include <sdbusplus/vtable.hpp>
19 #include <xyz/openbmc_project/State/Host/server.hpp>
20 
21 #include <cassert>
22 #include <chrono>
23 #include <cstdio>
24 #include <cstring>
25 #include <fstream>
26 #include <functional>
27 #include <future>
28 #include <iostream>
29 #include <map>
30 #include <set>
31 #include <string>
32 #include <string_view>
33 #include <vector>
34 
35 using namespace std::chrono;
36 extern const std::map<
37     phosphor::logging::metadata::Metadata,
38     std::function<phosphor::logging::metadata::associations::Type>>
39     meta;
40 
41 namespace phosphor
42 {
43 namespace logging
44 {
45 namespace internal
46 {
47 inline auto getLevel(const std::string& errMsg)
48 {
49     auto reqLevel = Entry::Level::Error; // Default to Error
50 
51     auto levelmap = g_errLevelMap.find(errMsg);
52     if (levelmap != g_errLevelMap.end())
53     {
54         reqLevel = static_cast<Entry::Level>(levelmap->second);
55     }
56 
57     return reqLevel;
58 }
59 
60 int Manager::getRealErrSize()
61 {
62     return realErrors.size();
63 }
64 
65 int Manager::getInfoErrSize()
66 {
67     return infoErrors.size();
68 }
69 
70 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
71 {
72     auto level = getLevel(errMsg);
73     _commit(transactionId, std::move(errMsg), level);
74     return entryId;
75 }
76 
77 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
78                                 uint32_t errLvl)
79 {
80     _commit(transactionId, std::move(errMsg),
81             static_cast<Entry::Level>(errLvl));
82     return entryId;
83 }
84 
85 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
86                       std::string&& errMsg, Entry::Level errLvl)
87 {
88     std::vector<std::string> additionalData{};
89 
90     // When running as a test-case, the system may have a LOT of journal
91     // data and we may not have permissions to do some of the journal sync
92     // operations.  Just skip over them.
93     if (!IS_UNIT_TEST)
94     {
95         static constexpr auto transactionIdVar =
96             std::string_view{"TRANSACTION_ID"};
97         // Length of 'TRANSACTION_ID' string.
98         static constexpr auto transactionIdVarSize = transactionIdVar.size();
99         // Length of 'TRANSACTION_ID=' string.
100         static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
101 
102         // Flush all the pending log messages into the journal
103         util::journalSync();
104 
105         sd_journal* j = nullptr;
106         int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
107         if (rc < 0)
108         {
109             lg2::error("Failed to open journal: {ERROR}", "ERROR",
110                        strerror(-rc));
111             return;
112         }
113 
114         std::string transactionIdStr = std::to_string(transactionId);
115         std::set<std::string> metalist;
116         auto metamap = g_errMetaMap.find(errMsg);
117         if (metamap != g_errMetaMap.end())
118         {
119             metalist.insert(metamap->second.begin(), metamap->second.end());
120         }
121 
122         // Add _PID field information in AdditionalData.
123         metalist.insert("_PID");
124 
125         // Read the journal from the end to get the most recent entry first.
126         // The result from the sd_journal_get_data() is of the form
127         // VARIABLE=value.
128         SD_JOURNAL_FOREACH_BACKWARDS(j)
129         {
130             const char* data = nullptr;
131             size_t length = 0;
132 
133             // Look for the transaction id metadata variable
134             rc = sd_journal_get_data(j, transactionIdVar.data(),
135                                      (const void**)&data, &length);
136             if (rc < 0)
137             {
138                 // This journal entry does not have the TRANSACTION_ID
139                 // metadata variable.
140                 continue;
141             }
142 
143             // journald does not guarantee that sd_journal_get_data() returns
144             // NULL terminated strings, so need to specify the size to use to
145             // compare, use the returned length instead of anything that relies
146             // on NULL terminators like strlen(). The data variable is in the
147             // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
148             // characters plus the (=) sign to do the comparison. 'data +
149             // transactionIdVarOffset' will be in the form of '1234'. 'length -
150             // transactionIdVarOffset' will be the length of '1234'.
151             if ((length <= (transactionIdVarOffset)) ||
152                 (transactionIdStr.compare(
153                      0, transactionIdStr.size(), data + transactionIdVarOffset,
154                      length - transactionIdVarOffset) != 0))
155             {
156                 // The value of the TRANSACTION_ID metadata is not the requested
157                 // transaction id number.
158                 continue;
159             }
160 
161             // Search for all metadata variables in the current journal entry.
162             for (auto i = metalist.cbegin(); i != metalist.cend();)
163             {
164                 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
165                                          &length);
166                 if (rc < 0)
167                 {
168                     // Metadata variable not found, check next metadata
169                     // variable.
170                     i++;
171                     continue;
172                 }
173 
174                 // Metadata variable found, save it and remove it from the set.
175                 additionalData.emplace_back(data, length);
176                 i = metalist.erase(i);
177             }
178             if (metalist.empty())
179             {
180                 // All metadata variables found, break out of journal loop.
181                 break;
182             }
183         }
184         if (!metalist.empty())
185         {
186             // Not all the metadata variables were found in the journal.
187             for (auto& metaVarStr : metalist)
188             {
189                 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
190                           metaVarStr);
191             }
192         }
193 
194         sd_journal_close(j);
195     }
196     createEntry(errMsg, errLvl, additionalData);
197 }
198 
199 auto Manager::createEntry(
200     std::string errMsg, Entry::Level errLvl,
201     std::vector<std::string> additionalData,
202     const FFDCEntries& ffdc) -> sdbusplus::message::object_path
203 {
204     if (!Extensions::disableDefaultLogCaps())
205     {
206         if (errLvl < Entry::sevLowerLimit)
207         {
208             if (realErrors.size() >= ERROR_CAP)
209             {
210                 erase(realErrors.front());
211             }
212         }
213         else
214         {
215             if (infoErrors.size() >= ERROR_INFO_CAP)
216             {
217                 erase(infoErrors.front());
218             }
219         }
220     }
221 
222     entryId++;
223     if (errLvl >= Entry::sevLowerLimit)
224     {
225         infoErrors.push_back(entryId);
226     }
227     else
228     {
229         realErrors.push_back(entryId);
230     }
231     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
232                   std::chrono::system_clock::now().time_since_epoch())
233                   .count();
234     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
235 
236     AssociationList objects{};
237     processMetadata(errMsg, additionalData, objects);
238 
239     auto e = std::make_unique<Entry>(
240         busLog, objPath, entryId,
241         ms, // Milliseconds since 1970
242         errLvl, std::move(errMsg), std::move(additionalData),
243         std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
244 
245     serialize(*e);
246 
247     if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
248         isCalloutPresent(*e))
249     {
250         quiesceOnError(entryId);
251     }
252 
253     // Add entry before calling the extensions so that they have access to it
254     entries.insert(std::make_pair(entryId, std::move(e)));
255 
256     doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
257 
258     // Note: No need to close the file descriptors in the FFDC.
259 
260     return objPath;
261 }
262 
263 auto Manager::createFromEvent(sdbusplus::exception::generated_event_base&&
264                                   event) -> sdbusplus::message::object_path
265 {
266     auto [msg, level, data] = lg2::details::extractEvent(std::move(event));
267     return this->createEntry(msg, level, std::move(data));
268 }
269 
270 bool Manager::isQuiesceOnErrorEnabled()
271 {
272     // When running under tests, the Logging.Settings service will not be
273     // present.  Assume false.
274     if (IS_UNIT_TEST)
275     {
276         return false;
277     }
278 
279     std::variant<bool> property;
280 
281     auto method = this->busLog.new_method_call(
282         "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
283         "org.freedesktop.DBus.Properties", "Get");
284 
285     method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
286 
287     try
288     {
289         auto reply = this->busLog.call(method);
290         reply.read(property);
291     }
292     catch (const sdbusplus::exception_t& e)
293     {
294         lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
295                    e);
296         return false;
297     }
298 
299     return std::get<bool>(property);
300 }
301 
302 bool Manager::isCalloutPresent(const Entry& entry)
303 {
304     for (const auto& c : entry.additionalData())
305     {
306         if (c.find("CALLOUT_") != std::string::npos)
307         {
308             return true;
309         }
310     }
311 
312     return false;
313 }
314 
315 void Manager::findAndRemoveResolvedBlocks()
316 {
317     for (auto& entry : entries)
318     {
319         if (entry.second->resolved())
320         {
321             checkAndRemoveBlockingError(entry.first);
322         }
323     }
324 }
325 
326 void Manager::onEntryResolve(sdbusplus::message_t& msg)
327 {
328     using Interface = std::string;
329     using Property = std::string;
330     using Value = std::string;
331     using Properties = std::map<Property, std::variant<Value>>;
332 
333     Interface interface;
334     Properties properties;
335 
336     msg.read(interface, properties);
337 
338     for (const auto& p : properties)
339     {
340         if (p.first == "Resolved")
341         {
342             findAndRemoveResolvedBlocks();
343             return;
344         }
345     }
346 }
347 
348 void Manager::checkAndQuiesceHost()
349 {
350     using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
351 
352     // First check host state
353     std::variant<Host::HostState> property;
354 
355     auto method = this->busLog.new_method_call(
356         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
357         "org.freedesktop.DBus.Properties", "Get");
358 
359     method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
360 
361     try
362     {
363         auto reply = this->busLog.call(method);
364         reply.read(property);
365     }
366     catch (const sdbusplus::exception_t& e)
367     {
368         // Quiescing the host is a "best effort" type function. If unable to
369         // read the host state or it comes back empty, just return.
370         // The boot block object will still be created and the associations to
371         // find the log will be present. Don't want a dependency with
372         // phosphor-state-manager service
373         lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
374                   e);
375         return;
376     }
377 
378     auto hostState = std::get<Host::HostState>(property);
379     if (hostState != Host::HostState::Running)
380     {
381         return;
382     }
383 
384     auto quiesce = this->busLog.new_method_call(
385         "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
386         "org.freedesktop.systemd1.Manager", "StartUnit");
387 
388     quiesce.append("obmc-host-graceful-quiesce@0.target");
389     quiesce.append("replace");
390 
391     this->busLog.call_noreply(quiesce);
392 }
393 
394 void Manager::quiesceOnError(const uint32_t entryId)
395 {
396     // Verify we don't already have this entry blocking
397     auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
398                       [&](const std::unique_ptr<Block>& obj) {
399                           return obj->entryId == entryId;
400                       });
401     if (it != this->blockingErrors.end())
402     {
403         // Already recorded so just return
404         lg2::debug(
405             "QuiesceOnError set and callout present but entry already logged");
406         return;
407     }
408 
409     lg2::info("QuiesceOnError set and callout present");
410 
411     auto blockPath =
412         std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
413     auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
414     this->blockingErrors.push_back(std::move(blockObj));
415 
416     // Register call back if log is resolved
417     using namespace sdbusplus::bus::match::rules;
418     auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
419     auto callback = std::make_unique<sdbusplus::bus::match_t>(
420         this->busLog,
421         propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
422         std::bind(std::mem_fn(&Manager::onEntryResolve), this,
423                   std::placeholders::_1));
424 
425     propChangedEntryCallback.insert(
426         std::make_pair(entryId, std::move(callback)));
427 
428     checkAndQuiesceHost();
429 }
430 
431 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
432 {
433     // Make the association <endpointpath>/<endpointtype> paths
434     std::vector<std::string> assocs;
435     for (const auto& [forwardType, reverseType, endpoint] :
436          entry.associations())
437     {
438         std::string e{endpoint};
439         e += '/' + reverseType;
440         assocs.push_back(e);
441     }
442 
443     for (auto& create : Extensions::getCreateFunctions())
444     {
445         try
446         {
447             create(entry.message(), entry.id(), entry.timestamp(),
448                    entry.severity(), entry.additionalData(), assocs, ffdc);
449         }
450         catch (const std::exception& e)
451         {
452             lg2::error(
453                 "An extension's create function threw an exception: {ERROR}",
454                 "ERROR", e);
455         }
456     }
457 }
458 
459 void Manager::processMetadata(const std::string& /*errorName*/,
460                               const std::vector<std::string>& additionalData,
461                               AssociationList& objects) const
462 {
463     // additionalData is a list of "metadata=value"
464     constexpr auto separator = '=';
465     for (const auto& entryItem : additionalData)
466     {
467         auto found = entryItem.find(separator);
468         if (std::string::npos != found)
469         {
470             auto metadata = entryItem.substr(0, found);
471             auto iter = meta.find(metadata);
472             if (meta.end() != iter)
473             {
474                 (iter->second)(metadata, additionalData, objects);
475             }
476         }
477     }
478 }
479 
480 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
481 {
482     // First look for blocking object and remove
483     auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
484                       [&](const std::unique_ptr<Block>& obj) {
485                           return obj->entryId == entryId;
486                       });
487     if (it != blockingErrors.end())
488     {
489         blockingErrors.erase(it);
490     }
491 
492     // Now remove the callback looking for the error to be resolved
493     auto resolveFind = propChangedEntryCallback.find(entryId);
494     if (resolveFind != propChangedEntryCallback.end())
495     {
496         propChangedEntryCallback.erase(resolveFind);
497     }
498 
499     return;
500 }
501 
502 size_t Manager::eraseAll()
503 {
504     std::vector<uint32_t> logIDWithHwIsolation;
505     for (auto& func : Extensions::getLogIDWithHwIsolationFunctions())
506     {
507         try
508         {
509             func(logIDWithHwIsolation);
510         }
511         catch (const std::exception& e)
512         {
513             lg2::error("An extension's LogIDWithHwIsolation function threw an "
514                        "exception: {ERROR}",
515                        "ERROR", e);
516         }
517     }
518     size_t entriesSize = entries.size();
519     auto iter = entries.begin();
520     if (logIDWithHwIsolation.empty())
521     {
522         while (iter != entries.end())
523         {
524             auto e = iter->first;
525             ++iter;
526             erase(e);
527         }
528         entryId = 0;
529     }
530     else
531     {
532         while (iter != entries.end())
533         {
534             auto e = iter->first;
535             ++iter;
536             try
537             {
538                 if (!std::ranges::contains(logIDWithHwIsolation, e))
539                 {
540                     erase(e);
541                 }
542                 else
543                 {
544                     entriesSize--;
545                 }
546             }
547             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
548                        Unavailable& e)
549             {
550                 entriesSize--;
551             }
552         }
553         if (!entries.empty())
554         {
555             entryId = std::ranges::max_element(entries, [](const auto& a,
556                                                            const auto& b) {
557                           return a.first < b.first;
558                       })->first;
559         }
560         else
561         {
562             entryId = 0;
563         }
564     }
565     return entriesSize;
566 }
567 
568 void Manager::erase(uint32_t entryId)
569 {
570     auto entryFound = entries.find(entryId);
571     if (entries.end() != entryFound)
572     {
573         for (auto& func : Extensions::getDeleteProhibitedFunctions())
574         {
575             try
576             {
577                 bool prohibited = false;
578                 func(entryId, prohibited);
579                 if (prohibited)
580                 {
581                     throw sdbusplus::xyz::openbmc_project::Common::Error::
582                         Unavailable();
583                 }
584             }
585             catch (const sdbusplus::xyz::openbmc_project::Common::Error::
586                        Unavailable& e)
587             {
588                 throw;
589             }
590             catch (const std::exception& e)
591             {
592                 lg2::error("An extension's deleteProhibited function threw an "
593                            "exception: {ERROR}",
594                            "ERROR", e);
595             }
596         }
597 
598         // Delete the persistent representation of this error.
599         fs::path errorPath(paths::error());
600         errorPath /= std::to_string(entryId);
601         fs::remove(errorPath);
602 
603         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
604             auto it = std::find(ids.begin(), ids.end(), id);
605             if (it != ids.end())
606             {
607                 ids.erase(it);
608             }
609         };
610         if (entryFound->second->severity() >= Entry::sevLowerLimit)
611         {
612             removeId(infoErrors, entryId);
613         }
614         else
615         {
616             removeId(realErrors, entryId);
617         }
618         entries.erase(entryFound);
619 
620         checkAndRemoveBlockingError(entryId);
621 
622         for (auto& remove : Extensions::getDeleteFunctions())
623         {
624             try
625             {
626                 remove(entryId);
627             }
628             catch (const std::exception& e)
629             {
630                 lg2::error("An extension's delete function threw an exception: "
631                            "{ERROR}",
632                            "ERROR", e);
633             }
634         }
635     }
636     else
637     {
638         lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
639     }
640 }
641 
642 void Manager::restore()
643 {
644     auto sanity = [](const auto& id, const auto& restoredId) {
645         return id == restoredId;
646     };
647 
648     fs::path dir(paths::error());
649     if (!fs::exists(dir) || fs::is_empty(dir))
650     {
651         return;
652     }
653 
654     for (auto& file : fs::directory_iterator(dir))
655     {
656         auto id = file.path().filename().c_str();
657         auto idNum = std::stol(id);
658         auto e = std::make_unique<Entry>(
659             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
660         if (deserialize(file.path(), *e))
661         {
662             // validate the restored error entry id
663             if (sanity(static_cast<uint32_t>(idNum), e->id()))
664             {
665                 e->path(file.path(), true);
666                 if (e->severity() >= Entry::sevLowerLimit)
667                 {
668                     infoErrors.push_back(idNum);
669                 }
670                 else
671                 {
672                     realErrors.push_back(idNum);
673                 }
674 
675                 entries.insert(std::make_pair(idNum, std::move(e)));
676             }
677             else
678             {
679                 lg2::error(
680                     "Failed in sanity check while restoring error entry. "
681                     "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
682                     "ID_NUM", idNum, "ENTRY_ID", e->id());
683             }
684         }
685     }
686 
687     if (!entries.empty())
688     {
689         entryId = entries.rbegin()->first;
690     }
691 }
692 
693 std::string Manager::readFWVersion()
694 {
695     auto version = util::getOSReleaseValue("VERSION_ID");
696 
697     if (!version)
698     {
699         lg2::error("Unable to read BMC firmware version");
700     }
701 
702     return version.value_or("");
703 }
704 
705 auto Manager::create(const std::string& message, Entry::Level severity,
706                      const std::map<std::string, std::string>& additionalData,
707                      const FFDCEntries& ffdc) -> sdbusplus::message::object_path
708 {
709     // Convert the map into a vector of "key=value" strings
710     std::vector<std::string> ad;
711     metadata::associations::combine(additionalData, ad);
712 
713     return createEntry(message, severity, ad, ffdc);
714 }
715 
716 } // namespace internal
717 } // namespace logging
718 } // namespace phosphor
719