1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "util.hpp"
10 
11 #include <poll.h>
12 #include <sys/inotify.h>
13 #include <systemd/sd-bus.h>
14 #include <systemd/sd-journal.h>
15 #include <unistd.h>
16 
17 #include <cassert>
18 #include <chrono>
19 #include <cstdio>
20 #include <cstring>
21 #include <fstream>
22 #include <functional>
23 #include <future>
24 #include <iostream>
25 #include <map>
26 #include <phosphor-logging/log.hpp>
27 #include <sdbusplus/vtable.hpp>
28 #include <set>
29 #include <string>
30 #include <vector>
31 #include <xyz/openbmc_project/State/Host/server.hpp>
32 
33 using namespace phosphor::logging;
34 using namespace std::chrono;
35 using sdbusplus::exception::SdBusError;
36 extern const std::map<metadata::Metadata,
37                       std::function<metadata::associations::Type>>
38     meta;
39 
40 namespace phosphor
41 {
42 namespace logging
43 {
44 namespace internal
45 {
46 inline auto getLevel(const std::string& errMsg)
47 {
48     auto reqLevel = Entry::Level::Error; // Default to Error
49 
50     auto levelmap = g_errLevelMap.find(errMsg);
51     if (levelmap != g_errLevelMap.end())
52     {
53         reqLevel = static_cast<Entry::Level>(levelmap->second);
54     }
55 
56     return reqLevel;
57 }
58 
59 int Manager::getRealErrSize()
60 {
61     return realErrors.size();
62 }
63 
64 int Manager::getInfoErrSize()
65 {
66     return infoErrors.size();
67 }
68 
69 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
70 {
71     auto level = getLevel(errMsg);
72     _commit(transactionId, std::move(errMsg), level);
73     return entryId;
74 }
75 
76 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
77                                 uint32_t errLvl)
78 {
79     _commit(transactionId, std::move(errMsg),
80             static_cast<Entry::Level>(errLvl));
81     return entryId;
82 }
83 
84 void Manager::_commit(uint64_t transactionId, std::string&& errMsg,
85                       Entry::Level errLvl)
86 {
87     constexpr const auto transactionIdVar = "TRANSACTION_ID";
88     // Length of 'TRANSACTION_ID' string.
89     constexpr const auto transactionIdVarSize = std::strlen(transactionIdVar);
90     // Length of 'TRANSACTION_ID=' string.
91     constexpr const auto transactionIdVarOffset = transactionIdVarSize + 1;
92 
93     // Flush all the pending log messages into the journal
94     journalSync();
95 
96     sd_journal* j = nullptr;
97     int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
98     if (rc < 0)
99     {
100         logging::log<logging::level::ERR>(
101             "Failed to open journal",
102             logging::entry("DESCRIPTION=%s", strerror(-rc)));
103         return;
104     }
105 
106     std::string transactionIdStr = std::to_string(transactionId);
107     std::set<std::string> metalist;
108     auto metamap = g_errMetaMap.find(errMsg);
109     if (metamap != g_errMetaMap.end())
110     {
111         metalist.insert(metamap->second.begin(), metamap->second.end());
112     }
113 
114     // Add _PID field information in AdditionalData.
115     metalist.insert("_PID");
116 
117     std::vector<std::string> additionalData;
118 
119     // Read the journal from the end to get the most recent entry first.
120     // The result from the sd_journal_get_data() is of the form VARIABLE=value.
121     SD_JOURNAL_FOREACH_BACKWARDS(j)
122     {
123         const char* data = nullptr;
124         size_t length = 0;
125 
126         // Look for the transaction id metadata variable
127         rc = sd_journal_get_data(j, transactionIdVar, (const void**)&data,
128                                  &length);
129         if (rc < 0)
130         {
131             // This journal entry does not have the TRANSACTION_ID
132             // metadata variable.
133             continue;
134         }
135 
136         // journald does not guarantee that sd_journal_get_data() returns NULL
137         // terminated strings, so need to specify the size to use to compare,
138         // use the returned length instead of anything that relies on NULL
139         // terminators like strlen().
140         // The data variable is in the form of 'TRANSACTION_ID=1234'. Remove
141         // the TRANSACTION_ID characters plus the (=) sign to do the comparison.
142         // 'data + transactionIdVarOffset' will be in the form of '1234'.
143         // 'length - transactionIdVarOffset' will be the length of '1234'.
144         if ((length <= (transactionIdVarOffset)) ||
145             (transactionIdStr.compare(0, transactionIdStr.size(),
146                                       data + transactionIdVarOffset,
147                                       length - transactionIdVarOffset) != 0))
148         {
149             // The value of the TRANSACTION_ID metadata is not the requested
150             // transaction id number.
151             continue;
152         }
153 
154         // Search for all metadata variables in the current journal entry.
155         for (auto i = metalist.cbegin(); i != metalist.cend();)
156         {
157             rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
158                                      &length);
159             if (rc < 0)
160             {
161                 // Metadata variable not found, check next metadata variable.
162                 i++;
163                 continue;
164             }
165 
166             // Metadata variable found, save it and remove it from the set.
167             additionalData.emplace_back(data, length);
168             i = metalist.erase(i);
169         }
170         if (metalist.empty())
171         {
172             // All metadata variables found, break out of journal loop.
173             break;
174         }
175     }
176     if (!metalist.empty())
177     {
178         // Not all the metadata variables were found in the journal.
179         for (auto& metaVarStr : metalist)
180         {
181             logging::log<logging::level::INFO>(
182                 "Failed to find metadata",
183                 logging::entry("META_FIELD=%s", metaVarStr.c_str()));
184         }
185     }
186 
187     sd_journal_close(j);
188 
189     createEntry(errMsg, errLvl, additionalData);
190 }
191 
192 void Manager::createEntry(std::string errMsg, Entry::Level errLvl,
193                           std::vector<std::string> additionalData,
194                           const FFDCEntries& ffdc)
195 {
196     if (!Extensions::disableDefaultLogCaps())
197     {
198         if (errLvl < Entry::sevLowerLimit)
199         {
200             if (realErrors.size() >= ERROR_CAP)
201             {
202                 erase(realErrors.front());
203             }
204         }
205         else
206         {
207             if (infoErrors.size() >= ERROR_INFO_CAP)
208             {
209                 erase(infoErrors.front());
210             }
211         }
212     }
213 
214     entryId++;
215     if (errLvl >= Entry::sevLowerLimit)
216     {
217         infoErrors.push_back(entryId);
218     }
219     else
220     {
221         realErrors.push_back(entryId);
222     }
223     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
224                   std::chrono::system_clock::now().time_since_epoch())
225                   .count();
226     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
227 
228     AssociationList objects{};
229     processMetadata(errMsg, additionalData, objects);
230 
231     auto e = std::make_unique<Entry>(busLog, objPath, entryId,
232                                      ms, // Milliseconds since 1970
233                                      errLvl, std::move(errMsg),
234                                      std::move(additionalData),
235                                      std::move(objects), fwVersion, *this);
236     auto path = serialize(*e);
237     e->path(path);
238 
239     if (isQuiesceOnErrorEnabled() && isCalloutPresent(*e))
240     {
241         quiesceOnError(entryId);
242     }
243 
244     // Add entry before calling the extensions so that they have access to it
245     entries.insert(std::make_pair(entryId, std::move(e)));
246 
247     doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
248 
249     // Note: No need to close the file descriptors in the FFDC.
250 }
251 
252 bool Manager::isQuiesceOnErrorEnabled()
253 {
254     std::variant<bool> property;
255 
256     auto method = this->busLog.new_method_call(
257         "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
258         "org.freedesktop.DBus.Properties", "Get");
259 
260     method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
261 
262     try
263     {
264         auto reply = this->busLog.call(method);
265         reply.read(property);
266     }
267     catch (const SdBusError& e)
268     {
269         log<level::ERR>("Error reading QuiesceOnHwError property",
270                         entry("ERROR=%s", e.what()));
271         throw;
272     }
273 
274     return std::get<bool>(property);
275 }
276 
277 bool Manager::isCalloutPresent(const Entry& entry)
278 {
279     for (const auto& c : entry.additionalData())
280     {
281         if (c.find("CALLOUT_") != std::string::npos)
282         {
283             return true;
284         }
285     }
286 
287     return false;
288 }
289 
290 void Manager::findAndRemoveResolvedBlocks()
291 {
292     for (auto& entry : entries)
293     {
294         if (entry.second->resolved())
295         {
296             checkAndRemoveBlockingError(entry.first);
297         }
298     }
299 }
300 
301 void Manager::onEntryResolve(sdbusplus::message::message& msg)
302 {
303     using Interface = std::string;
304     using Property = std::string;
305     using Value = std::string;
306     using Properties = std::map<Property, std::variant<Value>>;
307 
308     Interface interface;
309     Properties properties;
310 
311     msg.read(interface, properties);
312 
313     for (const auto& p : properties)
314     {
315         if (p.first == "Resolved")
316         {
317             findAndRemoveResolvedBlocks();
318             return;
319         }
320     }
321 }
322 
323 void Manager::checkAndQuiesceHost()
324 {
325     // First check host state
326     std::variant<std::string> property;
327 
328     auto method = this->busLog.new_method_call(
329         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
330         "org.freedesktop.DBus.Properties", "Get");
331 
332     method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
333 
334     try
335     {
336         auto reply = this->busLog.call(method);
337         reply.read(property);
338     }
339     catch (const SdBusError& e)
340     {
341         // Quiescing the host is a "best effort" type function. If unable to
342         // read the host state or it comes back empty, just return.
343         // The boot block object will still be created and the associations to
344         // find the log will be present. Don't want a dependency with
345         // phosphor-state-manager service
346         log<level::INFO>("Error reading QuiesceOnHwError property",
347                          entry("ERROR=%s", e.what()));
348         return;
349     }
350 
351     std::string hostState = std::get<std::string>(property);
352 
353     // If host state is empty, do nothing
354     if (hostState.empty())
355     {
356         return;
357     }
358 
359     using Host = sdbusplus::xyz::openbmc_project::State::server::Host;
360     auto state = Host::convertHostStateFromString(hostState);
361     if (state != Host::HostState::Running)
362     {
363         return;
364     }
365 
366     auto quiesce = this->busLog.new_method_call(
367         "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
368         "org.freedesktop.systemd1.Manager", "StartUnit");
369 
370     quiesce.append("obmc-host-quiesce@0.target");
371     quiesce.append("replace");
372 
373     this->busLog.call_noreply(quiesce);
374 }
375 
376 void Manager::quiesceOnError(const uint32_t entryId)
377 {
378     // Verify we don't already have this entry blocking
379     auto it = find_if(
380         this->blockingErrors.begin(), this->blockingErrors.end(),
381         [&](std::unique_ptr<Block>& obj) { return obj->entryId == entryId; });
382     if (it != this->blockingErrors.end())
383     {
384         // Already recorded so just return
385         logging::log<logging::level::DEBUG>(
386             "QuiesceOnError set and callout present but entry already logged");
387         return;
388     }
389 
390     logging::log<logging::level::INFO>(
391         "QuiesceOnError set and callout present");
392 
393     auto blockPath =
394         std::string(OBJ_LOGGING) + "/block" + std::to_string(entryId);
395     auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
396     this->blockingErrors.push_back(std::move(blockObj));
397 
398     // Register call back if log is resolved
399     using namespace sdbusplus::bus::match::rules;
400     auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
401     auto callback = std::make_unique<sdbusplus::bus::match::match>(
402         this->busLog,
403         propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
404         std::bind(std::mem_fn(&Manager::onEntryResolve), this,
405                   std::placeholders::_1));
406 
407     propChangedEntryCallback.insert(
408         std::make_pair(entryId, std::move(callback)));
409 
410     checkAndQuiesceHost();
411 }
412 
413 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
414 {
415     // Make the association <endpointpath>/<endpointtype> paths
416     std::vector<std::string> assocs;
417     for (const auto& [forwardType, reverseType, endpoint] :
418          entry.associations())
419     {
420         std::string e{endpoint};
421         e += '/' + reverseType;
422         assocs.push_back(e);
423     }
424 
425     for (auto& create : Extensions::getCreateFunctions())
426     {
427         try
428         {
429             create(entry.message(), entry.id(), entry.timestamp(),
430                    entry.severity(), entry.additionalData(), assocs, ffdc);
431         }
432         catch (std::exception& e)
433         {
434             log<level::ERR>("An extension's create function threw an exception",
435                             phosphor::logging::entry("ERROR=%s", e.what()));
436         }
437     }
438 }
439 
440 void Manager::processMetadata(const std::string& errorName,
441                               const std::vector<std::string>& additionalData,
442                               AssociationList& objects) const
443 {
444     // additionalData is a list of "metadata=value"
445     constexpr auto separator = '=';
446     for (const auto& entryItem : additionalData)
447     {
448         auto found = entryItem.find(separator);
449         if (std::string::npos != found)
450         {
451             auto metadata = entryItem.substr(0, found);
452             auto iter = meta.find(metadata);
453             if (meta.end() != iter)
454             {
455                 (iter->second)(metadata, additionalData, objects);
456             }
457         }
458     }
459 }
460 
461 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
462 {
463     // First look for blocking object and remove
464     auto it = find_if(
465         blockingErrors.begin(), blockingErrors.end(),
466         [&](std::unique_ptr<Block>& obj) { return obj->entryId == entryId; });
467     if (it != blockingErrors.end())
468     {
469         blockingErrors.erase(it);
470     }
471 
472     // Now remove the callback looking for the error to be resolved
473     auto resolveFind = propChangedEntryCallback.find(entryId);
474     if (resolveFind != propChangedEntryCallback.end())
475     {
476         propChangedEntryCallback.erase(resolveFind);
477     }
478 
479     return;
480 }
481 
482 void Manager::erase(uint32_t entryId)
483 {
484     auto entryFound = entries.find(entryId);
485     if (entries.end() != entryFound)
486     {
487         for (auto& func : Extensions::getDeleteProhibitedFunctions())
488         {
489             try
490             {
491                 bool prohibited = false;
492                 func(entryId, prohibited);
493                 if (prohibited)
494                 {
495                     // Future work remains to throw an error here.
496                     return;
497                 }
498             }
499             catch (std::exception& e)
500             {
501                 log<level::ERR>(
502                     "An extension's deleteProhibited function threw "
503                     "an exception",
504                     entry("ERROR=%s", e.what()));
505             }
506         }
507 
508         // Delete the persistent representation of this error.
509         fs::path errorPath(ERRLOG_PERSIST_PATH);
510         errorPath /= std::to_string(entryId);
511         fs::remove(errorPath);
512 
513         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
514             auto it = std::find(ids.begin(), ids.end(), id);
515             if (it != ids.end())
516             {
517                 ids.erase(it);
518             }
519         };
520         if (entryFound->second->severity() >= Entry::sevLowerLimit)
521         {
522             removeId(infoErrors, entryId);
523         }
524         else
525         {
526             removeId(realErrors, entryId);
527         }
528         entries.erase(entryFound);
529 
530         checkAndRemoveBlockingError(entryId);
531 
532         for (auto& remove : Extensions::getDeleteFunctions())
533         {
534             try
535             {
536                 remove(entryId);
537             }
538             catch (std::exception& e)
539             {
540                 log<level::ERR>("An extension's delete function threw an "
541                                 "exception",
542                                 entry("ERROR=%s", e.what()));
543             }
544         }
545     }
546     else
547     {
548         logging::log<level::ERR>("Invalid entry ID to delete",
549                                  logging::entry("ID=%d", entryId));
550     }
551 }
552 
553 void Manager::restore()
554 {
555     auto sanity = [](const auto& id, const auto& restoredId) {
556         return id == restoredId;
557     };
558     std::vector<uint32_t> errorIds;
559 
560     fs::path dir(ERRLOG_PERSIST_PATH);
561     if (!fs::exists(dir) || fs::is_empty(dir))
562     {
563         return;
564     }
565 
566     for (auto& file : fs::directory_iterator(dir))
567     {
568         auto id = file.path().filename().c_str();
569         auto idNum = std::stol(id);
570         auto e = std::make_unique<Entry>(
571             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
572         if (deserialize(file.path(), *e))
573         {
574             // validate the restored error entry id
575             if (sanity(static_cast<uint32_t>(idNum), e->id()))
576             {
577                 e->path(file.path());
578                 e->emit_object_added();
579                 if (e->severity() >= Entry::sevLowerLimit)
580                 {
581                     infoErrors.push_back(idNum);
582                 }
583                 else
584                 {
585                     realErrors.push_back(idNum);
586                 }
587 
588                 entries.insert(std::make_pair(idNum, std::move(e)));
589                 errorIds.push_back(idNum);
590             }
591             else
592             {
593                 logging::log<logging::level::ERR>(
594                     "Failed in sanity check while restoring error entry. "
595                     "Ignoring error entry",
596                     logging::entry("ID_NUM=%d", idNum),
597                     logging::entry("ENTRY_ID=%d", e->id()));
598             }
599         }
600     }
601 
602     if (!errorIds.empty())
603     {
604         entryId = *(std::max_element(errorIds.begin(), errorIds.end()));
605     }
606 }
607 
608 void Manager::journalSync()
609 {
610     bool syncRequested = false;
611     auto fd = -1;
612     auto rc = -1;
613     auto wd = -1;
614     auto bus = sdbusplus::bus::new_default();
615 
616     auto start =
617         duration_cast<microseconds>(steady_clock::now().time_since_epoch())
618             .count();
619 
620     // Each time an error log is committed, a request to sync the journal
621     // must occur and block that error log commit until it completes. A 5sec
622     // block is done to allow sufficient time for the journal to be synced.
623     //
624     // Number of loop iterations = 3 for the following reasons:
625     // Iteration #1: Requests a journal sync by killing the journald service.
626     // Iteration #2: Setup an inotify watch to monitor the synced file that
627     //               journald updates with the timestamp the last time the
628     //               journal was flushed.
629     // Iteration #3: Poll to wait until inotify reports an event which blocks
630     //               the error log from being commited until the sync completes.
631     constexpr auto maxRetry = 3;
632     for (int i = 0; i < maxRetry; i++)
633     {
634         // Read timestamp from synced file
635         constexpr auto syncedPath = "/run/systemd/journal/synced";
636         std::ifstream syncedFile(syncedPath);
637         if (syncedFile.fail())
638         {
639             // If the synced file doesn't exist, a sync request will create it.
640             if (errno != ENOENT)
641             {
642                 log<level::ERR>("Failed to open journal synced file",
643                                 entry("FILENAME=%s", syncedPath),
644                                 entry("ERRNO=%d", errno));
645                 return;
646             }
647         }
648         else
649         {
650             // Only read the synced file if it exists.
651             // See if a sync happened by now
652             std::string timestampStr;
653             std::getline(syncedFile, timestampStr);
654             auto timestamp = std::stoll(timestampStr);
655             if (timestamp >= start)
656             {
657                 break;
658             }
659         }
660 
661         // Let's ask for a sync, but only once
662         if (!syncRequested)
663         {
664             syncRequested = true;
665 
666             constexpr auto JOURNAL_UNIT = "systemd-journald.service";
667             auto signal = SIGRTMIN + 1;
668 
669             auto method = bus.new_method_call(SYSTEMD_BUSNAME, SYSTEMD_PATH,
670                                               SYSTEMD_INTERFACE, "KillUnit");
671             method.append(JOURNAL_UNIT, "main", signal);
672             bus.call(method);
673             if (method.is_method_error())
674             {
675                 log<level::ERR>("Failed to kill journal service");
676                 break;
677             }
678             continue;
679         }
680 
681         // Let's install the inotify watch, if we didn't do that yet. This watch
682         // monitors the syncedFile for when journald updates it with a newer
683         // timestamp. This means the journal has been flushed.
684         if (fd < 0)
685         {
686             fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
687             if (fd < 0)
688             {
689                 log<level::ERR>("Failed to create inotify watch",
690                                 entry("ERRNO=%d", errno));
691                 return;
692             }
693 
694             constexpr auto JOURNAL_RUN_PATH = "/run/systemd/journal";
695             wd = inotify_add_watch(fd, JOURNAL_RUN_PATH,
696                                    IN_MOVED_TO | IN_DONT_FOLLOW | IN_ONLYDIR);
697             if (wd < 0)
698             {
699                 log<level::ERR>("Failed to watch journal directory",
700                                 entry("PATH=%s", JOURNAL_RUN_PATH),
701                                 entry("ERRNO=%d", errno));
702                 close(fd);
703                 return;
704             }
705             continue;
706         }
707 
708         // Let's wait until inotify reports an event
709         struct pollfd fds = {
710             .fd = fd,
711             .events = POLLIN,
712         };
713         constexpr auto pollTimeout = 5; // 5 seconds
714         rc = poll(&fds, 1, pollTimeout * 1000);
715         if (rc < 0)
716         {
717             log<level::ERR>("Failed to add event", entry("ERRNO=%d", errno),
718                             entry("ERR=%s", strerror(-rc)));
719             inotify_rm_watch(fd, wd);
720             close(fd);
721             return;
722         }
723         else if (rc == 0)
724         {
725             log<level::INFO>("Poll timeout, no new journal synced data",
726                              entry("TIMEOUT=%d", pollTimeout));
727             break;
728         }
729 
730         // Read from the specified file descriptor until there is no new data,
731         // throwing away everything read since the timestamp will be read at the
732         // beginning of the loop.
733         constexpr auto maxBytes = 64;
734         uint8_t buffer[maxBytes];
735         while (read(fd, buffer, maxBytes) > 0)
736             ;
737     }
738 
739     if (fd != -1)
740     {
741         if (wd != -1)
742         {
743             inotify_rm_watch(fd, wd);
744         }
745         close(fd);
746     }
747 
748     return;
749 }
750 
751 std::string Manager::readFWVersion()
752 {
753     auto version = util::getOSReleaseValue("VERSION_ID");
754 
755     if (!version)
756     {
757         log<level::ERR>("Unable to read BMC firmware version");
758     }
759 
760     return version.value_or("");
761 }
762 
763 void Manager::create(const std::string& message, Entry::Level severity,
764                      const std::map<std::string, std::string>& additionalData)
765 {
766     // Convert the map into a vector of "key=value" strings
767     std::vector<std::string> ad;
768     metadata::associations::combine(additionalData, ad);
769 
770     createEntry(message, severity, ad);
771 }
772 
773 void Manager::createWithFFDC(
774     const std::string& message, Entry::Level severity,
775     const std::map<std::string, std::string>& additionalData,
776     const FFDCEntries& ffdc)
777 {
778     // Convert the map into a vector of "key=value" strings
779     std::vector<std::string> ad;
780     metadata::associations::combine(additionalData, ad);
781 
782     createEntry(message, severity, ad, ffdc);
783 }
784 
785 } // namespace internal
786 } // namespace logging
787 } // namespace phosphor
788