1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 
10 #include <poll.h>
11 #include <sys/inotify.h>
12 #include <systemd/sd-bus.h>
13 #include <systemd/sd-journal.h>
14 #include <unistd.h>
15 
16 #include <cassert>
17 #include <chrono>
18 #include <cstdio>
19 #include <cstring>
20 #include <fstream>
21 #include <functional>
22 #include <future>
23 #include <iostream>
24 #include <map>
25 #include <phosphor-logging/log.hpp>
26 #include <sdbusplus/vtable.hpp>
27 #include <set>
28 #include <string>
29 #include <vector>
30 
31 using namespace phosphor::logging;
32 using namespace std::chrono;
33 extern const std::map<metadata::Metadata,
34                       std::function<metadata::associations::Type>>
35     meta;
36 
37 namespace phosphor
38 {
39 namespace logging
40 {
41 namespace internal
42 {
43 inline auto getLevel(const std::string& errMsg)
44 {
45     auto reqLevel = Entry::Level::Error; // Default to Error
46 
47     auto levelmap = g_errLevelMap.find(errMsg);
48     if (levelmap != g_errLevelMap.end())
49     {
50         reqLevel = static_cast<Entry::Level>(levelmap->second);
51     }
52 
53     return reqLevel;
54 }
55 
56 int Manager::getRealErrSize()
57 {
58     return realErrors.size();
59 }
60 
61 int Manager::getInfoErrSize()
62 {
63     return infoErrors.size();
64 }
65 
66 void Manager::commit(uint64_t transactionId, std::string errMsg)
67 {
68     auto level = getLevel(errMsg);
69     _commit(transactionId, std::move(errMsg), level);
70 }
71 
72 void Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
73                             uint32_t errLvl)
74 {
75     _commit(transactionId, std::move(errMsg),
76             static_cast<Entry::Level>(errLvl));
77 }
78 
79 void Manager::_commit(uint64_t transactionId, std::string&& errMsg,
80                       Entry::Level errLvl)
81 {
82     constexpr const auto transactionIdVar = "TRANSACTION_ID";
83     // Length of 'TRANSACTION_ID' string.
84     constexpr const auto transactionIdVarSize = std::strlen(transactionIdVar);
85     // Length of 'TRANSACTION_ID=' string.
86     constexpr const auto transactionIdVarOffset = transactionIdVarSize + 1;
87 
88     // Flush all the pending log messages into the journal
89     journalSync();
90 
91     sd_journal* j = nullptr;
92     int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
93     if (rc < 0)
94     {
95         logging::log<logging::level::ERR>(
96             "Failed to open journal",
97             logging::entry("DESCRIPTION=%s", strerror(-rc)));
98         return;
99     }
100 
101     std::string transactionIdStr = std::to_string(transactionId);
102     std::set<std::string> metalist;
103     auto metamap = g_errMetaMap.find(errMsg);
104     if (metamap != g_errMetaMap.end())
105     {
106         metalist.insert(metamap->second.begin(), metamap->second.end());
107     }
108 
109     // Add _PID field information in AdditionalData.
110     metalist.insert("_PID");
111 
112     std::vector<std::string> additionalData;
113 
114     // Read the journal from the end to get the most recent entry first.
115     // The result from the sd_journal_get_data() is of the form VARIABLE=value.
116     SD_JOURNAL_FOREACH_BACKWARDS(j)
117     {
118         const char* data = nullptr;
119         size_t length = 0;
120 
121         // Look for the transaction id metadata variable
122         rc = sd_journal_get_data(j, transactionIdVar, (const void**)&data,
123                                  &length);
124         if (rc < 0)
125         {
126             // This journal entry does not have the TRANSACTION_ID
127             // metadata variable.
128             continue;
129         }
130 
131         // journald does not guarantee that sd_journal_get_data() returns NULL
132         // terminated strings, so need to specify the size to use to compare,
133         // use the returned length instead of anything that relies on NULL
134         // terminators like strlen().
135         // The data variable is in the form of 'TRANSACTION_ID=1234'. Remove
136         // the TRANSACTION_ID characters plus the (=) sign to do the comparison.
137         // 'data + transactionIdVarOffset' will be in the form of '1234'.
138         // 'length - transactionIdVarOffset' will be the length of '1234'.
139         if ((length <= (transactionIdVarOffset)) ||
140             (transactionIdStr.compare(0, transactionIdStr.size(),
141                                       data + transactionIdVarOffset,
142                                       length - transactionIdVarOffset) != 0))
143         {
144             // The value of the TRANSACTION_ID metadata is not the requested
145             // transaction id number.
146             continue;
147         }
148 
149         // Search for all metadata variables in the current journal entry.
150         for (auto i = metalist.cbegin(); i != metalist.cend();)
151         {
152             rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
153                                      &length);
154             if (rc < 0)
155             {
156                 // Metadata variable not found, check next metadata variable.
157                 i++;
158                 continue;
159             }
160 
161             // Metadata variable found, save it and remove it from the set.
162             additionalData.emplace_back(data, length);
163             i = metalist.erase(i);
164         }
165         if (metalist.empty())
166         {
167             // All metadata variables found, break out of journal loop.
168             break;
169         }
170     }
171     if (!metalist.empty())
172     {
173         // Not all the metadata variables were found in the journal.
174         for (auto& metaVarStr : metalist)
175         {
176             logging::log<logging::level::INFO>(
177                 "Failed to find metadata",
178                 logging::entry("META_FIELD=%s", metaVarStr.c_str()));
179         }
180     }
181 
182     sd_journal_close(j);
183 
184     createEntry(errMsg, errLvl, additionalData);
185 }
186 
187 void Manager::createEntry(std::string errMsg, Entry::Level errLvl,
188                           std::vector<std::string> additionalData)
189 {
190     if (!Extensions::disableDefaultLogCaps())
191     {
192         if (errLvl < Entry::sevLowerLimit)
193         {
194             if (realErrors.size() >= ERROR_CAP)
195             {
196                 erase(realErrors.front());
197             }
198         }
199         else
200         {
201             if (infoErrors.size() >= ERROR_INFO_CAP)
202             {
203                 erase(infoErrors.front());
204             }
205         }
206     }
207 
208     entryId++;
209     if (errLvl >= Entry::sevLowerLimit)
210     {
211         infoErrors.push_back(entryId);
212     }
213     else
214     {
215         realErrors.push_back(entryId);
216     }
217     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
218                   std::chrono::system_clock::now().time_since_epoch())
219                   .count();
220     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
221 
222     AssociationList objects{};
223     processMetadata(errMsg, additionalData, objects);
224 
225     auto e = std::make_unique<Entry>(busLog, objPath, entryId,
226                                      ms, // Milliseconds since 1970
227                                      errLvl, std::move(errMsg),
228                                      std::move(additionalData),
229                                      std::move(objects), fwVersion, *this);
230     serialize(*e);
231 
232     doExtensionLogCreate(*e);
233 
234     entries.insert(std::make_pair(entryId, std::move(e)));
235 }
236 
237 void Manager::doExtensionLogCreate(const Entry& entry)
238 {
239     // Make the association <endpointpath>/<endpointtype> paths
240     std::vector<std::string> assocs;
241     for (const auto& [forwardType, reverseType, endpoint] :
242          entry.associations())
243     {
244         std::string e{endpoint};
245         e += '/' + reverseType;
246         assocs.push_back(e);
247     }
248 
249     for (auto& create : Extensions::getCreateFunctions())
250     {
251         try
252         {
253             create(entry.message(), entry.id(), entry.timestamp(),
254                    entry.severity(), entry.additionalData(), assocs);
255         }
256         catch (std::exception& e)
257         {
258             log<level::ERR>("An extension's create function threw an exception",
259                             phosphor::logging::entry("ERROR=%s", e.what()));
260         }
261     }
262 }
263 
264 void Manager::processMetadata(const std::string& errorName,
265                               const std::vector<std::string>& additionalData,
266                               AssociationList& objects) const
267 {
268     // additionalData is a list of "metadata=value"
269     constexpr auto separator = '=';
270     for (const auto& entryItem : additionalData)
271     {
272         auto found = entryItem.find(separator);
273         if (std::string::npos != found)
274         {
275             auto metadata = entryItem.substr(0, found);
276             auto iter = meta.find(metadata);
277             if (meta.end() != iter)
278             {
279                 (iter->second)(metadata, additionalData, objects);
280             }
281         }
282     }
283 }
284 
285 void Manager::erase(uint32_t entryId)
286 {
287     auto entryFound = entries.find(entryId);
288     if (entries.end() != entryFound)
289     {
290         for (auto& func : Extensions::getDeleteProhibitedFunctions())
291         {
292             try
293             {
294                 bool prohibited = false;
295                 func(entryId, prohibited);
296                 if (prohibited)
297                 {
298                     // Future work remains to throw an error here.
299                     return;
300                 }
301             }
302             catch (std::exception& e)
303             {
304                 log<level::ERR>(
305                     "An extension's deleteProhibited function threw "
306                     "an exception",
307                     entry("ERROR=%s", e.what()));
308             }
309         }
310 
311         // Delete the persistent representation of this error.
312         fs::path errorPath(ERRLOG_PERSIST_PATH);
313         errorPath /= std::to_string(entryId);
314         fs::remove(errorPath);
315 
316         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
317             auto it = std::find(ids.begin(), ids.end(), id);
318             if (it != ids.end())
319             {
320                 ids.erase(it);
321             }
322         };
323         if (entryFound->second->severity() >= Entry::sevLowerLimit)
324         {
325             removeId(infoErrors, entryId);
326         }
327         else
328         {
329             removeId(realErrors, entryId);
330         }
331         entries.erase(entryFound);
332 
333         for (auto& remove : Extensions::getDeleteFunctions())
334         {
335             try
336             {
337                 remove(entryId);
338             }
339             catch (std::exception& e)
340             {
341                 log<level::ERR>("An extension's delete function threw an "
342                                 "exception",
343                                 entry("ERROR=%s", e.what()));
344             }
345         }
346     }
347     else
348     {
349         logging::log<level::ERR>("Invalid entry ID to delete",
350                                  logging::entry("ID=%d", entryId));
351     }
352 }
353 
354 void Manager::restore()
355 {
356     auto sanity = [](const auto& id, const auto& restoredId) {
357         return id == restoredId;
358     };
359     std::vector<uint32_t> errorIds;
360 
361     fs::path dir(ERRLOG_PERSIST_PATH);
362     if (!fs::exists(dir) || fs::is_empty(dir))
363     {
364         return;
365     }
366 
367     for (auto& file : fs::directory_iterator(dir))
368     {
369         auto id = file.path().filename().c_str();
370         auto idNum = std::stol(id);
371         auto e = std::make_unique<Entry>(
372             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
373         if (deserialize(file.path(), *e))
374         {
375             // validate the restored error entry id
376             if (sanity(static_cast<uint32_t>(idNum), e->id()))
377             {
378                 e->emit_object_added();
379                 if (e->severity() >= Entry::sevLowerLimit)
380                 {
381                     infoErrors.push_back(idNum);
382                 }
383                 else
384                 {
385                     realErrors.push_back(idNum);
386                 }
387 
388                 entries.insert(std::make_pair(idNum, std::move(e)));
389                 errorIds.push_back(idNum);
390             }
391             else
392             {
393                 logging::log<logging::level::ERR>(
394                     "Failed in sanity check while restoring error entry. "
395                     "Ignoring error entry",
396                     logging::entry("ID_NUM=%d", idNum),
397                     logging::entry("ENTRY_ID=%d", e->id()));
398             }
399         }
400     }
401 
402     if (!errorIds.empty())
403     {
404         entryId = *(std::max_element(errorIds.begin(), errorIds.end()));
405     }
406 }
407 
408 void Manager::journalSync()
409 {
410     bool syncRequested = false;
411     auto fd = -1;
412     auto rc = -1;
413     auto wd = -1;
414     auto bus = sdbusplus::bus::new_default();
415 
416     auto start =
417         duration_cast<microseconds>(steady_clock::now().time_since_epoch())
418             .count();
419 
420     // Each time an error log is committed, a request to sync the journal
421     // must occur and block that error log commit until it completes. A 5sec
422     // block is done to allow sufficient time for the journal to be synced.
423     //
424     // Number of loop iterations = 3 for the following reasons:
425     // Iteration #1: Requests a journal sync by killing the journald service.
426     // Iteration #2: Setup an inotify watch to monitor the synced file that
427     //               journald updates with the timestamp the last time the
428     //               journal was flushed.
429     // Iteration #3: Poll to wait until inotify reports an event which blocks
430     //               the error log from being commited until the sync completes.
431     constexpr auto maxRetry = 3;
432     for (int i = 0; i < maxRetry; i++)
433     {
434         // Read timestamp from synced file
435         constexpr auto syncedPath = "/run/systemd/journal/synced";
436         std::ifstream syncedFile(syncedPath);
437         if (syncedFile.fail())
438         {
439             // If the synced file doesn't exist, a sync request will create it.
440             if (errno != ENOENT)
441             {
442                 log<level::ERR>("Failed to open journal synced file",
443                                 entry("FILENAME=%s", syncedPath),
444                                 entry("ERRNO=%d", errno));
445                 return;
446             }
447         }
448         else
449         {
450             // Only read the synced file if it exists.
451             // See if a sync happened by now
452             std::string timestampStr;
453             std::getline(syncedFile, timestampStr);
454             auto timestamp = std::stoll(timestampStr);
455             if (timestamp >= start)
456             {
457                 break;
458             }
459         }
460 
461         // Let's ask for a sync, but only once
462         if (!syncRequested)
463         {
464             syncRequested = true;
465 
466             constexpr auto JOURNAL_UNIT = "systemd-journald.service";
467             auto signal = SIGRTMIN + 1;
468 
469             auto method = bus.new_method_call(SYSTEMD_BUSNAME, SYSTEMD_PATH,
470                                               SYSTEMD_INTERFACE, "KillUnit");
471             method.append(JOURNAL_UNIT, "main", signal);
472             bus.call(method);
473             if (method.is_method_error())
474             {
475                 log<level::ERR>("Failed to kill journal service");
476                 break;
477             }
478             continue;
479         }
480 
481         // Let's install the inotify watch, if we didn't do that yet. This watch
482         // monitors the syncedFile for when journald updates it with a newer
483         // timestamp. This means the journal has been flushed.
484         if (fd < 0)
485         {
486             fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
487             if (fd < 0)
488             {
489                 log<level::ERR>("Failed to create inotify watch",
490                                 entry("ERRNO=%d", errno));
491                 return;
492             }
493 
494             constexpr auto JOURNAL_RUN_PATH = "/run/systemd/journal";
495             wd = inotify_add_watch(fd, JOURNAL_RUN_PATH,
496                                    IN_MOVED_TO | IN_DONT_FOLLOW | IN_ONLYDIR);
497             if (wd < 0)
498             {
499                 log<level::ERR>("Failed to watch journal directory",
500                                 entry("PATH=%s", JOURNAL_RUN_PATH),
501                                 entry("ERRNO=%d", errno));
502                 close(fd);
503                 return;
504             }
505             continue;
506         }
507 
508         // Let's wait until inotify reports an event
509         struct pollfd fds = {
510             .fd = fd,
511             .events = POLLIN,
512         };
513         constexpr auto pollTimeout = 5; // 5 seconds
514         rc = poll(&fds, 1, pollTimeout * 1000);
515         if (rc < 0)
516         {
517             log<level::ERR>("Failed to add event", entry("ERRNO=%d", errno),
518                             entry("ERR=%s", strerror(-rc)));
519             inotify_rm_watch(fd, wd);
520             close(fd);
521             return;
522         }
523         else if (rc == 0)
524         {
525             log<level::INFO>("Poll timeout, no new journal synced data",
526                              entry("TIMEOUT=%d", pollTimeout));
527             break;
528         }
529 
530         // Read from the specified file descriptor until there is no new data,
531         // throwing away everything read since the timestamp will be read at the
532         // beginning of the loop.
533         constexpr auto maxBytes = 64;
534         uint8_t buffer[maxBytes];
535         while (read(fd, buffer, maxBytes) > 0)
536             ;
537     }
538 
539     if (fd != -1)
540     {
541         if (wd != -1)
542         {
543             inotify_rm_watch(fd, wd);
544         }
545         close(fd);
546     }
547 
548     return;
549 }
550 
551 std::string Manager::readFWVersion()
552 {
553     std::string version;
554     std::ifstream versionFile{BMC_VERSION_FILE};
555     std::string line;
556     static constexpr auto VERSION_ID = "VERSION_ID=";
557 
558     while (std::getline(versionFile, line))
559     {
560         if (line.find(VERSION_ID) != std::string::npos)
561         {
562             auto pos = line.find_first_of('"') + 1;
563             version = line.substr(pos, line.find_last_of('"') - pos);
564             break;
565         }
566     }
567 
568     if (version.empty())
569     {
570         log<level::ERR>("Unable to read BMC firmware version");
571     }
572 
573     return version;
574 }
575 
576 void Manager::create(const std::string& message, Entry::Level severity,
577                      const std::map<std::string, std::string>& additionalData)
578 {
579     // Convert the map into a vector of "key=value" strings
580     std::vector<std::string> ad;
581     metadata::associations::combine(additionalData, ad);
582 
583     createEntry(message, severity, ad);
584 }
585 
586 } // namespace internal
587 } // namespace logging
588 } // namespace phosphor
589