1 #include "config.h"
2 
3 #include "log_manager.hpp"
4 
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "util.hpp"
10 
11 #include <systemd/sd-bus.h>
12 #include <systemd/sd-journal.h>
13 #include <unistd.h>
14 
15 #include <phosphor-logging/lg2.hpp>
16 #include <sdbusplus/vtable.hpp>
17 #include <xyz/openbmc_project/State/Host/server.hpp>
18 
19 #include <cassert>
20 #include <chrono>
21 #include <cstdio>
22 #include <cstring>
23 #include <fstream>
24 #include <functional>
25 #include <future>
26 #include <iostream>
27 #include <map>
28 #include <set>
29 #include <string>
30 #include <string_view>
31 #include <vector>
32 
33 using namespace std::chrono;
34 extern const std::map<
35     phosphor::logging::metadata::Metadata,
36     std::function<phosphor::logging::metadata::associations::Type>>
37     meta;
38 
39 namespace phosphor
40 {
41 namespace logging
42 {
43 namespace internal
44 {
getLevel(const std::string & errMsg)45 inline auto getLevel(const std::string& errMsg)
46 {
47     auto reqLevel = Entry::Level::Error; // Default to Error
48 
49     auto levelmap = g_errLevelMap.find(errMsg);
50     if (levelmap != g_errLevelMap.end())
51     {
52         reqLevel = static_cast<Entry::Level>(levelmap->second);
53     }
54 
55     return reqLevel;
56 }
57 
getRealErrSize()58 int Manager::getRealErrSize()
59 {
60     return realErrors.size();
61 }
62 
getInfoErrSize()63 int Manager::getInfoErrSize()
64 {
65     return infoErrors.size();
66 }
67 
commit(uint64_t transactionId,std::string errMsg)68 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
69 {
70     auto level = getLevel(errMsg);
71     _commit(transactionId, std::move(errMsg), level);
72     return entryId;
73 }
74 
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)75 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
76                                 uint32_t errLvl)
77 {
78     _commit(transactionId, std::move(errMsg),
79             static_cast<Entry::Level>(errLvl));
80     return entryId;
81 }
82 
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)83 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
84                       std::string&& errMsg, Entry::Level errLvl)
85 {
86     std::vector<std::string> additionalData{};
87 
88     // When running as a test-case, the system may have a LOT of journal
89     // data and we may not have permissions to do some of the journal sync
90     // operations.  Just skip over them.
91     if (!IS_UNIT_TEST)
92     {
93         static constexpr auto transactionIdVar =
94             std::string_view{"TRANSACTION_ID"};
95         // Length of 'TRANSACTION_ID' string.
96         static constexpr auto transactionIdVarSize = transactionIdVar.size();
97         // Length of 'TRANSACTION_ID=' string.
98         static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
99 
100         // Flush all the pending log messages into the journal
101         util::journalSync();
102 
103         sd_journal* j = nullptr;
104         int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
105         if (rc < 0)
106         {
107             lg2::error("Failed to open journal: {ERROR}", "ERROR",
108                        strerror(-rc));
109             return;
110         }
111 
112         std::string transactionIdStr = std::to_string(transactionId);
113         std::set<std::string> metalist;
114         auto metamap = g_errMetaMap.find(errMsg);
115         if (metamap != g_errMetaMap.end())
116         {
117             metalist.insert(metamap->second.begin(), metamap->second.end());
118         }
119 
120         // Add _PID field information in AdditionalData.
121         metalist.insert("_PID");
122 
123         // Read the journal from the end to get the most recent entry first.
124         // The result from the sd_journal_get_data() is of the form
125         // VARIABLE=value.
126         SD_JOURNAL_FOREACH_BACKWARDS(j)
127         {
128             const char* data = nullptr;
129             size_t length = 0;
130 
131             // Look for the transaction id metadata variable
132             rc = sd_journal_get_data(j, transactionIdVar.data(),
133                                      (const void**)&data, &length);
134             if (rc < 0)
135             {
136                 // This journal entry does not have the TRANSACTION_ID
137                 // metadata variable.
138                 continue;
139             }
140 
141             // journald does not guarantee that sd_journal_get_data() returns
142             // NULL terminated strings, so need to specify the size to use to
143             // compare, use the returned length instead of anything that relies
144             // on NULL terminators like strlen(). The data variable is in the
145             // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
146             // characters plus the (=) sign to do the comparison. 'data +
147             // transactionIdVarOffset' will be in the form of '1234'. 'length -
148             // transactionIdVarOffset' will be the length of '1234'.
149             if ((length <= (transactionIdVarOffset)) ||
150                 (transactionIdStr.compare(
151                      0, transactionIdStr.size(), data + transactionIdVarOffset,
152                      length - transactionIdVarOffset) != 0))
153             {
154                 // The value of the TRANSACTION_ID metadata is not the requested
155                 // transaction id number.
156                 continue;
157             }
158 
159             // Search for all metadata variables in the current journal entry.
160             for (auto i = metalist.cbegin(); i != metalist.cend();)
161             {
162                 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
163                                          &length);
164                 if (rc < 0)
165                 {
166                     // Metadata variable not found, check next metadata
167                     // variable.
168                     i++;
169                     continue;
170                 }
171 
172                 // Metadata variable found, save it and remove it from the set.
173                 additionalData.emplace_back(data, length);
174                 i = metalist.erase(i);
175             }
176             if (metalist.empty())
177             {
178                 // All metadata variables found, break out of journal loop.
179                 break;
180             }
181         }
182         if (!metalist.empty())
183         {
184             // Not all the metadata variables were found in the journal.
185             for (auto& metaVarStr : metalist)
186             {
187                 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
188                           metaVarStr);
189             }
190         }
191 
192         sd_journal_close(j);
193     }
194     createEntry(errMsg, errLvl, additionalData);
195 }
196 
createEntry(std::string errMsg,Entry::Level errLvl,std::vector<std::string> additionalData,const FFDCEntries & ffdc)197 void Manager::createEntry(std::string errMsg, Entry::Level errLvl,
198                           std::vector<std::string> additionalData,
199                           const FFDCEntries& ffdc)
200 {
201     if (!Extensions::disableDefaultLogCaps())
202     {
203         if (errLvl < Entry::sevLowerLimit)
204         {
205             if (realErrors.size() >= ERROR_CAP)
206             {
207                 erase(realErrors.front());
208             }
209         }
210         else
211         {
212             if (infoErrors.size() >= ERROR_INFO_CAP)
213             {
214                 erase(infoErrors.front());
215             }
216         }
217     }
218 
219     entryId++;
220     if (errLvl >= Entry::sevLowerLimit)
221     {
222         infoErrors.push_back(entryId);
223     }
224     else
225     {
226         realErrors.push_back(entryId);
227     }
228     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
229                   std::chrono::system_clock::now().time_since_epoch())
230                   .count();
231     auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
232 
233     AssociationList objects{};
234     processMetadata(errMsg, additionalData, objects);
235 
236     auto e = std::make_unique<Entry>(
237         busLog, objPath, entryId,
238         ms, // Milliseconds since 1970
239         errLvl, std::move(errMsg), std::move(additionalData),
240         std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
241 
242     serialize(*e);
243 
244     if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
245         isCalloutPresent(*e))
246     {
247         quiesceOnError(entryId);
248     }
249 
250     // Add entry before calling the extensions so that they have access to it
251     entries.insert(std::make_pair(entryId, std::move(e)));
252 
253     doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
254 
255     // Note: No need to close the file descriptors in the FFDC.
256 }
257 
isQuiesceOnErrorEnabled()258 bool Manager::isQuiesceOnErrorEnabled()
259 {
260     // When running under tests, the Logging.Settings service will not be
261     // present.  Assume false.
262     if (IS_UNIT_TEST)
263     {
264         return false;
265     }
266 
267     std::variant<bool> property;
268 
269     auto method = this->busLog.new_method_call(
270         "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
271         "org.freedesktop.DBus.Properties", "Get");
272 
273     method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
274 
275     try
276     {
277         auto reply = this->busLog.call(method);
278         reply.read(property);
279     }
280     catch (const sdbusplus::exception_t& e)
281     {
282         lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
283                    e);
284         return false;
285     }
286 
287     return std::get<bool>(property);
288 }
289 
isCalloutPresent(const Entry & entry)290 bool Manager::isCalloutPresent(const Entry& entry)
291 {
292     for (const auto& c : entry.additionalData())
293     {
294         if (c.find("CALLOUT_") != std::string::npos)
295         {
296             return true;
297         }
298     }
299 
300     return false;
301 }
302 
findAndRemoveResolvedBlocks()303 void Manager::findAndRemoveResolvedBlocks()
304 {
305     for (auto& entry : entries)
306     {
307         if (entry.second->resolved())
308         {
309             checkAndRemoveBlockingError(entry.first);
310         }
311     }
312 }
313 
onEntryResolve(sdbusplus::message_t & msg)314 void Manager::onEntryResolve(sdbusplus::message_t& msg)
315 {
316     using Interface = std::string;
317     using Property = std::string;
318     using Value = std::string;
319     using Properties = std::map<Property, std::variant<Value>>;
320 
321     Interface interface;
322     Properties properties;
323 
324     msg.read(interface, properties);
325 
326     for (const auto& p : properties)
327     {
328         if (p.first == "Resolved")
329         {
330             findAndRemoveResolvedBlocks();
331             return;
332         }
333     }
334 }
335 
checkAndQuiesceHost()336 void Manager::checkAndQuiesceHost()
337 {
338     using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
339 
340     // First check host state
341     std::variant<Host::HostState> property;
342 
343     auto method = this->busLog.new_method_call(
344         "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
345         "org.freedesktop.DBus.Properties", "Get");
346 
347     method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
348 
349     try
350     {
351         auto reply = this->busLog.call(method);
352         reply.read(property);
353     }
354     catch (const sdbusplus::exception_t& e)
355     {
356         // Quiescing the host is a "best effort" type function. If unable to
357         // read the host state or it comes back empty, just return.
358         // The boot block object will still be created and the associations to
359         // find the log will be present. Don't want a dependency with
360         // phosphor-state-manager service
361         lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
362                   e);
363         return;
364     }
365 
366     auto hostState = std::get<Host::HostState>(property);
367     if (hostState != Host::HostState::Running)
368     {
369         return;
370     }
371 
372     auto quiesce = this->busLog.new_method_call(
373         "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
374         "org.freedesktop.systemd1.Manager", "StartUnit");
375 
376     quiesce.append("obmc-host-graceful-quiesce@0.target");
377     quiesce.append("replace");
378 
379     this->busLog.call_noreply(quiesce);
380 }
381 
quiesceOnError(const uint32_t entryId)382 void Manager::quiesceOnError(const uint32_t entryId)
383 {
384     // Verify we don't already have this entry blocking
385     auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
386                       [&](const std::unique_ptr<Block>& obj) {
387         return obj->entryId == entryId;
388     });
389     if (it != this->blockingErrors.end())
390     {
391         // Already recorded so just return
392         lg2::debug(
393             "QuiesceOnError set and callout present but entry already logged");
394         return;
395     }
396 
397     lg2::info("QuiesceOnError set and callout present");
398 
399     auto blockPath = std::string(OBJ_LOGGING) + "/block" +
400                      std::to_string(entryId);
401     auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
402     this->blockingErrors.push_back(std::move(blockObj));
403 
404     // Register call back if log is resolved
405     using namespace sdbusplus::bus::match::rules;
406     auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
407     auto callback = std::make_unique<sdbusplus::bus::match_t>(
408         this->busLog,
409         propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
410         std::bind(std::mem_fn(&Manager::onEntryResolve), this,
411                   std::placeholders::_1));
412 
413     propChangedEntryCallback.insert(
414         std::make_pair(entryId, std::move(callback)));
415 
416     checkAndQuiesceHost();
417 }
418 
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)419 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
420 {
421     // Make the association <endpointpath>/<endpointtype> paths
422     std::vector<std::string> assocs;
423     for (const auto& [forwardType, reverseType, endpoint] :
424          entry.associations())
425     {
426         std::string e{endpoint};
427         e += '/' + reverseType;
428         assocs.push_back(e);
429     }
430 
431     for (auto& create : Extensions::getCreateFunctions())
432     {
433         try
434         {
435             create(entry.message(), entry.id(), entry.timestamp(),
436                    entry.severity(), entry.additionalData(), assocs, ffdc);
437         }
438         catch (const std::exception& e)
439         {
440             lg2::error(
441                 "An extension's create function threw an exception: {ERROR}",
442                 "ERROR", e);
443         }
444     }
445 }
446 
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const447 void Manager::processMetadata(const std::string& /*errorName*/,
448                               const std::vector<std::string>& additionalData,
449                               AssociationList& objects) const
450 {
451     // additionalData is a list of "metadata=value"
452     constexpr auto separator = '=';
453     for (const auto& entryItem : additionalData)
454     {
455         auto found = entryItem.find(separator);
456         if (std::string::npos != found)
457         {
458             auto metadata = entryItem.substr(0, found);
459             auto iter = meta.find(metadata);
460             if (meta.end() != iter)
461             {
462                 (iter->second)(metadata, additionalData, objects);
463             }
464         }
465     }
466 }
467 
checkAndRemoveBlockingError(uint32_t entryId)468 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
469 {
470     // First look for blocking object and remove
471     auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
472                       [&](const std::unique_ptr<Block>& obj) {
473         return obj->entryId == entryId;
474     });
475     if (it != blockingErrors.end())
476     {
477         blockingErrors.erase(it);
478     }
479 
480     // Now remove the callback looking for the error to be resolved
481     auto resolveFind = propChangedEntryCallback.find(entryId);
482     if (resolveFind != propChangedEntryCallback.end())
483     {
484         propChangedEntryCallback.erase(resolveFind);
485     }
486 
487     return;
488 }
489 
erase(uint32_t entryId)490 void Manager::erase(uint32_t entryId)
491 {
492     auto entryFound = entries.find(entryId);
493     if (entries.end() != entryFound)
494     {
495         for (auto& func : Extensions::getDeleteProhibitedFunctions())
496         {
497             try
498             {
499                 bool prohibited = false;
500                 func(entryId, prohibited);
501                 if (prohibited)
502                 {
503                     // Future work remains to throw an error here.
504                     return;
505                 }
506             }
507             catch (const std::exception& e)
508             {
509                 lg2::error("An extension's deleteProhibited function threw an "
510                            "exception: {ERROR}",
511                            "ERROR", e);
512             }
513         }
514 
515         // Delete the persistent representation of this error.
516         fs::path errorPath(ERRLOG_PERSIST_PATH);
517         errorPath /= std::to_string(entryId);
518         fs::remove(errorPath);
519 
520         auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
521             auto it = std::find(ids.begin(), ids.end(), id);
522             if (it != ids.end())
523             {
524                 ids.erase(it);
525             }
526         };
527         if (entryFound->second->severity() >= Entry::sevLowerLimit)
528         {
529             removeId(infoErrors, entryId);
530         }
531         else
532         {
533             removeId(realErrors, entryId);
534         }
535         entries.erase(entryFound);
536 
537         checkAndRemoveBlockingError(entryId);
538 
539         for (auto& remove : Extensions::getDeleteFunctions())
540         {
541             try
542             {
543                 remove(entryId);
544             }
545             catch (const std::exception& e)
546             {
547                 lg2::error("An extension's delete function threw an exception: "
548                            "{ERROR}",
549                            "ERROR", e);
550             }
551         }
552     }
553     else
554     {
555         lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
556     }
557 }
558 
restore()559 void Manager::restore()
560 {
561     auto sanity = [](const auto& id, const auto& restoredId) {
562         return id == restoredId;
563     };
564 
565     fs::path dir(ERRLOG_PERSIST_PATH);
566     if (!fs::exists(dir) || fs::is_empty(dir))
567     {
568         return;
569     }
570 
571     for (auto& file : fs::directory_iterator(dir))
572     {
573         auto id = file.path().filename().c_str();
574         auto idNum = std::stol(id);
575         auto e = std::make_unique<Entry>(
576             busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
577         if (deserialize(file.path(), *e))
578         {
579             // validate the restored error entry id
580             if (sanity(static_cast<uint32_t>(idNum), e->id()))
581             {
582                 e->path(file.path(), true);
583                 if (e->severity() >= Entry::sevLowerLimit)
584                 {
585                     infoErrors.push_back(idNum);
586                 }
587                 else
588                 {
589                     realErrors.push_back(idNum);
590                 }
591 
592                 entries.insert(std::make_pair(idNum, std::move(e)));
593             }
594             else
595             {
596                 lg2::error(
597                     "Failed in sanity check while restoring error entry. "
598                     "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
599                     "ID_NUM", idNum, "ENTRY_ID", e->id());
600             }
601         }
602     }
603 
604     if (!entries.empty())
605     {
606         entryId = entries.rbegin()->first;
607     }
608 }
609 
readFWVersion()610 std::string Manager::readFWVersion()
611 {
612     auto version = util::getOSReleaseValue("VERSION_ID");
613 
614     if (!version)
615     {
616         lg2::error("Unable to read BMC firmware version");
617     }
618 
619     return version.value_or("");
620 }
621 
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)622 void Manager::create(const std::string& message, Entry::Level severity,
623                      const std::map<std::string, std::string>& additionalData,
624                      const FFDCEntries& ffdc)
625 {
626     // Convert the map into a vector of "key=value" strings
627     std::vector<std::string> ad;
628     metadata::associations::combine(additionalData, ad);
629 
630     createEntry(message, severity, ad, ffdc);
631 }
632 
633 } // namespace internal
634 } // namespace logging
635 } // namespace phosphor
636