1 #include "config.h"
2
3 #include "log_manager.hpp"
4
5 #include "elog_entry.hpp"
6 #include "elog_meta.hpp"
7 #include "elog_serialize.hpp"
8 #include "extensions.hpp"
9 #include "util.hpp"
10
11 #include <systemd/sd-bus.h>
12 #include <systemd/sd-journal.h>
13 #include <unistd.h>
14
15 #include <phosphor-logging/lg2.hpp>
16 #include <sdbusplus/vtable.hpp>
17 #include <xyz/openbmc_project/State/Host/server.hpp>
18
19 #include <cassert>
20 #include <chrono>
21 #include <cstdio>
22 #include <cstring>
23 #include <fstream>
24 #include <functional>
25 #include <future>
26 #include <iostream>
27 #include <map>
28 #include <set>
29 #include <string>
30 #include <string_view>
31 #include <vector>
32
33 using namespace std::chrono;
34 extern const std::map<
35 phosphor::logging::metadata::Metadata,
36 std::function<phosphor::logging::metadata::associations::Type>>
37 meta;
38
39 namespace phosphor
40 {
41 namespace logging
42 {
43 namespace internal
44 {
getLevel(const std::string & errMsg)45 inline auto getLevel(const std::string& errMsg)
46 {
47 auto reqLevel = Entry::Level::Error; // Default to Error
48
49 auto levelmap = g_errLevelMap.find(errMsg);
50 if (levelmap != g_errLevelMap.end())
51 {
52 reqLevel = static_cast<Entry::Level>(levelmap->second);
53 }
54
55 return reqLevel;
56 }
57
getRealErrSize()58 int Manager::getRealErrSize()
59 {
60 return realErrors.size();
61 }
62
getInfoErrSize()63 int Manager::getInfoErrSize()
64 {
65 return infoErrors.size();
66 }
67
commit(uint64_t transactionId,std::string errMsg)68 uint32_t Manager::commit(uint64_t transactionId, std::string errMsg)
69 {
70 auto level = getLevel(errMsg);
71 _commit(transactionId, std::move(errMsg), level);
72 return entryId;
73 }
74
commitWithLvl(uint64_t transactionId,std::string errMsg,uint32_t errLvl)75 uint32_t Manager::commitWithLvl(uint64_t transactionId, std::string errMsg,
76 uint32_t errLvl)
77 {
78 _commit(transactionId, std::move(errMsg),
79 static_cast<Entry::Level>(errLvl));
80 return entryId;
81 }
82
_commit(uint64_t transactionId,std::string && errMsg,Entry::Level errLvl)83 void Manager::_commit(uint64_t transactionId [[maybe_unused]],
84 std::string&& errMsg, Entry::Level errLvl)
85 {
86 std::vector<std::string> additionalData{};
87
88 // When running as a test-case, the system may have a LOT of journal
89 // data and we may not have permissions to do some of the journal sync
90 // operations. Just skip over them.
91 if (!IS_UNIT_TEST)
92 {
93 static constexpr auto transactionIdVar =
94 std::string_view{"TRANSACTION_ID"};
95 // Length of 'TRANSACTION_ID' string.
96 static constexpr auto transactionIdVarSize = transactionIdVar.size();
97 // Length of 'TRANSACTION_ID=' string.
98 static constexpr auto transactionIdVarOffset = transactionIdVarSize + 1;
99
100 // Flush all the pending log messages into the journal
101 util::journalSync();
102
103 sd_journal* j = nullptr;
104 int rc = sd_journal_open(&j, SD_JOURNAL_LOCAL_ONLY);
105 if (rc < 0)
106 {
107 lg2::error("Failed to open journal: {ERROR}", "ERROR",
108 strerror(-rc));
109 return;
110 }
111
112 std::string transactionIdStr = std::to_string(transactionId);
113 std::set<std::string> metalist;
114 auto metamap = g_errMetaMap.find(errMsg);
115 if (metamap != g_errMetaMap.end())
116 {
117 metalist.insert(metamap->second.begin(), metamap->second.end());
118 }
119
120 // Add _PID field information in AdditionalData.
121 metalist.insert("_PID");
122
123 // Read the journal from the end to get the most recent entry first.
124 // The result from the sd_journal_get_data() is of the form
125 // VARIABLE=value.
126 SD_JOURNAL_FOREACH_BACKWARDS(j)
127 {
128 const char* data = nullptr;
129 size_t length = 0;
130
131 // Look for the transaction id metadata variable
132 rc = sd_journal_get_data(j, transactionIdVar.data(),
133 (const void**)&data, &length);
134 if (rc < 0)
135 {
136 // This journal entry does not have the TRANSACTION_ID
137 // metadata variable.
138 continue;
139 }
140
141 // journald does not guarantee that sd_journal_get_data() returns
142 // NULL terminated strings, so need to specify the size to use to
143 // compare, use the returned length instead of anything that relies
144 // on NULL terminators like strlen(). The data variable is in the
145 // form of 'TRANSACTION_ID=1234'. Remove the TRANSACTION_ID
146 // characters plus the (=) sign to do the comparison. 'data +
147 // transactionIdVarOffset' will be in the form of '1234'. 'length -
148 // transactionIdVarOffset' will be the length of '1234'.
149 if ((length <= (transactionIdVarOffset)) ||
150 (transactionIdStr.compare(
151 0, transactionIdStr.size(), data + transactionIdVarOffset,
152 length - transactionIdVarOffset) != 0))
153 {
154 // The value of the TRANSACTION_ID metadata is not the requested
155 // transaction id number.
156 continue;
157 }
158
159 // Search for all metadata variables in the current journal entry.
160 for (auto i = metalist.cbegin(); i != metalist.cend();)
161 {
162 rc = sd_journal_get_data(j, (*i).c_str(), (const void**)&data,
163 &length);
164 if (rc < 0)
165 {
166 // Metadata variable not found, check next metadata
167 // variable.
168 i++;
169 continue;
170 }
171
172 // Metadata variable found, save it and remove it from the set.
173 additionalData.emplace_back(data, length);
174 i = metalist.erase(i);
175 }
176 if (metalist.empty())
177 {
178 // All metadata variables found, break out of journal loop.
179 break;
180 }
181 }
182 if (!metalist.empty())
183 {
184 // Not all the metadata variables were found in the journal.
185 for (auto& metaVarStr : metalist)
186 {
187 lg2::info("Failed to find metadata: {META_FIELD}", "META_FIELD",
188 metaVarStr);
189 }
190 }
191
192 sd_journal_close(j);
193 }
194 createEntry(errMsg, errLvl, additionalData);
195 }
196
createEntry(std::string errMsg,Entry::Level errLvl,std::vector<std::string> additionalData,const FFDCEntries & ffdc)197 void Manager::createEntry(std::string errMsg, Entry::Level errLvl,
198 std::vector<std::string> additionalData,
199 const FFDCEntries& ffdc)
200 {
201 if (!Extensions::disableDefaultLogCaps())
202 {
203 if (errLvl < Entry::sevLowerLimit)
204 {
205 if (realErrors.size() >= ERROR_CAP)
206 {
207 erase(realErrors.front());
208 }
209 }
210 else
211 {
212 if (infoErrors.size() >= ERROR_INFO_CAP)
213 {
214 erase(infoErrors.front());
215 }
216 }
217 }
218
219 entryId++;
220 if (errLvl >= Entry::sevLowerLimit)
221 {
222 infoErrors.push_back(entryId);
223 }
224 else
225 {
226 realErrors.push_back(entryId);
227 }
228 auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
229 std::chrono::system_clock::now().time_since_epoch())
230 .count();
231 auto objPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
232
233 AssociationList objects{};
234 processMetadata(errMsg, additionalData, objects);
235
236 auto e = std::make_unique<Entry>(
237 busLog, objPath, entryId,
238 ms, // Milliseconds since 1970
239 errLvl, std::move(errMsg), std::move(additionalData),
240 std::move(objects), fwVersion, getEntrySerializePath(entryId), *this);
241
242 serialize(*e);
243
244 if (isQuiesceOnErrorEnabled() && (errLvl < Entry::sevLowerLimit) &&
245 isCalloutPresent(*e))
246 {
247 quiesceOnError(entryId);
248 }
249
250 // Add entry before calling the extensions so that they have access to it
251 entries.insert(std::make_pair(entryId, std::move(e)));
252
253 doExtensionLogCreate(*entries.find(entryId)->second, ffdc);
254
255 // Note: No need to close the file descriptors in the FFDC.
256 }
257
isQuiesceOnErrorEnabled()258 bool Manager::isQuiesceOnErrorEnabled()
259 {
260 // When running under tests, the Logging.Settings service will not be
261 // present. Assume false.
262 if (IS_UNIT_TEST)
263 {
264 return false;
265 }
266
267 std::variant<bool> property;
268
269 auto method = this->busLog.new_method_call(
270 "xyz.openbmc_project.Settings", "/xyz/openbmc_project/logging/settings",
271 "org.freedesktop.DBus.Properties", "Get");
272
273 method.append("xyz.openbmc_project.Logging.Settings", "QuiesceOnHwError");
274
275 try
276 {
277 auto reply = this->busLog.call(method);
278 reply.read(property);
279 }
280 catch (const sdbusplus::exception_t& e)
281 {
282 lg2::error("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
283 e);
284 return false;
285 }
286
287 return std::get<bool>(property);
288 }
289
isCalloutPresent(const Entry & entry)290 bool Manager::isCalloutPresent(const Entry& entry)
291 {
292 for (const auto& c : entry.additionalData())
293 {
294 if (c.find("CALLOUT_") != std::string::npos)
295 {
296 return true;
297 }
298 }
299
300 return false;
301 }
302
findAndRemoveResolvedBlocks()303 void Manager::findAndRemoveResolvedBlocks()
304 {
305 for (auto& entry : entries)
306 {
307 if (entry.second->resolved())
308 {
309 checkAndRemoveBlockingError(entry.first);
310 }
311 }
312 }
313
onEntryResolve(sdbusplus::message_t & msg)314 void Manager::onEntryResolve(sdbusplus::message_t& msg)
315 {
316 using Interface = std::string;
317 using Property = std::string;
318 using Value = std::string;
319 using Properties = std::map<Property, std::variant<Value>>;
320
321 Interface interface;
322 Properties properties;
323
324 msg.read(interface, properties);
325
326 for (const auto& p : properties)
327 {
328 if (p.first == "Resolved")
329 {
330 findAndRemoveResolvedBlocks();
331 return;
332 }
333 }
334 }
335
checkAndQuiesceHost()336 void Manager::checkAndQuiesceHost()
337 {
338 using Host = sdbusplus::server::xyz::openbmc_project::state::Host;
339
340 // First check host state
341 std::variant<Host::HostState> property;
342
343 auto method = this->busLog.new_method_call(
344 "xyz.openbmc_project.State.Host", "/xyz/openbmc_project/state/host0",
345 "org.freedesktop.DBus.Properties", "Get");
346
347 method.append("xyz.openbmc_project.State.Host", "CurrentHostState");
348
349 try
350 {
351 auto reply = this->busLog.call(method);
352 reply.read(property);
353 }
354 catch (const sdbusplus::exception_t& e)
355 {
356 // Quiescing the host is a "best effort" type function. If unable to
357 // read the host state or it comes back empty, just return.
358 // The boot block object will still be created and the associations to
359 // find the log will be present. Don't want a dependency with
360 // phosphor-state-manager service
361 lg2::info("Error reading QuiesceOnHwError property: {ERROR}", "ERROR",
362 e);
363 return;
364 }
365
366 auto hostState = std::get<Host::HostState>(property);
367 if (hostState != Host::HostState::Running)
368 {
369 return;
370 }
371
372 auto quiesce = this->busLog.new_method_call(
373 "org.freedesktop.systemd1", "/org/freedesktop/systemd1",
374 "org.freedesktop.systemd1.Manager", "StartUnit");
375
376 quiesce.append("obmc-host-graceful-quiesce@0.target");
377 quiesce.append("replace");
378
379 this->busLog.call_noreply(quiesce);
380 }
381
quiesceOnError(const uint32_t entryId)382 void Manager::quiesceOnError(const uint32_t entryId)
383 {
384 // Verify we don't already have this entry blocking
385 auto it = find_if(this->blockingErrors.begin(), this->blockingErrors.end(),
386 [&](const std::unique_ptr<Block>& obj) {
387 return obj->entryId == entryId;
388 });
389 if (it != this->blockingErrors.end())
390 {
391 // Already recorded so just return
392 lg2::debug(
393 "QuiesceOnError set and callout present but entry already logged");
394 return;
395 }
396
397 lg2::info("QuiesceOnError set and callout present");
398
399 auto blockPath = std::string(OBJ_LOGGING) + "/block" +
400 std::to_string(entryId);
401 auto blockObj = std::make_unique<Block>(this->busLog, blockPath, entryId);
402 this->blockingErrors.push_back(std::move(blockObj));
403
404 // Register call back if log is resolved
405 using namespace sdbusplus::bus::match::rules;
406 auto entryPath = std::string(OBJ_ENTRY) + '/' + std::to_string(entryId);
407 auto callback = std::make_unique<sdbusplus::bus::match_t>(
408 this->busLog,
409 propertiesChanged(entryPath, "xyz.openbmc_project.Logging.Entry"),
410 std::bind(std::mem_fn(&Manager::onEntryResolve), this,
411 std::placeholders::_1));
412
413 propChangedEntryCallback.insert(
414 std::make_pair(entryId, std::move(callback)));
415
416 checkAndQuiesceHost();
417 }
418
doExtensionLogCreate(const Entry & entry,const FFDCEntries & ffdc)419 void Manager::doExtensionLogCreate(const Entry& entry, const FFDCEntries& ffdc)
420 {
421 // Make the association <endpointpath>/<endpointtype> paths
422 std::vector<std::string> assocs;
423 for (const auto& [forwardType, reverseType, endpoint] :
424 entry.associations())
425 {
426 std::string e{endpoint};
427 e += '/' + reverseType;
428 assocs.push_back(e);
429 }
430
431 for (auto& create : Extensions::getCreateFunctions())
432 {
433 try
434 {
435 create(entry.message(), entry.id(), entry.timestamp(),
436 entry.severity(), entry.additionalData(), assocs, ffdc);
437 }
438 catch (const std::exception& e)
439 {
440 lg2::error(
441 "An extension's create function threw an exception: {ERROR}",
442 "ERROR", e);
443 }
444 }
445 }
446
processMetadata(const std::string &,const std::vector<std::string> & additionalData,AssociationList & objects) const447 void Manager::processMetadata(const std::string& /*errorName*/,
448 const std::vector<std::string>& additionalData,
449 AssociationList& objects) const
450 {
451 // additionalData is a list of "metadata=value"
452 constexpr auto separator = '=';
453 for (const auto& entryItem : additionalData)
454 {
455 auto found = entryItem.find(separator);
456 if (std::string::npos != found)
457 {
458 auto metadata = entryItem.substr(0, found);
459 auto iter = meta.find(metadata);
460 if (meta.end() != iter)
461 {
462 (iter->second)(metadata, additionalData, objects);
463 }
464 }
465 }
466 }
467
checkAndRemoveBlockingError(uint32_t entryId)468 void Manager::checkAndRemoveBlockingError(uint32_t entryId)
469 {
470 // First look for blocking object and remove
471 auto it = find_if(blockingErrors.begin(), blockingErrors.end(),
472 [&](const std::unique_ptr<Block>& obj) {
473 return obj->entryId == entryId;
474 });
475 if (it != blockingErrors.end())
476 {
477 blockingErrors.erase(it);
478 }
479
480 // Now remove the callback looking for the error to be resolved
481 auto resolveFind = propChangedEntryCallback.find(entryId);
482 if (resolveFind != propChangedEntryCallback.end())
483 {
484 propChangedEntryCallback.erase(resolveFind);
485 }
486
487 return;
488 }
489
erase(uint32_t entryId)490 void Manager::erase(uint32_t entryId)
491 {
492 auto entryFound = entries.find(entryId);
493 if (entries.end() != entryFound)
494 {
495 for (auto& func : Extensions::getDeleteProhibitedFunctions())
496 {
497 try
498 {
499 bool prohibited = false;
500 func(entryId, prohibited);
501 if (prohibited)
502 {
503 // Future work remains to throw an error here.
504 return;
505 }
506 }
507 catch (const std::exception& e)
508 {
509 lg2::error("An extension's deleteProhibited function threw an "
510 "exception: {ERROR}",
511 "ERROR", e);
512 }
513 }
514
515 // Delete the persistent representation of this error.
516 fs::path errorPath(ERRLOG_PERSIST_PATH);
517 errorPath /= std::to_string(entryId);
518 fs::remove(errorPath);
519
520 auto removeId = [](std::list<uint32_t>& ids, uint32_t id) {
521 auto it = std::find(ids.begin(), ids.end(), id);
522 if (it != ids.end())
523 {
524 ids.erase(it);
525 }
526 };
527 if (entryFound->second->severity() >= Entry::sevLowerLimit)
528 {
529 removeId(infoErrors, entryId);
530 }
531 else
532 {
533 removeId(realErrors, entryId);
534 }
535 entries.erase(entryFound);
536
537 checkAndRemoveBlockingError(entryId);
538
539 for (auto& remove : Extensions::getDeleteFunctions())
540 {
541 try
542 {
543 remove(entryId);
544 }
545 catch (const std::exception& e)
546 {
547 lg2::error("An extension's delete function threw an exception: "
548 "{ERROR}",
549 "ERROR", e);
550 }
551 }
552 }
553 else
554 {
555 lg2::error("Invalid entry ID ({ID}) to delete", "ID", entryId);
556 }
557 }
558
restore()559 void Manager::restore()
560 {
561 auto sanity = [](const auto& id, const auto& restoredId) {
562 return id == restoredId;
563 };
564
565 fs::path dir(ERRLOG_PERSIST_PATH);
566 if (!fs::exists(dir) || fs::is_empty(dir))
567 {
568 return;
569 }
570
571 for (auto& file : fs::directory_iterator(dir))
572 {
573 auto id = file.path().filename().c_str();
574 auto idNum = std::stol(id);
575 auto e = std::make_unique<Entry>(
576 busLog, std::string(OBJ_ENTRY) + '/' + id, idNum, *this);
577 if (deserialize(file.path(), *e))
578 {
579 // validate the restored error entry id
580 if (sanity(static_cast<uint32_t>(idNum), e->id()))
581 {
582 e->path(file.path(), true);
583 if (e->severity() >= Entry::sevLowerLimit)
584 {
585 infoErrors.push_back(idNum);
586 }
587 else
588 {
589 realErrors.push_back(idNum);
590 }
591
592 entries.insert(std::make_pair(idNum, std::move(e)));
593 }
594 else
595 {
596 lg2::error(
597 "Failed in sanity check while restoring error entry. "
598 "Ignoring error entry {ID_NUM}/{ENTRY_ID}.",
599 "ID_NUM", idNum, "ENTRY_ID", e->id());
600 }
601 }
602 }
603
604 if (!entries.empty())
605 {
606 entryId = entries.rbegin()->first;
607 }
608 }
609
readFWVersion()610 std::string Manager::readFWVersion()
611 {
612 auto version = util::getOSReleaseValue("VERSION_ID");
613
614 if (!version)
615 {
616 lg2::error("Unable to read BMC firmware version");
617 }
618
619 return version.value_or("");
620 }
621
create(const std::string & message,Entry::Level severity,const std::map<std::string,std::string> & additionalData,const FFDCEntries & ffdc)622 void Manager::create(const std::string& message, Entry::Level severity,
623 const std::map<std::string, std::string>& additionalData,
624 const FFDCEntries& ffdc)
625 {
626 // Convert the map into a vector of "key=value" strings
627 std::vector<std::string> ad;
628 metadata::associations::combine(additionalData, ad);
629
630 createEntry(message, severity, ad, ffdc);
631 }
632
633 } // namespace internal
634 } // namespace logging
635 } // namespace phosphor
636