1 /**
2  * Copyright © 2019 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "manager.hpp"
17 
18 #include "additional_data.hpp"
19 #include "json_utils.hpp"
20 #include "pel.hpp"
21 #include "service_indicators.hpp"
22 
23 #include <fmt/format.h>
24 #include <sys/inotify.h>
25 #include <unistd.h>
26 
27 #include <filesystem>
28 #include <fstream>
29 #include <xyz/openbmc_project/Common/error.hpp>
30 #include <xyz/openbmc_project/Logging/Create/server.hpp>
31 
32 namespace openpower
33 {
34 namespace pels
35 {
36 
37 using namespace phosphor::logging;
38 namespace fs = std::filesystem;
39 namespace rg = openpower::pels::message;
40 
41 namespace common_error = sdbusplus::xyz::openbmc_project::Common::Error;
42 
43 using Create = sdbusplus::xyz::openbmc_project::Logging::server::Create;
44 
45 namespace additional_data
46 {
47 constexpr auto rawPEL = "RAWPEL";
48 constexpr auto esel = "ESEL";
49 constexpr auto error = "ERROR_NAME";
50 } // namespace additional_data
51 
52 constexpr auto defaultLogMessage = "xyz.openbmc_project.Logging.Error.Default";
53 
54 Manager::~Manager()
55 {
56     if (_pelFileDeleteFD != -1)
57     {
58         if (_pelFileDeleteWatchFD != -1)
59         {
60             inotify_rm_watch(_pelFileDeleteFD, _pelFileDeleteWatchFD);
61         }
62         close(_pelFileDeleteFD);
63     }
64 }
65 
66 void Manager::create(const std::string& message, uint32_t obmcLogID,
67                      uint64_t timestamp, Entry::Level severity,
68                      const std::vector<std::string>& additionalData,
69                      const std::vector<std::string>& associations,
70                      const FFDCEntries& ffdc)
71 {
72     AdditionalData ad{additionalData};
73 
74     // If a PEL was passed in via a filename or in an ESEL,
75     // use that.  Otherwise, create one.
76     auto rawPelPath = ad.getValue(additional_data::rawPEL);
77     if (rawPelPath)
78     {
79         addRawPEL(*rawPelPath, obmcLogID);
80     }
81     else
82     {
83         auto esel = ad.getValue(additional_data::esel);
84         if (esel)
85         {
86             addESELPEL(*esel, obmcLogID);
87         }
88         else
89         {
90             createPEL(message, obmcLogID, timestamp, severity, additionalData,
91                       associations, ffdc);
92         }
93     }
94 
95     setEntryPath(obmcLogID);
96 }
97 
98 void Manager::addRawPEL(const std::string& rawPelPath, uint32_t obmcLogID)
99 {
100     if (fs::exists(rawPelPath))
101     {
102         std::ifstream file(rawPelPath, std::ios::in | std::ios::binary);
103 
104         auto data = std::vector<uint8_t>(std::istreambuf_iterator<char>(file),
105                                          std::istreambuf_iterator<char>());
106         if (file.fail())
107         {
108             log<level::ERR>("Filesystem error reading a raw PEL",
109                             entry("PELFILE=%s", rawPelPath.c_str()),
110                             entry("OBMCLOGID=%d", obmcLogID));
111             // TODO, Decide what to do here. Maybe nothing.
112             return;
113         }
114 
115         file.close();
116 
117         addPEL(data, obmcLogID);
118     }
119     else
120     {
121         log<level::ERR>("Raw PEL file from BMC event log does not exist",
122                         entry("PELFILE=%s", (rawPelPath).c_str()),
123                         entry("OBMCLOGID=%d", obmcLogID));
124     }
125 }
126 
127 void Manager::addPEL(std::vector<uint8_t>& pelData, uint32_t obmcLogID)
128 {
129 
130     auto pel = std::make_unique<openpower::pels::PEL>(pelData, obmcLogID);
131     if (pel->valid())
132     {
133         // PELs created by others still need these fields set by us.
134         pel->assignID();
135         pel->setCommitTime();
136 
137         // Update System Info to Extended User Data
138         pel->updateSysInfoInExtendedUserDataSection(*_dataIface);
139 
140         try
141         {
142             log<level::DEBUG>(
143                 fmt::format("Adding external PEL {:#x} (BMC ID {}) to repo",
144                             pel->id(), obmcLogID)
145                     .c_str());
146 
147             _repo.add(pel);
148 
149             if (_repo.sizeWarning())
150             {
151                 scheduleRepoPrune();
152             }
153 
154             // Activate any resulting service indicators if necessary
155             auto policy = service_indicators::getPolicy(*_dataIface);
156             policy->activate(*pel);
157         }
158         catch (std::exception& e)
159         {
160             // Probably a full or r/o filesystem, not much we can do.
161             log<level::ERR>("Unable to add PEL to Repository",
162                             entry("PEL_ID=0x%X", pel->id()));
163         }
164 
165         // Check if firmware should quiesce system due to error
166         checkPelAndQuiesce(pel);
167     }
168     else
169     {
170         log<level::ERR>("Invalid PEL received from the host",
171                         entry("OBMCLOGID=%d", obmcLogID));
172 
173         AdditionalData ad;
174         ad.add("PLID", getNumberString("0x%08X", pel->plid()));
175         ad.add("OBMC_LOG_ID", std::to_string(obmcLogID));
176         ad.add("PEL_SIZE", std::to_string(pelData.size()));
177 
178         std::string asciiString;
179         auto src = pel->primarySRC();
180         if (src)
181         {
182             asciiString = (*src)->asciiString();
183         }
184 
185         ad.add("SRC", asciiString);
186 
187         _eventLogger.log("org.open_power.Logging.Error.BadHostPEL",
188                          Entry::Level::Error, ad);
189 
190         // Save it to a file for debug in the lab.  Just keep the latest.
191         // Not adding it to the PEL because it could already be max size
192         // and don't want to truncate an already invalid PEL.
193         std::ofstream pelFile{getPELRepoPath() / "badPEL"};
194         pelFile.write(reinterpret_cast<const char*>(pelData.data()),
195                       pelData.size());
196     }
197 }
198 
199 void Manager::addESELPEL(const std::string& esel, uint32_t obmcLogID)
200 {
201     std::vector<uint8_t> data;
202 
203     log<level::DEBUG>("Adding PEL from ESEL",
204                       entry("OBMC_LOG_ID=%d", obmcLogID));
205 
206     try
207     {
208         data = std::move(eselToRawData(esel));
209     }
210     catch (std::exception& e)
211     {
212         // Try to add it below anyway, so it follows the usual bad data path.
213         log<level::ERR>("Problems converting ESEL string to a byte vector");
214     }
215 
216     addPEL(data, obmcLogID);
217 }
218 
219 std::vector<uint8_t> Manager::eselToRawData(const std::string& esel)
220 {
221     std::vector<uint8_t> data;
222     std::string byteString;
223 
224     // As the eSEL string looks like: "50 48 00 ab ..." there are 3
225     // characters per raw byte, and since the actual PEL data starts
226     // at the 16th byte, the code will grab the PEL data starting at
227     // offset 48 in the string.
228     static constexpr size_t pelStart = 16 * 3;
229 
230     if (esel.size() <= pelStart)
231     {
232         log<level::ERR>("ESEL data too short",
233                         entry("ESEL_SIZE=%d", esel.size()));
234 
235         throw std::length_error("ESEL data too short");
236     }
237 
238     for (size_t i = pelStart; i < esel.size(); i += 3)
239     {
240         if (i + 1 < esel.size())
241         {
242             byteString = esel.substr(i, 2);
243             data.push_back(std::stoi(byteString, nullptr, 16));
244         }
245         else
246         {
247             log<level::ERR>("ESEL data too short",
248                             entry("ESEL_SIZE=%d", esel.size()));
249             throw std::length_error("ESEL data too short");
250         }
251     }
252 
253     return data;
254 }
255 
256 void Manager::erase(uint32_t obmcLogID)
257 {
258     Repository::LogID id{Repository::LogID::Obmc(obmcLogID)};
259 
260     _repo.remove(id);
261 }
262 
263 bool Manager::isDeleteProhibited(uint32_t /*obmcLogID*/)
264 {
265     return false;
266 }
267 
268 PelFFDC Manager::convertToPelFFDC(const FFDCEntries& ffdc)
269 {
270     PelFFDC pelFFDC;
271 
272     std::for_each(ffdc.begin(), ffdc.end(), [&pelFFDC](const auto& f) {
273         PelFFDCfile pf;
274         pf.subType = std::get<ffdcSubtypePos>(f);
275         pf.version = std::get<ffdcVersionPos>(f);
276         pf.fd = std::get<ffdcFDPos>(f);
277 
278         switch (std::get<ffdcFormatPos>(f))
279         {
280             case Create::FFDCFormat::JSON:
281                 pf.format = UserDataFormat::json;
282                 break;
283             case Create::FFDCFormat::CBOR:
284                 pf.format = UserDataFormat::cbor;
285                 break;
286             case Create::FFDCFormat::Text:
287                 pf.format = UserDataFormat::text;
288                 break;
289             case Create::FFDCFormat::Custom:
290                 pf.format = UserDataFormat::custom;
291                 break;
292         }
293 
294         pelFFDC.push_back(pf);
295     });
296 
297     return pelFFDC;
298 }
299 
300 void Manager::createPEL(const std::string& message, uint32_t obmcLogID,
301                         uint64_t timestamp,
302                         phosphor::logging::Entry::Level severity,
303                         const std::vector<std::string>& additionalData,
304                         const std::vector<std::string>& /*associations*/,
305                         const FFDCEntries& ffdc)
306 {
307     auto entry = _registry.lookup(message, rg::LookupType::name);
308     auto pelFFDC = convertToPelFFDC(ffdc);
309     AdditionalData ad{additionalData};
310     std::string msg;
311 
312     if (!entry)
313     {
314         // Instead, get the default entry that means there is no
315         // other matching entry.  This error will still use the
316         // AdditionalData values of the original error, and this
317         // code will add the error message value that wasn't found
318         // to this AD.  This way, there will at least be a PEL,
319         // possibly with callouts, to allow users to debug the
320         // issue that caused the error even without its own PEL.
321         msg = "Event not found in PEL message registry: " + message;
322         log<level::INFO>(msg.c_str());
323 
324         entry = _registry.lookup(defaultLogMessage, rg::LookupType::name);
325         if (!entry)
326         {
327             log<level::ERR>("Default event not found in PEL message registry");
328             return;
329         }
330 
331         ad.add(additional_data::error, message);
332     }
333 
334     auto pel = std::make_unique<openpower::pels::PEL>(
335         *entry, obmcLogID, timestamp, severity, ad, pelFFDC, *_dataIface);
336 
337     _repo.add(pel);
338 
339     if (_repo.sizeWarning())
340     {
341         scheduleRepoPrune();
342     }
343 
344     auto src = pel->primarySRC();
345     if (src)
346     {
347         auto msg =
348             fmt::format("Created PEL {:#x} (BMC ID {}) with SRC {}", pel->id(),
349                         pel->obmcLogID(), (*src)->asciiString());
350         while (msg.back() == ' ')
351         {
352             msg.pop_back();
353         }
354         log<level::INFO>(msg.c_str());
355     }
356 
357     // Activate any resulting service indicators if necessary
358     auto policy = service_indicators::getPolicy(*_dataIface);
359     policy->activate(*pel);
360 
361     // Check if firmware should quiesce system due to error
362     checkPelAndQuiesce(pel);
363 }
364 
365 sdbusplus::message::unix_fd Manager::getPEL(uint32_t pelID)
366 {
367     Repository::LogID id{Repository::LogID::Pel(pelID)};
368     std::optional<int> fd;
369 
370     log<level::DEBUG>("getPEL", entry("PEL_ID=0x%X", pelID));
371 
372     try
373     {
374         fd = _repo.getPELFD(id);
375     }
376     catch (std::exception& e)
377     {
378         throw common_error::InternalFailure();
379     }
380 
381     if (!fd)
382     {
383         throw common_error::InvalidArgument();
384     }
385 
386     scheduleFDClose(*fd);
387 
388     return *fd;
389 }
390 
391 void Manager::scheduleFDClose(int fd)
392 {
393     _fdCloserEventSource = std::make_unique<sdeventplus::source::Defer>(
394         _event, std::bind(std::mem_fn(&Manager::closeFD), this, fd,
395                           std::placeholders::_1));
396 }
397 
398 void Manager::closeFD(int fd, sdeventplus::source::EventBase& /*source*/)
399 {
400     close(fd);
401     _fdCloserEventSource.reset();
402 }
403 
404 std::vector<uint8_t> Manager::getPELFromOBMCID(uint32_t obmcLogID)
405 {
406     Repository::LogID id{Repository::LogID::Obmc(obmcLogID)};
407     std::optional<std::vector<uint8_t>> data;
408 
409     log<level::DEBUG>("getPELFromOBMCID", entry("OBMC_LOG_ID=%d", obmcLogID));
410 
411     try
412     {
413         data = _repo.getPELData(id);
414     }
415     catch (std::exception& e)
416     {
417         throw common_error::InternalFailure();
418     }
419 
420     if (!data)
421     {
422         throw common_error::InvalidArgument();
423     }
424 
425     return *data;
426 }
427 
428 void Manager::hostAck(uint32_t pelID)
429 {
430     Repository::LogID id{Repository::LogID::Pel(pelID)};
431 
432     log<level::DEBUG>("HostAck", entry("PEL_ID=0x%X", pelID));
433 
434     if (!_repo.hasPEL(id))
435     {
436         throw common_error::InvalidArgument();
437     }
438 
439     if (_hostNotifier)
440     {
441         _hostNotifier->ackPEL(pelID);
442     }
443 }
444 
445 void Manager::hostReject(uint32_t pelID, RejectionReason reason)
446 {
447     Repository::LogID id{Repository::LogID::Pel(pelID)};
448 
449     log<level::DEBUG>("HostReject", entry("PEL_ID=0x%X", pelID),
450                       entry("REASON=%d", static_cast<int>(reason)));
451 
452     if (!_repo.hasPEL(id))
453     {
454         throw common_error::InvalidArgument();
455     }
456 
457     if (reason == RejectionReason::BadPEL)
458     {
459         AdditionalData data;
460         data.add("BAD_ID", getNumberString("0x%08X", pelID));
461         _eventLogger.log("org.open_power.Logging.Error.SentBadPELToHost",
462                          Entry::Level::Informational, data);
463         if (_hostNotifier)
464         {
465             _hostNotifier->setBadPEL(pelID);
466         }
467     }
468     else if ((reason == RejectionReason::HostFull) && _hostNotifier)
469     {
470         _hostNotifier->setHostFull(pelID);
471     }
472 }
473 
474 void Manager::scheduleRepoPrune()
475 {
476     _repoPrunerEventSource = std::make_unique<sdeventplus::source::Defer>(
477         _event, std::bind(std::mem_fn(&Manager::pruneRepo), this,
478                           std::placeholders::_1));
479 }
480 
481 void Manager::pruneRepo(sdeventplus::source::EventBase& /*source*/)
482 {
483     auto idsToDelete = _repo.prune();
484 
485     // Remove the OpenBMC event logs for the PELs that were just removed.
486     std::for_each(idsToDelete.begin(), idsToDelete.end(),
487                   [this](auto id) { this->_logManager.erase(id); });
488 
489     _repoPrunerEventSource.reset();
490 }
491 
492 void Manager::setupPELDeleteWatch()
493 {
494     _pelFileDeleteFD = inotify_init1(IN_NONBLOCK);
495     if (-1 == _pelFileDeleteFD)
496     {
497         auto e = errno;
498         std::string msg =
499             "inotify_init1 failed with errno " + std::to_string(e);
500         log<level::ERR>(msg.c_str());
501         abort();
502     }
503 
504     _pelFileDeleteWatchFD = inotify_add_watch(
505         _pelFileDeleteFD, _repo.repoPath().c_str(), IN_DELETE);
506     if (-1 == _pelFileDeleteWatchFD)
507     {
508         auto e = errno;
509         std::string msg =
510             "inotify_add_watch failed with error " + std::to_string(e);
511         log<level::ERR>(msg.c_str());
512         abort();
513     }
514 
515     _pelFileDeleteEventSource = std::make_unique<sdeventplus::source::IO>(
516         _event, _pelFileDeleteFD, EPOLLIN,
517         std::bind(std::mem_fn(&Manager::pelFileDeleted), this,
518                   std::placeholders::_1, std::placeholders::_2,
519                   std::placeholders::_3));
520 }
521 
522 void Manager::pelFileDeleted(sdeventplus::source::IO& /*io*/, int /*fd*/,
523                              uint32_t revents)
524 {
525     if (!(revents & EPOLLIN))
526     {
527         return;
528     }
529 
530     // An event for 1 PEL uses 48B. When all PELs are deleted at once,
531     // as many events as there is room for can be handled in one callback.
532     // A size of 2000 will allow 41 to be processed, with additional
533     // callbacks being needed to process the remaining ones.
534     std::array<uint8_t, 2000> data{};
535     auto bytesRead = read(_pelFileDeleteFD, data.data(), data.size());
536     if (bytesRead < 0)
537     {
538         auto e = errno;
539         std::string msg = "Failed reading data from inotify event, errno = " +
540                           std::to_string(e);
541         log<level::ERR>(msg.c_str());
542         abort();
543     }
544 
545     auto offset = 0;
546     while (offset < bytesRead)
547     {
548         auto event = reinterpret_cast<inotify_event*>(&data[offset]);
549         if (event->mask & IN_DELETE)
550         {
551             std::string filename{event->name};
552 
553             // Get the PEL ID from the filename and tell the
554             // repo it's been removed, and then delete the BMC
555             // event log if it's there.
556             auto pos = filename.find_first_of('_');
557             if (pos != std::string::npos)
558             {
559                 try
560                 {
561                     auto idString = filename.substr(pos + 1);
562                     auto pelID = std::stoul(idString, nullptr, 16);
563 
564                     Repository::LogID id{Repository::LogID::Pel(pelID)};
565                     auto removedLogID = _repo.remove(id);
566                     if (removedLogID)
567                     {
568                         _logManager.erase(removedLogID->obmcID.id);
569                     }
570                 }
571                 catch (const std::exception& e)
572                 {
573                     log<level::INFO>("Could not find PEL ID from its filename",
574                                      entry("FILENAME=%s", filename.c_str()));
575                 }
576             }
577         }
578 
579         offset += offsetof(inotify_event, name) + event->len;
580     }
581 }
582 
583 std::tuple<uint32_t, uint32_t> Manager::createPELWithFFDCFiles(
584     std::string message, Entry::Level severity,
585     std::map<std::string, std::string> additionalData,
586     std::vector<std::tuple<
587         sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
588         uint8_t, uint8_t, sdbusplus::message::unix_fd>>
589         fFDC)
590 {
591     _logManager.createWithFFDC(message, severity, additionalData, fFDC);
592 
593     return {_logManager.lastEntryID(), _repo.lastPelID()};
594 }
595 
596 void Manager::checkPelAndQuiesce(std::unique_ptr<openpower::pels::PEL>& pel)
597 {
598     if ((pel->userHeader().severity() ==
599          static_cast<uint8_t>(SeverityType::nonError)) ||
600         (pel->userHeader().severity() ==
601          static_cast<uint8_t>(SeverityType::recovered)))
602     {
603         log<level::DEBUG>(
604             "PEL severity informational or recovered. no quiesce needed");
605         return;
606     }
607     if (!_logManager.isQuiesceOnErrorEnabled())
608     {
609         log<level::DEBUG>("QuiesceOnHwError not enabled, no quiesce needed");
610         return;
611     }
612 
613     // Now check if it has any type of callout
614     if (pel->isCalloutPresent())
615     {
616         log<level::INFO>(
617             "QuiesceOnHwError enabled, PEL severity not nonError or recovered, "
618             "and callout is present");
619 
620         _logManager.quiesceOnError(pel->obmcLogID());
621     }
622 }
623 
624 void Manager::setEntryPath(uint32_t obmcLogID)
625 {
626     Repository::LogID id{Repository::LogID::Obmc(obmcLogID)};
627     if (auto attributes = _repo.getPELAttributes(id); attributes)
628     {
629         auto& attr = attributes.value().get();
630         auto entry = _logManager.entries.find(obmcLogID);
631         if (entry != _logManager.entries.end())
632         {
633             entry->second->path(attr.path);
634         }
635     }
636 }
637 
638 } // namespace pels
639 } // namespace openpower
640