1 /**
2  * Copyright © 2019 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "manager.hpp"
17 
18 #include "additional_data.hpp"
19 #include "json_utils.hpp"
20 #include "pel.hpp"
21 #include "service_indicators.hpp"
22 
23 #include <fmt/format.h>
24 #include <sys/inotify.h>
25 #include <unistd.h>
26 
27 #include <filesystem>
28 #include <fstream>
29 #include <xyz/openbmc_project/Common/error.hpp>
30 #include <xyz/openbmc_project/Logging/Create/server.hpp>
31 
32 namespace openpower
33 {
34 namespace pels
35 {
36 
37 using namespace phosphor::logging;
38 namespace fs = std::filesystem;
39 namespace rg = openpower::pels::message;
40 
41 namespace common_error = sdbusplus::xyz::openbmc_project::Common::Error;
42 
43 using Create = sdbusplus::xyz::openbmc_project::Logging::server::Create;
44 
45 namespace additional_data
46 {
47 constexpr auto rawPEL = "RAWPEL";
48 constexpr auto esel = "ESEL";
49 constexpr auto error = "ERROR_NAME";
50 } // namespace additional_data
51 
52 constexpr auto defaultLogMessage = "xyz.openbmc_project.Logging.Error.Default";
53 
54 Manager::~Manager()
55 {
56     if (_pelFileDeleteFD != -1)
57     {
58         if (_pelFileDeleteWatchFD != -1)
59         {
60             inotify_rm_watch(_pelFileDeleteFD, _pelFileDeleteWatchFD);
61         }
62         close(_pelFileDeleteFD);
63     }
64 }
65 
66 void Manager::create(const std::string& message, uint32_t obmcLogID,
67                      uint64_t timestamp, Entry::Level severity,
68                      const std::vector<std::string>& additionalData,
69                      const std::vector<std::string>& associations,
70                      const FFDCEntries& ffdc)
71 {
72     AdditionalData ad{additionalData};
73 
74     // If a PEL was passed in via a filename or in an ESEL,
75     // use that.  Otherwise, create one.
76     auto rawPelPath = ad.getValue(additional_data::rawPEL);
77     if (rawPelPath)
78     {
79         addRawPEL(*rawPelPath, obmcLogID);
80     }
81     else
82     {
83         auto esel = ad.getValue(additional_data::esel);
84         if (esel)
85         {
86             addESELPEL(*esel, obmcLogID);
87         }
88         else
89         {
90             createPEL(message, obmcLogID, timestamp, severity, additionalData,
91                       associations, ffdc);
92         }
93     }
94 }
95 
96 void Manager::addRawPEL(const std::string& rawPelPath, uint32_t obmcLogID)
97 {
98     if (fs::exists(rawPelPath))
99     {
100         std::ifstream file(rawPelPath, std::ios::in | std::ios::binary);
101 
102         auto data = std::vector<uint8_t>(std::istreambuf_iterator<char>(file),
103                                          std::istreambuf_iterator<char>());
104         if (file.fail())
105         {
106             log<level::ERR>("Filesystem error reading a raw PEL",
107                             entry("PELFILE=%s", rawPelPath.c_str()),
108                             entry("OBMCLOGID=%d", obmcLogID));
109             // TODO, Decide what to do here. Maybe nothing.
110             return;
111         }
112 
113         file.close();
114 
115         addPEL(data, obmcLogID);
116     }
117     else
118     {
119         log<level::ERR>("Raw PEL file from BMC event log does not exist",
120                         entry("PELFILE=%s", (rawPelPath).c_str()),
121                         entry("OBMCLOGID=%d", obmcLogID));
122     }
123 }
124 
125 void Manager::addPEL(std::vector<uint8_t>& pelData, uint32_t obmcLogID)
126 {
127 
128     auto pel = std::make_unique<openpower::pels::PEL>(pelData, obmcLogID);
129     if (pel->valid())
130     {
131         // PELs created by others still need these fields set by us.
132         pel->assignID();
133         pel->setCommitTime();
134 
135         try
136         {
137             log<level::INFO>(
138                 fmt::format("Adding external PEL {:#x} (BMC ID {}) to repo",
139                             pel->id(), obmcLogID)
140                     .c_str());
141 
142             _repo.add(pel);
143 
144             if (_repo.sizeWarning())
145             {
146                 scheduleRepoPrune();
147             }
148 
149             // Activate any resulting service indicators if necessary
150             auto policy = service_indicators::getPolicy(*_dataIface);
151             policy->activate(*pel);
152         }
153         catch (std::exception& e)
154         {
155             // Probably a full or r/o filesystem, not much we can do.
156             log<level::ERR>("Unable to add PEL to Repository",
157                             entry("PEL_ID=0x%X", pel->id()));
158         }
159 
160         // Check if firmware should quiesce system due to error
161         checkPelAndQuiesce(pel);
162     }
163     else
164     {
165         log<level::ERR>("Invalid PEL received from the host",
166                         entry("OBMCLOGID=%d", obmcLogID));
167 
168         AdditionalData ad;
169         ad.add("PLID", getNumberString("0x%08X", pel->plid()));
170         ad.add("OBMC_LOG_ID", std::to_string(obmcLogID));
171         ad.add("PEL_SIZE", std::to_string(pelData.size()));
172 
173         std::string asciiString;
174         auto src = pel->primarySRC();
175         if (src)
176         {
177             asciiString = (*src)->asciiString();
178         }
179 
180         ad.add("SRC", asciiString);
181 
182         _eventLogger.log("org.open_power.Logging.Error.BadHostPEL",
183                          Entry::Level::Error, ad);
184 
185         // Save it to a file for debug in the lab.  Just keep the latest.
186         // Not adding it to the PEL because it could already be max size
187         // and don't want to truncate an already invalid PEL.
188         std::ofstream pelFile{getPELRepoPath() / "badPEL"};
189         pelFile.write(reinterpret_cast<const char*>(pelData.data()),
190                       pelData.size());
191     }
192 }
193 
194 void Manager::addESELPEL(const std::string& esel, uint32_t obmcLogID)
195 {
196     std::vector<uint8_t> data;
197 
198     log<level::DEBUG>("Adding PEL from ESEL",
199                       entry("OBMC_LOG_ID=%d", obmcLogID));
200 
201     try
202     {
203         data = std::move(eselToRawData(esel));
204     }
205     catch (std::exception& e)
206     {
207         // Try to add it below anyway, so it follows the usual bad data path.
208         log<level::ERR>("Problems converting ESEL string to a byte vector");
209     }
210 
211     addPEL(data, obmcLogID);
212 }
213 
214 std::vector<uint8_t> Manager::eselToRawData(const std::string& esel)
215 {
216     std::vector<uint8_t> data;
217     std::string byteString;
218 
219     // As the eSEL string looks like: "50 48 00 ab ..." there are 3
220     // characters per raw byte, and since the actual PEL data starts
221     // at the 16th byte, the code will grab the PEL data starting at
222     // offset 48 in the string.
223     static constexpr size_t pelStart = 16 * 3;
224 
225     if (esel.size() <= pelStart)
226     {
227         log<level::ERR>("ESEL data too short",
228                         entry("ESEL_SIZE=%d", esel.size()));
229 
230         throw std::length_error("ESEL data too short");
231     }
232 
233     for (size_t i = pelStart; i < esel.size(); i += 3)
234     {
235         if (i + 1 < esel.size())
236         {
237             byteString = esel.substr(i, 2);
238             data.push_back(std::stoi(byteString, nullptr, 16));
239         }
240         else
241         {
242             log<level::ERR>("ESEL data too short",
243                             entry("ESEL_SIZE=%d", esel.size()));
244             throw std::length_error("ESEL data too short");
245         }
246     }
247 
248     return data;
249 }
250 
251 void Manager::erase(uint32_t obmcLogID)
252 {
253     Repository::LogID id{Repository::LogID::Obmc(obmcLogID)};
254 
255     _repo.remove(id);
256 }
257 
258 bool Manager::isDeleteProhibited(uint32_t obmcLogID)
259 {
260     return false;
261 }
262 
263 PelFFDC Manager::convertToPelFFDC(const FFDCEntries& ffdc)
264 {
265     PelFFDC pelFFDC;
266 
267     std::for_each(ffdc.begin(), ffdc.end(), [&pelFFDC](const auto& f) {
268         PelFFDCfile pf;
269         pf.subType = std::get<ffdcSubtypePos>(f);
270         pf.version = std::get<ffdcVersionPos>(f);
271         pf.fd = std::get<ffdcFDPos>(f);
272 
273         switch (std::get<ffdcFormatPos>(f))
274         {
275             case Create::FFDCFormat::JSON:
276                 pf.format = UserDataFormat::json;
277                 break;
278             case Create::FFDCFormat::CBOR:
279                 pf.format = UserDataFormat::cbor;
280                 break;
281             case Create::FFDCFormat::Text:
282                 pf.format = UserDataFormat::text;
283                 break;
284             case Create::FFDCFormat::Custom:
285                 pf.format = UserDataFormat::custom;
286                 break;
287         }
288 
289         pelFFDC.push_back(pf);
290     });
291 
292     return pelFFDC;
293 }
294 
295 void Manager::createPEL(const std::string& message, uint32_t obmcLogID,
296                         uint64_t timestamp,
297                         phosphor::logging::Entry::Level severity,
298                         const std::vector<std::string>& additionalData,
299                         const std::vector<std::string>& associations,
300                         const FFDCEntries& ffdc)
301 {
302     auto entry = _registry.lookup(message, rg::LookupType::name);
303     auto pelFFDC = convertToPelFFDC(ffdc);
304     AdditionalData ad{additionalData};
305     std::string msg;
306 
307     if (!entry)
308     {
309         // Instead, get the default entry that means there is no
310         // other matching entry.  This error will still use the
311         // AdditionalData values of the original error, and this
312         // code will add the error message value that wasn't found
313         // to this AD.  This way, there will at least be a PEL,
314         // possibly with callouts, to allow users to debug the
315         // issue that caused the error even without its own PEL.
316         msg = "Event not found in PEL message registry: " + message;
317         log<level::INFO>(msg.c_str());
318 
319         entry = _registry.lookup(defaultLogMessage, rg::LookupType::name);
320         if (!entry)
321         {
322             log<level::ERR>("Default event not found in PEL message registry");
323             return;
324         }
325 
326         ad.add(additional_data::error, message);
327     }
328 
329     auto pel = std::make_unique<openpower::pels::PEL>(
330         *entry, obmcLogID, timestamp, severity, ad, pelFFDC, *_dataIface);
331 
332     _repo.add(pel);
333 
334     if (_repo.sizeWarning())
335     {
336         scheduleRepoPrune();
337     }
338 
339     auto src = pel->primarySRC();
340     if (src)
341     {
342         auto msg =
343             fmt::format("Created PEL {:#x} (BMC ID {}) with SRC {}", pel->id(),
344                         pel->obmcLogID(), (*src)->asciiString());
345         while (msg.back() == ' ')
346         {
347             msg.pop_back();
348         }
349         log<level::INFO>(msg.c_str());
350     }
351 
352     // Activate any resulting service indicators if necessary
353     auto policy = service_indicators::getPolicy(*_dataIface);
354     policy->activate(*pel);
355 
356     // Check if firmware should quiesce system due to error
357     checkPelAndQuiesce(pel);
358 }
359 
360 sdbusplus::message::unix_fd Manager::getPEL(uint32_t pelID)
361 {
362     Repository::LogID id{Repository::LogID::Pel(pelID)};
363     std::optional<int> fd;
364 
365     log<level::DEBUG>("getPEL", entry("PEL_ID=0x%X", pelID));
366 
367     try
368     {
369         fd = _repo.getPELFD(id);
370     }
371     catch (std::exception& e)
372     {
373         throw common_error::InternalFailure();
374     }
375 
376     if (!fd)
377     {
378         throw common_error::InvalidArgument();
379     }
380 
381     scheduleFDClose(*fd);
382 
383     return *fd;
384 }
385 
386 void Manager::scheduleFDClose(int fd)
387 {
388     _fdCloserEventSource = std::make_unique<sdeventplus::source::Defer>(
389         _event, std::bind(std::mem_fn(&Manager::closeFD), this, fd,
390                           std::placeholders::_1));
391 }
392 
393 void Manager::closeFD(int fd, sdeventplus::source::EventBase& source)
394 {
395     close(fd);
396     _fdCloserEventSource.reset();
397 }
398 
399 std::vector<uint8_t> Manager::getPELFromOBMCID(uint32_t obmcLogID)
400 {
401     Repository::LogID id{Repository::LogID::Obmc(obmcLogID)};
402     std::optional<std::vector<uint8_t>> data;
403 
404     log<level::DEBUG>("getPELFromOBMCID", entry("OBMC_LOG_ID=%d", obmcLogID));
405 
406     try
407     {
408         data = _repo.getPELData(id);
409     }
410     catch (std::exception& e)
411     {
412         throw common_error::InternalFailure();
413     }
414 
415     if (!data)
416     {
417         throw common_error::InvalidArgument();
418     }
419 
420     return *data;
421 }
422 
423 void Manager::hostAck(uint32_t pelID)
424 {
425     Repository::LogID id{Repository::LogID::Pel(pelID)};
426 
427     log<level::DEBUG>("HostAck", entry("PEL_ID=0x%X", pelID));
428 
429     if (!_repo.hasPEL(id))
430     {
431         throw common_error::InvalidArgument();
432     }
433 
434     if (_hostNotifier)
435     {
436         _hostNotifier->ackPEL(pelID);
437     }
438 }
439 
440 void Manager::hostReject(uint32_t pelID, RejectionReason reason)
441 {
442     Repository::LogID id{Repository::LogID::Pel(pelID)};
443 
444     log<level::DEBUG>("HostReject", entry("PEL_ID=0x%X", pelID),
445                       entry("REASON=%d", static_cast<int>(reason)));
446 
447     if (!_repo.hasPEL(id))
448     {
449         throw common_error::InvalidArgument();
450     }
451 
452     if (reason == RejectionReason::BadPEL)
453     {
454         AdditionalData data;
455         data.add("BAD_ID", getNumberString("0x%08X", pelID));
456         _eventLogger.log("org.open_power.Logging.Error.SentBadPELToHost",
457                          Entry::Level::Informational, data);
458         if (_hostNotifier)
459         {
460             _hostNotifier->setBadPEL(pelID);
461         }
462     }
463     else if ((reason == RejectionReason::HostFull) && _hostNotifier)
464     {
465         _hostNotifier->setHostFull(pelID);
466     }
467 }
468 
469 void Manager::scheduleRepoPrune()
470 {
471     _repoPrunerEventSource = std::make_unique<sdeventplus::source::Defer>(
472         _event, std::bind(std::mem_fn(&Manager::pruneRepo), this,
473                           std::placeholders::_1));
474 }
475 
476 void Manager::pruneRepo(sdeventplus::source::EventBase& source)
477 {
478     auto idsToDelete = _repo.prune();
479 
480     // Remove the OpenBMC event logs for the PELs that were just removed.
481     std::for_each(idsToDelete.begin(), idsToDelete.end(),
482                   [this](auto id) { this->_logManager.erase(id); });
483 
484     _repoPrunerEventSource.reset();
485 }
486 
487 void Manager::setupPELDeleteWatch()
488 {
489     _pelFileDeleteFD = inotify_init1(IN_NONBLOCK);
490     if (-1 == _pelFileDeleteFD)
491     {
492         auto e = errno;
493         std::string msg =
494             "inotify_init1 failed with errno " + std::to_string(e);
495         log<level::ERR>(msg.c_str());
496         abort();
497     }
498 
499     _pelFileDeleteWatchFD = inotify_add_watch(
500         _pelFileDeleteFD, _repo.repoPath().c_str(), IN_DELETE);
501     if (-1 == _pelFileDeleteWatchFD)
502     {
503         auto e = errno;
504         std::string msg =
505             "inotify_add_watch failed with error " + std::to_string(e);
506         log<level::ERR>(msg.c_str());
507         abort();
508     }
509 
510     _pelFileDeleteEventSource = std::make_unique<sdeventplus::source::IO>(
511         _event, _pelFileDeleteFD, EPOLLIN,
512         std::bind(std::mem_fn(&Manager::pelFileDeleted), this,
513                   std::placeholders::_1, std::placeholders::_2,
514                   std::placeholders::_3));
515 }
516 
517 void Manager::pelFileDeleted(sdeventplus::source::IO& io, int fd,
518                              uint32_t revents)
519 {
520     if (!(revents & EPOLLIN))
521     {
522         return;
523     }
524 
525     // An event for 1 PEL uses 48B. When all PELs are deleted at once,
526     // as many events as there is room for can be handled in one callback.
527     // A size of 2000 will allow 41 to be processed, with additional
528     // callbacks being needed to process the remaining ones.
529     std::array<uint8_t, 2000> data;
530     auto bytesRead = read(_pelFileDeleteFD, data.data(), data.size());
531     if (bytesRead < 0)
532     {
533         auto e = errno;
534         std::string msg = "Failed reading data from inotify event, errno = " +
535                           std::to_string(e);
536         log<level::ERR>(msg.c_str());
537         abort();
538     }
539 
540     auto offset = 0;
541     while (offset < bytesRead)
542     {
543         auto event = reinterpret_cast<inotify_event*>(&data[offset]);
544         if (event->mask & IN_DELETE)
545         {
546             std::string filename{event->name};
547 
548             // Get the PEL ID from the filename and tell the
549             // repo it's been removed, and then delete the BMC
550             // event log if it's there.
551             auto pos = filename.find_first_of('_');
552             if (pos != std::string::npos)
553             {
554                 try
555                 {
556                     auto idString = filename.substr(pos + 1);
557                     auto pelID = std::stoul(idString, nullptr, 16);
558 
559                     Repository::LogID id{Repository::LogID::Pel(pelID)};
560                     auto removedLogID = _repo.remove(id);
561                     if (removedLogID)
562                     {
563                         _logManager.erase(removedLogID->obmcID.id);
564                     }
565                 }
566                 catch (const std::exception& e)
567                 {
568                     log<level::INFO>("Could not find PEL ID from its filename",
569                                      entry("FILENAME=%s", filename.c_str()));
570                 }
571             }
572         }
573 
574         offset += offsetof(inotify_event, name) + event->len;
575     }
576 }
577 
578 std::tuple<uint32_t, uint32_t> Manager::createPELWithFFDCFiles(
579     std::string message, Entry::Level severity,
580     std::map<std::string, std::string> additionalData,
581     std::vector<std::tuple<
582         sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat,
583         uint8_t, uint8_t, sdbusplus::message::unix_fd>>
584         fFDC)
585 {
586     _logManager.createWithFFDC(message, severity, additionalData, fFDC);
587 
588     return {_logManager.lastEntryID(), _repo.lastPelID()};
589 }
590 
591 void Manager::checkPelAndQuiesce(std::unique_ptr<openpower::pels::PEL>& pel)
592 {
593     if (pel->userHeader().severity() ==
594         static_cast<uint8_t>(SeverityType::nonError))
595     {
596         log<level::DEBUG>("PEL severity informational. no quiesce needed");
597         return;
598     }
599     if (!_logManager.isQuiesceOnErrorEnabled())
600     {
601         log<level::DEBUG>("QuiesceOnHwError not enabled, no quiesce needed");
602         return;
603     }
604 
605     // Now check if it has any type of callout
606     if (pel->isCalloutPresent())
607     {
608         log<level::INFO>("QuiesceOnHwError enabled, PEL severity not nonError, "
609                          "and callout is present");
610 
611         _logManager.quiesceOnError(pel->obmcLogID());
612     }
613 }
614 
615 } // namespace pels
616 } // namespace openpower
617