xref: /openbmc/phosphor-logging/extensions/openpower-pels/repository.cpp (revision e053884811eea3f165ce465b3942e192b8a36cbc)
1 /**
2  * Copyright © 2019 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
#include "repository.hpp"

#include <fcntl.h>
#include <sys/stat.h>

#include <cstdio>
#include <fstream>
#include <phosphor-logging/log.hpp>
#include <xyz/openbmc_project/Common/File/error.hpp>
23 
24 namespace openpower
25 {
26 namespace pels
27 {
28 
// Shorthand for the standard filesystem library.
namespace fs = std::filesystem;
using namespace phosphor::logging;
// Shorthand for the File error types thrown when a PEL file cannot be
// opened or written.
namespace file_error = sdbusplus::xyz::openbmc_project::Common::File::Error;

// Percentage of the maximum repository size used as the threshold in
// Repository::sizeWarning().
constexpr size_t warningPercentage = 95;
34 
35 /**
36  * @brief Returns the amount of space the file uses on disk.
37  *
38  * This is different than just the regular size of the file.
39  *
40  * @param[in] file - The file to get the size of
41  *
42  * @return size_t The disk space the file uses
43  */
44 size_t getFileDiskSize(const std::filesystem::path& file)
45 {
46     constexpr size_t statBlockSize = 512;
47     struct stat statData;
48     auto rc = stat(file.c_str(), &statData);
49     if (rc != 0)
50     {
51         auto e = errno;
52         std::string msg = "call to stat() failed on " + file.native() +
53                           " with errno " + std::to_string(e);
54         log<level::ERR>(msg.c_str());
55         abort();
56     }
57 
58     return statData.st_blocks * statBlockSize;
59 }
60 
61 Repository::Repository(const std::filesystem::path& basePath, size_t repoSize,
62                        size_t maxNumPELs) :
63     _logPath(basePath / "logs"),
64     _maxRepoSize(repoSize), _maxNumPELs(maxNumPELs),
65     _archivePath(basePath / "logs" / "archive")
66 {
67     if (!fs::exists(_logPath))
68     {
69         fs::create_directories(_logPath);
70     }
71 
72     if (!fs::exists(_archivePath))
73     {
74         fs::create_directories(_archivePath);
75     }
76 
77     restore();
78 }
79 
80 void Repository::restore()
81 {
82     for (auto& dirEntry : fs::directory_iterator(_logPath))
83     {
84         try
85         {
86             if (!fs::is_regular_file(dirEntry.path()))
87             {
88                 continue;
89             }
90 
91             std::ifstream file{dirEntry.path()};
92             std::vector<uint8_t> data{std::istreambuf_iterator<char>(file),
93                                       std::istreambuf_iterator<char>()};
94             file.close();
95 
96             PEL pel{data};
97             if (pel.valid())
98             {
99                 // If the host hasn't acked it, reset the host state so
100                 // it will get sent up again.
101                 if (pel.hostTransmissionState() == TransmissionState::sent)
102                 {
103                     pel.setHostTransmissionState(TransmissionState::newPEL);
104                     try
105                     {
106                         write(pel, dirEntry.path());
107                     }
108                     catch (std::exception& e)
109                     {
110                         log<level::ERR>(
111                             "Failed to save PEL after updating host state",
112                             entry("PELID=0x%X", pel.id()));
113                     }
114                 }
115 
116                 PELAttributes attributes{dirEntry.path(),
117                                          getFileDiskSize(dirEntry.path()),
118                                          pel.privateHeader().creatorID(),
119                                          pel.userHeader().severity(),
120                                          pel.userHeader().actionFlags(),
121                                          pel.hostTransmissionState(),
122                                          pel.hmcTransmissionState()};
123 
124                 using pelID = LogID::Pel;
125                 using obmcID = LogID::Obmc;
126                 _pelAttributes.emplace(
127                     LogID(pelID(pel.id()), obmcID(pel.obmcLogID())),
128                     attributes);
129 
130                 updateRepoStats(attributes, true);
131             }
132             else
133             {
134                 log<level::ERR>(
135                     "Found invalid PEL file while restoring.  Removing.",
136                     entry("FILENAME=%s", dirEntry.path().c_str()));
137                 fs::remove(dirEntry.path());
138             }
139         }
140         catch (std::exception& e)
141         {
142             log<level::ERR>("Hit exception while restoring PEL File",
143                             entry("FILENAME=%s", dirEntry.path().c_str()),
144                             entry("ERROR=%s", e.what()));
145         }
146     }
147 
148     // Get size of archive folder
149     for (auto& dirEntry : fs::directory_iterator(_archivePath))
150     {
151         _archiveSize += getFileDiskSize(dirEntry);
152     }
153 }
154 
155 std::string Repository::getPELFilename(uint32_t pelID, const BCDTime& time)
156 {
157     char name[50];
158     sprintf(name, "%.2X%.2X%.2X%.2X%.2X%.2X%.2X%.2X_%.8X", time.yearMSB,
159             time.yearLSB, time.month, time.day, time.hour, time.minutes,
160             time.seconds, time.hundredths, pelID);
161     return std::string{name};
162 }
163 
164 void Repository::add(std::unique_ptr<PEL>& pel)
165 {
166     pel->setHostTransmissionState(TransmissionState::newPEL);
167     pel->setHMCTransmissionState(TransmissionState::newPEL);
168 
169     auto path = _logPath / getPELFilename(pel->id(), pel->commitTime());
170 
171     write(*(pel.get()), path);
172 
173     PELAttributes attributes{path,
174                              getFileDiskSize(path),
175                              pel->privateHeader().creatorID(),
176                              pel->userHeader().severity(),
177                              pel->userHeader().actionFlags(),
178                              pel->hostTransmissionState(),
179                              pel->hmcTransmissionState()};
180 
181     using pelID = LogID::Pel;
182     using obmcID = LogID::Obmc;
183     _pelAttributes.emplace(LogID(pelID(pel->id()), obmcID(pel->obmcLogID())),
184                            attributes);
185 
186     _lastPelID = pel->id();
187 
188     updateRepoStats(attributes, true);
189 
190     processAddCallbacks(*pel);
191 }
192 
193 void Repository::write(const PEL& pel, const fs::path& path)
194 {
195     std::ofstream file{path, std::ios::binary};
196 
197     if (!file.good())
198     {
199         // If this fails, the filesystem is probably full so it isn't like
200         // we could successfully create yet another error log here.
201         auto e = errno;
202         fs::remove(path);
203         log<level::ERR>("Unable to open PEL file for writing",
204                         entry("ERRNO=%d", e), entry("PATH=%s", path.c_str()));
205         throw file_error::Open();
206     }
207 
208     auto data = pel.data();
209     file.write(reinterpret_cast<const char*>(data.data()), data.size());
210 
211     if (file.fail())
212     {
213         // Same note as above about not being able to create an error log
214         // for this case even if we wanted.
215         auto e = errno;
216         file.close();
217         fs::remove(path);
218         log<level::ERR>("Unable to write PEL file", entry("ERRNO=%d", e),
219                         entry("PATH=%s", path.c_str()));
220         throw file_error::Write();
221     }
222 }
223 
224 std::optional<Repository::LogID> Repository::remove(const LogID& id)
225 {
226     auto pel = findPEL(id);
227     if (pel == _pelAttributes.end())
228     {
229         return std::nullopt;
230     }
231 
232     LogID actualID = pel->first;
233     updateRepoStats(pel->second, false);
234 
235     log<level::DEBUG>("Removing PEL from repository",
236                       entry("PEL_ID=0x%X", actualID.pelID.id),
237                       entry("OBMC_LOG_ID=%d", actualID.obmcID.id));
238 
239     if (fs::exists(pel->second.path))
240     {
241         // Check for existense of new archive folder
242         if (!fs::exists(_archivePath))
243         {
244             fs::create_directories(_archivePath);
245         }
246 
247         // Move log file to archive folder
248         auto fileName = _archivePath / pel->second.path.filename();
249         fs::rename(pel->second.path, fileName);
250 
251         // Update size of file
252         _archiveSize += getFileDiskSize(fileName);
253     }
254 
255     _pelAttributes.erase(pel);
256 
257     processDeleteCallbacks(actualID.pelID.id);
258 
259     return actualID;
260 }
261 
262 std::optional<std::vector<uint8_t>> Repository::getPELData(const LogID& id)
263 {
264     auto pel = findPEL(id);
265     if (pel != _pelAttributes.end())
266     {
267         std::ifstream file{pel->second.path.c_str()};
268         if (!file.good())
269         {
270             auto e = errno;
271             log<level::ERR>("Unable to open PEL file", entry("ERRNO=%d", e),
272                             entry("PATH=%s", pel->second.path.c_str()));
273             throw file_error::Open();
274         }
275 
276         std::vector<uint8_t> data{std::istreambuf_iterator<char>(file),
277                                   std::istreambuf_iterator<char>()};
278         return data;
279     }
280 
281     return std::nullopt;
282 }
283 
284 std::optional<sdbusplus::message::unix_fd> Repository::getPELFD(const LogID& id)
285 {
286     auto pel = findPEL(id);
287     if (pel != _pelAttributes.end())
288     {
289         FILE* fp = fopen(pel->second.path.c_str(), "rb");
290 
291         if (fp == nullptr)
292         {
293             auto e = errno;
294             log<level::ERR>("Unable to open PEL File", entry("ERRNO=%d", e),
295                             entry("PATH=%s", pel->second.path.c_str()));
296             throw file_error::Open();
297         }
298 
299         // Must leave the file open here.  It will be closed by sdbusplus
300         // when it sends it back over D-Bus.
301 
302         return fileno(fp);
303     }
304     return std::nullopt;
305 }
306 
307 void Repository::for_each(ForEachFunc func) const
308 {
309     for (const auto& [id, attributes] : _pelAttributes)
310     {
311         std::ifstream file{attributes.path};
312 
313         if (!file.good())
314         {
315             auto e = errno;
316             log<level::ERR>("Repository::for_each: Unable to open PEL file",
317                             entry("ERRNO=%d", e),
318                             entry("PATH=%s", attributes.path.c_str()));
319             continue;
320         }
321 
322         std::vector<uint8_t> data{std::istreambuf_iterator<char>(file),
323                                   std::istreambuf_iterator<char>()};
324         file.close();
325 
326         PEL pel{data};
327 
328         try
329         {
330             if (func(pel))
331             {
332                 break;
333             }
334         }
335         catch (std::exception& e)
336         {
337             log<level::ERR>("Repository::for_each function exception",
338                             entry("ERROR=%s", e.what()));
339         }
340     }
341 }
342 
343 void Repository::processAddCallbacks(const PEL& pel) const
344 {
345     for (auto& [name, func] : _addSubscriptions)
346     {
347         try
348         {
349             func(pel);
350         }
351         catch (std::exception& e)
352         {
353             log<level::ERR>("PEL Repository add callback exception",
354                             entry("NAME=%s", name.c_str()),
355                             entry("ERROR=%s", e.what()));
356         }
357     }
358 }
359 
360 void Repository::processDeleteCallbacks(uint32_t id) const
361 {
362     for (auto& [name, func] : _deleteSubscriptions)
363     {
364         try
365         {
366             func(id);
367         }
368         catch (std::exception& e)
369         {
370             log<level::ERR>("PEL Repository delete callback exception",
371                             entry("NAME=%s", name.c_str()),
372                             entry("ERROR=%s", e.what()));
373         }
374     }
375 }
376 
377 std::optional<std::reference_wrapper<const Repository::PELAttributes>>
378     Repository::getPELAttributes(const LogID& id) const
379 {
380     auto pel = findPEL(id);
381     if (pel != _pelAttributes.end())
382     {
383         return pel->second;
384     }
385 
386     return std::nullopt;
387 }
388 
389 void Repository::setPELHostTransState(uint32_t pelID, TransmissionState state)
390 {
391     LogID id{LogID::Pel{pelID}};
392     auto attr = std::find_if(_pelAttributes.begin(), _pelAttributes.end(),
393                              [&id](const auto& a) { return a.first == id; });
394 
395     if ((attr != _pelAttributes.end()) && (attr->second.hostState != state))
396     {
397         PELUpdateFunc func = [state](PEL& pel) {
398             pel.setHostTransmissionState(state);
399         };
400 
401         try
402         {
403             updatePEL(attr->second.path, func);
404 
405             attr->second.hostState = state;
406         }
407         catch (std::exception& e)
408         {
409             log<level::ERR>("Unable to update PEL host transmission state",
410                             entry("PATH=%s", attr->second.path.c_str()),
411                             entry("ERROR=%s", e.what()));
412         }
413     }
414 }
415 
416 void Repository::setPELHMCTransState(uint32_t pelID, TransmissionState state)
417 {
418     LogID id{LogID::Pel{pelID}};
419     auto attr = std::find_if(_pelAttributes.begin(), _pelAttributes.end(),
420                              [&id](const auto& a) { return a.first == id; });
421 
422     if ((attr != _pelAttributes.end()) && (attr->second.hmcState != state))
423     {
424         PELUpdateFunc func = [state](PEL& pel) {
425             pel.setHMCTransmissionState(state);
426         };
427 
428         try
429         {
430             updatePEL(attr->second.path, func);
431 
432             attr->second.hmcState = state;
433         }
434         catch (std::exception& e)
435         {
436             log<level::ERR>("Unable to update PEL HMC transmission state",
437                             entry("PATH=%s", attr->second.path.c_str()),
438                             entry("ERROR=%s", e.what()));
439         }
440     }
441 }
442 
443 void Repository::updatePEL(const fs::path& path, PELUpdateFunc updateFunc)
444 {
445     std::ifstream file{path};
446     std::vector<uint8_t> data{std::istreambuf_iterator<char>(file),
447                               std::istreambuf_iterator<char>()};
448     file.close();
449 
450     PEL pel{data};
451 
452     if (pel.valid())
453     {
454         updateFunc(pel);
455 
456         write(pel, path);
457     }
458     else
459     {
460         throw std::runtime_error(
461             "Unable to read a valid PEL when trying to update it");
462     }
463 }
464 
465 bool Repository::isServiceableSev(const PELAttributes& pel)
466 {
467     auto sevType = static_cast<SeverityType>(pel.severity & 0xF0);
468     auto sevPVEntry =
469         pel_values::findByValue(pel.severity, pel_values::severityValues);
470     std::string sevName = std::get<pel_values::registryNamePos>(*sevPVEntry);
471 
472     bool check1 = (sevType == SeverityType::predictive) ||
473                   (sevType == SeverityType::unrecoverable) ||
474                   (sevType == SeverityType::critical);
475 
476     bool check2 = ((sevType == SeverityType::recovered) ||
477                    (sevName == "symptom_recovered")) &&
478                   !pel.actionFlags.test(hiddenFlagBit);
479 
480     bool check3 = (sevName == "symptom_predictive") ||
481                   (sevName == "symptom_unrecoverable") ||
482                   (sevName == "symptom_critical");
483 
484     return check1 || check2 || check3;
485 }
486 
487 void Repository::updateRepoStats(const PELAttributes& pel, bool pelAdded)
488 {
489     auto isServiceable = Repository::isServiceableSev(pel);
490     auto bmcPEL = CreatorID::openBMC == static_cast<CreatorID>(pel.creator);
491 
492     auto adjustSize = [pelAdded, &pel](auto& runningSize) {
493         if (pelAdded)
494         {
495             runningSize += pel.sizeOnDisk;
496         }
497         else
498         {
499             runningSize = std::max(static_cast<int64_t>(runningSize) -
500                                        static_cast<int64_t>(pel.sizeOnDisk),
501                                    static_cast<int64_t>(0));
502         }
503     };
504 
505     adjustSize(_sizes.total);
506 
507     if (bmcPEL)
508     {
509         adjustSize(_sizes.bmc);
510         if (isServiceable)
511         {
512             adjustSize(_sizes.bmcServiceable);
513         }
514         else
515         {
516             adjustSize(_sizes.bmcInfo);
517         }
518     }
519     else
520     {
521         adjustSize(_sizes.nonBMC);
522         if (isServiceable)
523         {
524             adjustSize(_sizes.nonBMCServiceable);
525         }
526         else
527         {
528             adjustSize(_sizes.nonBMCInfo);
529         }
530     }
531 }
532 
533 bool Repository::sizeWarning() const
534 {
535     if ((_archiveSize > 0) && ((_sizes.total + _archiveSize) >
536                                ((_maxRepoSize * warningPercentage) / 100)))
537     {
538         log<level::INFO>(
539             "Repository::sizeWarning function:Deleting the files in archive");
540 
541         std::string cmd = "rm " + _archivePath.string() + "/*_*";
542         auto rc = system(cmd.c_str());
543         if (rc)
544         {
545             log<level::ERR>("Repository::sizeWarning function:Could not delete "
546                             "files in archive");
547         }
548     }
549 
550     return (_sizes.total > (_maxRepoSize * warningPercentage / 100)) ||
551            (_pelAttributes.size() > _maxNumPELs);
552 }
553 
554 std::vector<Repository::AttributesReference>
555     Repository::getAllPELAttributes(SortOrder order) const
556 {
557     std::vector<Repository::AttributesReference> attributes;
558 
559     std::for_each(
560         _pelAttributes.begin(), _pelAttributes.end(),
561         [&attributes](auto& pelEntry) { attributes.push_back(pelEntry); });
562 
563     std::sort(attributes.begin(), attributes.end(),
564               [order](const auto& left, const auto& right) {
565                   if (order == SortOrder::ascending)
566                   {
567                       return left.get().second.path < right.get().second.path;
568                   }
569                   return left.get().second.path > right.get().second.path;
570               });
571 
572     return attributes;
573 }
574 
575 std::vector<uint32_t> Repository::prune()
576 {
577     std::vector<uint32_t> obmcLogIDs;
578     std::string msg = "Pruning PEL repository that takes up " +
579                       std::to_string(_sizes.total) + " bytes and has " +
580                       std::to_string(_pelAttributes.size()) + " PELs";
581     log<level::INFO>(msg.c_str());
582 
583     // Set up the 5 functions to check if the PEL category
584     // is still over its limits.
585 
586     // BMC informational PELs should only take up 15%
587     IsOverLimitFunc overBMCInfoLimit = [this]() {
588         return _sizes.bmcInfo > _maxRepoSize * 15 / 100;
589     };
590 
591     // BMC non informational PELs should only take up 30%
592     IsOverLimitFunc overBMCNonInfoLimit = [this]() {
593         return _sizes.bmcServiceable > _maxRepoSize * 30 / 100;
594     };
595 
596     // Non BMC informational PELs should only take up 15%
597     IsOverLimitFunc overNonBMCInfoLimit = [this]() {
598         return _sizes.nonBMCInfo > _maxRepoSize * 15 / 100;
599     };
600 
601     // Non BMC non informational PELs should only take up 15%
602     IsOverLimitFunc overNonBMCNonInfoLimit = [this]() {
603         return _sizes.nonBMCServiceable > _maxRepoSize * 30 / 100;
604     };
605 
606     // Bring the total number of PELs down to 80% of the max
607     IsOverLimitFunc tooManyPELsLimit = [this]() {
608         return _pelAttributes.size() > _maxNumPELs * 80 / 100;
609     };
610 
611     // Set up the functions to determine which category a PEL is in.
612     // TODO: Return false in these functions if a PEL caused a guard record.
613 
614     // A BMC informational PEL
615     IsPELTypeFunc isBMCInfo = [](const PELAttributes& pel) {
616         return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) &&
617                !Repository::isServiceableSev(pel);
618     };
619 
620     // A BMC non informational PEL
621     IsPELTypeFunc isBMCNonInfo = [](const PELAttributes& pel) {
622         return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) &&
623                Repository::isServiceableSev(pel);
624     };
625 
626     // A non BMC informational PEL
627     IsPELTypeFunc isNonBMCInfo = [](const PELAttributes& pel) {
628         return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) &&
629                !Repository::isServiceableSev(pel);
630     };
631 
632     // A non BMC non informational PEL
633     IsPELTypeFunc isNonBMCNonInfo = [](const PELAttributes& pel) {
634         return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) &&
635                Repository::isServiceableSev(pel);
636     };
637 
638     // When counting PELs, count every PEL
639     IsPELTypeFunc isAnyPEL = [](const PELAttributes& /*pel*/) { return true; };
640 
641     // Check all 4 categories, which will result in at most 90%
642     // usage (15 + 30 + 15 + 30).
643     removePELs(overBMCInfoLimit, isBMCInfo, obmcLogIDs);
644     removePELs(overBMCNonInfoLimit, isBMCNonInfo, obmcLogIDs);
645     removePELs(overNonBMCInfoLimit, isNonBMCInfo, obmcLogIDs);
646     removePELs(overNonBMCNonInfoLimit, isNonBMCNonInfo, obmcLogIDs);
647 
648     // After the above pruning check if there are still too many PELs,
649     // which can happen depending on PEL sizes.
650     if (_pelAttributes.size() > _maxNumPELs)
651     {
652         removePELs(tooManyPELsLimit, isAnyPEL, obmcLogIDs);
653     }
654 
655     if (!obmcLogIDs.empty())
656     {
657         std::string msg = "Number of PELs removed to save space: " +
658                           std::to_string(obmcLogIDs.size());
659         log<level::INFO>(msg.c_str());
660     }
661 
662     return obmcLogIDs;
663 }
664 
665 void Repository::removePELs(IsOverLimitFunc& isOverLimit,
666                             IsPELTypeFunc& isPELType,
667                             std::vector<uint32_t>& removedBMCLogIDs)
668 {
669     if (!isOverLimit())
670     {
671         return;
672     }
673 
674     auto attributes = getAllPELAttributes(SortOrder::ascending);
675 
676     // Make 4 passes on the PELs, stopping as soon as isOverLimit
677     // returns false.
678     //   Pass 1: only delete HMC acked PELs
679     //   Pass 2: only delete OS acked PELs
680     //   Pass 3: only delete PHYP sent PELs
681     //   Pass 4: delete all PELs
682     static const std::vector<std::function<bool(const PELAttributes& pel)>>
683         stateChecks{[](const auto& pel) {
684                         return pel.hmcState == TransmissionState::acked;
685                     },
686 
687                     [](const auto& pel) {
688                         return pel.hostState == TransmissionState::acked;
689                     },
690 
691                     [](const auto& pel) {
692                         return pel.hostState == TransmissionState::sent;
693                     },
694 
695                     [](const auto& /*pel*/) { return true; }};
696 
697     for (const auto& stateCheck : stateChecks)
698     {
699         for (auto it = attributes.begin(); it != attributes.end();)
700         {
701             const auto& pel = it->get();
702             if (isPELType(pel.second) && stateCheck(pel.second))
703             {
704                 auto removedID = pel.first.obmcID.id;
705                 remove(pel.first);
706 
707                 removedBMCLogIDs.push_back(removedID);
708 
709                 attributes.erase(it);
710 
711                 if (!isOverLimit())
712                 {
713                     break;
714                 }
715             }
716             else
717             {
718                 ++it;
719             }
720         }
721 
722         if (!isOverLimit())
723         {
724             break;
725         }
726     }
727 }
728 
729 } // namespace pels
730 } // namespace openpower
731