1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "repository.hpp" 17 18 #include <sys/stat.h> 19 20 #include <fstream> 21 #include <phosphor-logging/log.hpp> 22 #include <xyz/openbmc_project/Common/File/error.hpp> 23 24 namespace openpower 25 { 26 namespace pels 27 { 28 29 namespace fs = std::filesystem; 30 using namespace phosphor::logging; 31 namespace file_error = sdbusplus::xyz::openbmc_project::Common::File::Error; 32 33 constexpr size_t warningPercentage = 95; 34 35 /** 36 * @brief Returns the amount of space the file uses on disk. 37 * 38 * This is different than just the regular size of the file. 39 * 40 * @param[in] file - The file to get the size of 41 * 42 * @return size_t The disk space the file uses 43 */ 44 size_t getFileDiskSize(const std::filesystem::path& file) 45 { 46 constexpr size_t statBlockSize = 512; 47 struct stat statData; 48 auto rc = stat(file.c_str(), &statData); 49 if (rc != 0) 50 { 51 auto e = errno; 52 std::string msg = "call to stat() failed on " + file.native() + 53 " with errno " + std::to_string(e); 54 log<level::ERR>(msg.c_str()); 55 abort(); 56 } 57 58 return statData.st_blocks * statBlockSize; 59 } 60 61 Repository::Repository(const std::filesystem::path& basePath, size_t repoSize, 62 size_t maxNumPELs) : 63 _logPath(basePath / "logs"), 64 _maxRepoSize(repoSize), _maxNumPELs(maxNumPELs), 65 _archivePath(basePath / "logs" / "archive") 66 { 67 if (!fs::exists(_logPath)) 68 { 69 fs::create_directories(_logPath); 70 } 71 72 if (!fs::exists(_archivePath)) 73 { 74 fs::create_directories(_archivePath); 75 } 76 77 restore(); 78 } 79 80 void Repository::restore() 81 { 82 for (auto& dirEntry : fs::directory_iterator(_logPath)) 83 { 84 try 85 { 86 if (!fs::is_regular_file(dirEntry.path())) 87 { 88 continue; 89 } 90 91 std::ifstream file{dirEntry.path()}; 92 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 93 std::istreambuf_iterator<char>()}; 94 file.close(); 95 96 PEL pel{data}; 97 if (pel.valid()) 98 { 99 // If the host hasn't acked it, reset the host state so 100 // it will get sent up again. 101 if (pel.hostTransmissionState() == TransmissionState::sent) 102 { 103 pel.setHostTransmissionState(TransmissionState::newPEL); 104 try 105 { 106 write(pel, dirEntry.path()); 107 } 108 catch (std::exception& e) 109 { 110 log<level::ERR>( 111 "Failed to save PEL after updating host state", 112 entry("PELID=0x%X", pel.id())); 113 } 114 } 115 116 PELAttributes attributes{dirEntry.path(), 117 getFileDiskSize(dirEntry.path()), 118 pel.privateHeader().creatorID(), 119 pel.userHeader().severity(), 120 pel.userHeader().actionFlags(), 121 pel.hostTransmissionState(), 122 pel.hmcTransmissionState()}; 123 124 using pelID = LogID::Pel; 125 using obmcID = LogID::Obmc; 126 _pelAttributes.emplace( 127 LogID(pelID(pel.id()), obmcID(pel.obmcLogID())), 128 attributes); 129 130 updateRepoStats(attributes, true); 131 } 132 else 133 { 134 log<level::ERR>( 135 "Found invalid PEL file while restoring. Removing.", 136 entry("FILENAME=%s", dirEntry.path().c_str())); 137 fs::remove(dirEntry.path()); 138 } 139 } 140 catch (std::exception& e) 141 { 142 log<level::ERR>("Hit exception while restoring PEL File", 143 entry("FILENAME=%s", dirEntry.path().c_str()), 144 entry("ERROR=%s", e.what())); 145 } 146 } 147 148 // Get size of archive folder 149 for (auto& dirEntry : fs::directory_iterator(_archivePath)) 150 { 151 _archiveSize += getFileDiskSize(dirEntry); 152 } 153 } 154 155 std::string Repository::getPELFilename(uint32_t pelID, const BCDTime& time) 156 { 157 char name[50]; 158 sprintf(name, "%.2X%.2X%.2X%.2X%.2X%.2X%.2X%.2X_%.8X", time.yearMSB, 159 time.yearLSB, time.month, time.day, time.hour, time.minutes, 160 time.seconds, time.hundredths, pelID); 161 return std::string{name}; 162 } 163 164 void Repository::add(std::unique_ptr<PEL>& pel) 165 { 166 pel->setHostTransmissionState(TransmissionState::newPEL); 167 pel->setHMCTransmissionState(TransmissionState::newPEL); 168 169 auto path = _logPath / getPELFilename(pel->id(), pel->commitTime()); 170 171 write(*(pel.get()), path); 172 173 PELAttributes attributes{path, 174 getFileDiskSize(path), 175 pel->privateHeader().creatorID(), 176 pel->userHeader().severity(), 177 pel->userHeader().actionFlags(), 178 pel->hostTransmissionState(), 179 pel->hmcTransmissionState()}; 180 181 using pelID = LogID::Pel; 182 using obmcID = LogID::Obmc; 183 _pelAttributes.emplace(LogID(pelID(pel->id()), obmcID(pel->obmcLogID())), 184 attributes); 185 186 _lastPelID = pel->id(); 187 188 updateRepoStats(attributes, true); 189 190 processAddCallbacks(*pel); 191 } 192 193 void Repository::write(const PEL& pel, const fs::path& path) 194 { 195 std::ofstream file{path, std::ios::binary}; 196 197 if (!file.good()) 198 { 199 // If this fails, the filesystem is probably full so it isn't like 200 // we could successfully create yet another error log here. 201 auto e = errno; 202 fs::remove(path); 203 log<level::ERR>("Unable to open PEL file for writing", 204 entry("ERRNO=%d", e), entry("PATH=%s", path.c_str())); 205 throw file_error::Open(); 206 } 207 208 auto data = pel.data(); 209 file.write(reinterpret_cast<const char*>(data.data()), data.size()); 210 211 if (file.fail()) 212 { 213 // Same note as above about not being able to create an error log 214 // for this case even if we wanted. 215 auto e = errno; 216 file.close(); 217 fs::remove(path); 218 log<level::ERR>("Unable to write PEL file", entry("ERRNO=%d", e), 219 entry("PATH=%s", path.c_str())); 220 throw file_error::Write(); 221 } 222 } 223 224 std::optional<Repository::LogID> Repository::remove(const LogID& id) 225 { 226 auto pel = findPEL(id); 227 if (pel == _pelAttributes.end()) 228 { 229 return std::nullopt; 230 } 231 232 LogID actualID = pel->first; 233 updateRepoStats(pel->second, false); 234 235 log<level::DEBUG>("Removing PEL from repository", 236 entry("PEL_ID=0x%X", actualID.pelID.id), 237 entry("OBMC_LOG_ID=%d", actualID.obmcID.id)); 238 239 if (fs::exists(pel->second.path)) 240 { 241 // Check for existense of new archive folder 242 if (!fs::exists(_archivePath)) 243 { 244 fs::create_directories(_archivePath); 245 } 246 247 // Move log file to archive folder 248 auto fileName = _archivePath / pel->second.path.filename(); 249 fs::rename(pel->second.path, fileName); 250 251 // Update size of file 252 _archiveSize += getFileDiskSize(fileName); 253 } 254 255 _pelAttributes.erase(pel); 256 257 processDeleteCallbacks(actualID.pelID.id); 258 259 return actualID; 260 } 261 262 std::optional<std::vector<uint8_t>> Repository::getPELData(const LogID& id) 263 { 264 auto pel = findPEL(id); 265 if (pel != _pelAttributes.end()) 266 { 267 std::ifstream file{pel->second.path.c_str()}; 268 if (!file.good()) 269 { 270 auto e = errno; 271 log<level::ERR>("Unable to open PEL file", entry("ERRNO=%d", e), 272 entry("PATH=%s", pel->second.path.c_str())); 273 throw file_error::Open(); 274 } 275 276 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 277 std::istreambuf_iterator<char>()}; 278 return data; 279 } 280 281 return std::nullopt; 282 } 283 284 std::optional<sdbusplus::message::unix_fd> Repository::getPELFD(const LogID& id) 285 { 286 auto pel = findPEL(id); 287 if (pel != _pelAttributes.end()) 288 { 289 FILE* fp = fopen(pel->second.path.c_str(), "rb"); 290 291 if (fp == nullptr) 292 { 293 auto e = errno; 294 log<level::ERR>("Unable to open PEL File", entry("ERRNO=%d", e), 295 entry("PATH=%s", pel->second.path.c_str())); 296 throw file_error::Open(); 297 } 298 299 // Must leave the file open here. It will be closed by sdbusplus 300 // when it sends it back over D-Bus. 301 302 return fileno(fp); 303 } 304 return std::nullopt; 305 } 306 307 void Repository::for_each(ForEachFunc func) const 308 { 309 for (const auto& [id, attributes] : _pelAttributes) 310 { 311 std::ifstream file{attributes.path}; 312 313 if (!file.good()) 314 { 315 auto e = errno; 316 log<level::ERR>("Repository::for_each: Unable to open PEL file", 317 entry("ERRNO=%d", e), 318 entry("PATH=%s", attributes.path.c_str())); 319 continue; 320 } 321 322 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 323 std::istreambuf_iterator<char>()}; 324 file.close(); 325 326 PEL pel{data}; 327 328 try 329 { 330 if (func(pel)) 331 { 332 break; 333 } 334 } 335 catch (std::exception& e) 336 { 337 log<level::ERR>("Repository::for_each function exception", 338 entry("ERROR=%s", e.what())); 339 } 340 } 341 } 342 343 void Repository::processAddCallbacks(const PEL& pel) const 344 { 345 for (auto& [name, func] : _addSubscriptions) 346 { 347 try 348 { 349 func(pel); 350 } 351 catch (std::exception& e) 352 { 353 log<level::ERR>("PEL Repository add callback exception", 354 entry("NAME=%s", name.c_str()), 355 entry("ERROR=%s", e.what())); 356 } 357 } 358 } 359 360 void Repository::processDeleteCallbacks(uint32_t id) const 361 { 362 for (auto& [name, func] : _deleteSubscriptions) 363 { 364 try 365 { 366 func(id); 367 } 368 catch (std::exception& e) 369 { 370 log<level::ERR>("PEL Repository delete callback exception", 371 entry("NAME=%s", name.c_str()), 372 entry("ERROR=%s", e.what())); 373 } 374 } 375 } 376 377 std::optional<std::reference_wrapper<const Repository::PELAttributes>> 378 Repository::getPELAttributes(const LogID& id) const 379 { 380 auto pel = findPEL(id); 381 if (pel != _pelAttributes.end()) 382 { 383 return pel->second; 384 } 385 386 return std::nullopt; 387 } 388 389 void Repository::setPELHostTransState(uint32_t pelID, TransmissionState state) 390 { 391 LogID id{LogID::Pel{pelID}}; 392 auto attr = std::find_if(_pelAttributes.begin(), _pelAttributes.end(), 393 [&id](const auto& a) { return a.first == id; }); 394 395 if ((attr != _pelAttributes.end()) && (attr->second.hostState != state)) 396 { 397 PELUpdateFunc func = [state](PEL& pel) { 398 pel.setHostTransmissionState(state); 399 }; 400 401 try 402 { 403 updatePEL(attr->second.path, func); 404 405 attr->second.hostState = state; 406 } 407 catch (std::exception& e) 408 { 409 log<level::ERR>("Unable to update PEL host transmission state", 410 entry("PATH=%s", attr->second.path.c_str()), 411 entry("ERROR=%s", e.what())); 412 } 413 } 414 } 415 416 void Repository::setPELHMCTransState(uint32_t pelID, TransmissionState state) 417 { 418 LogID id{LogID::Pel{pelID}}; 419 auto attr = std::find_if(_pelAttributes.begin(), _pelAttributes.end(), 420 [&id](const auto& a) { return a.first == id; }); 421 422 if ((attr != _pelAttributes.end()) && (attr->second.hmcState != state)) 423 { 424 PELUpdateFunc func = [state](PEL& pel) { 425 pel.setHMCTransmissionState(state); 426 }; 427 428 try 429 { 430 updatePEL(attr->second.path, func); 431 432 attr->second.hmcState = state; 433 } 434 catch (std::exception& e) 435 { 436 log<level::ERR>("Unable to update PEL HMC transmission state", 437 entry("PATH=%s", attr->second.path.c_str()), 438 entry("ERROR=%s", e.what())); 439 } 440 } 441 } 442 443 void Repository::updatePEL(const fs::path& path, PELUpdateFunc updateFunc) 444 { 445 std::ifstream file{path}; 446 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 447 std::istreambuf_iterator<char>()}; 448 file.close(); 449 450 PEL pel{data}; 451 452 if (pel.valid()) 453 { 454 updateFunc(pel); 455 456 write(pel, path); 457 } 458 else 459 { 460 throw std::runtime_error( 461 "Unable to read a valid PEL when trying to update it"); 462 } 463 } 464 465 bool Repository::isServiceableSev(const PELAttributes& pel) 466 { 467 auto sevType = static_cast<SeverityType>(pel.severity & 0xF0); 468 auto sevPVEntry = 469 pel_values::findByValue(pel.severity, pel_values::severityValues); 470 std::string sevName = std::get<pel_values::registryNamePos>(*sevPVEntry); 471 472 bool check1 = (sevType == SeverityType::predictive) || 473 (sevType == SeverityType::unrecoverable) || 474 (sevType == SeverityType::critical); 475 476 bool check2 = ((sevType == SeverityType::recovered) || 477 (sevName == "symptom_recovered")) && 478 !pel.actionFlags.test(hiddenFlagBit); 479 480 bool check3 = (sevName == "symptom_predictive") || 481 (sevName == "symptom_unrecoverable") || 482 (sevName == "symptom_critical"); 483 484 return check1 || check2 || check3; 485 } 486 487 void Repository::updateRepoStats(const PELAttributes& pel, bool pelAdded) 488 { 489 auto isServiceable = Repository::isServiceableSev(pel); 490 auto bmcPEL = CreatorID::openBMC == static_cast<CreatorID>(pel.creator); 491 492 auto adjustSize = [pelAdded, &pel](auto& runningSize) { 493 if (pelAdded) 494 { 495 runningSize += pel.sizeOnDisk; 496 } 497 else 498 { 499 runningSize = std::max(static_cast<int64_t>(runningSize) - 500 static_cast<int64_t>(pel.sizeOnDisk), 501 static_cast<int64_t>(0)); 502 } 503 }; 504 505 adjustSize(_sizes.total); 506 507 if (bmcPEL) 508 { 509 adjustSize(_sizes.bmc); 510 if (isServiceable) 511 { 512 adjustSize(_sizes.bmcServiceable); 513 } 514 else 515 { 516 adjustSize(_sizes.bmcInfo); 517 } 518 } 519 else 520 { 521 adjustSize(_sizes.nonBMC); 522 if (isServiceable) 523 { 524 adjustSize(_sizes.nonBMCServiceable); 525 } 526 else 527 { 528 adjustSize(_sizes.nonBMCInfo); 529 } 530 } 531 } 532 533 bool Repository::sizeWarning() const 534 { 535 if ((_archiveSize > 0) && ((_sizes.total + _archiveSize) > 536 ((_maxRepoSize * warningPercentage) / 100))) 537 { 538 log<level::INFO>( 539 "Repository::sizeWarning function:Deleting the files in archive"); 540 541 std::string cmd = "rm " + _archivePath.string() + "/*_*"; 542 auto rc = system(cmd.c_str()); 543 if (rc) 544 { 545 log<level::ERR>("Repository::sizeWarning function:Could not delete " 546 "files in archive"); 547 } 548 } 549 550 return (_sizes.total > (_maxRepoSize * warningPercentage / 100)) || 551 (_pelAttributes.size() > _maxNumPELs); 552 } 553 554 std::vector<Repository::AttributesReference> 555 Repository::getAllPELAttributes(SortOrder order) const 556 { 557 std::vector<Repository::AttributesReference> attributes; 558 559 std::for_each( 560 _pelAttributes.begin(), _pelAttributes.end(), 561 [&attributes](auto& pelEntry) { attributes.push_back(pelEntry); }); 562 563 std::sort(attributes.begin(), attributes.end(), 564 [order](const auto& left, const auto& right) { 565 if (order == SortOrder::ascending) 566 { 567 return left.get().second.path < right.get().second.path; 568 } 569 return left.get().second.path > right.get().second.path; 570 }); 571 572 return attributes; 573 } 574 575 std::vector<uint32_t> Repository::prune() 576 { 577 std::vector<uint32_t> obmcLogIDs; 578 std::string msg = "Pruning PEL repository that takes up " + 579 std::to_string(_sizes.total) + " bytes and has " + 580 std::to_string(_pelAttributes.size()) + " PELs"; 581 log<level::INFO>(msg.c_str()); 582 583 // Set up the 5 functions to check if the PEL category 584 // is still over its limits. 585 586 // BMC informational PELs should only take up 15% 587 IsOverLimitFunc overBMCInfoLimit = [this]() { 588 return _sizes.bmcInfo > _maxRepoSize * 15 / 100; 589 }; 590 591 // BMC non informational PELs should only take up 30% 592 IsOverLimitFunc overBMCNonInfoLimit = [this]() { 593 return _sizes.bmcServiceable > _maxRepoSize * 30 / 100; 594 }; 595 596 // Non BMC informational PELs should only take up 15% 597 IsOverLimitFunc overNonBMCInfoLimit = [this]() { 598 return _sizes.nonBMCInfo > _maxRepoSize * 15 / 100; 599 }; 600 601 // Non BMC non informational PELs should only take up 15% 602 IsOverLimitFunc overNonBMCNonInfoLimit = [this]() { 603 return _sizes.nonBMCServiceable > _maxRepoSize * 30 / 100; 604 }; 605 606 // Bring the total number of PELs down to 80% of the max 607 IsOverLimitFunc tooManyPELsLimit = [this]() { 608 return _pelAttributes.size() > _maxNumPELs * 80 / 100; 609 }; 610 611 // Set up the functions to determine which category a PEL is in. 612 // TODO: Return false in these functions if a PEL caused a guard record. 613 614 // A BMC informational PEL 615 IsPELTypeFunc isBMCInfo = [](const PELAttributes& pel) { 616 return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) && 617 !Repository::isServiceableSev(pel); 618 }; 619 620 // A BMC non informational PEL 621 IsPELTypeFunc isBMCNonInfo = [](const PELAttributes& pel) { 622 return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) && 623 Repository::isServiceableSev(pel); 624 }; 625 626 // A non BMC informational PEL 627 IsPELTypeFunc isNonBMCInfo = [](const PELAttributes& pel) { 628 return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) && 629 !Repository::isServiceableSev(pel); 630 }; 631 632 // A non BMC non informational PEL 633 IsPELTypeFunc isNonBMCNonInfo = [](const PELAttributes& pel) { 634 return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) && 635 Repository::isServiceableSev(pel); 636 }; 637 638 // When counting PELs, count every PEL 639 IsPELTypeFunc isAnyPEL = [](const PELAttributes& /*pel*/) { return true; }; 640 641 // Check all 4 categories, which will result in at most 90% 642 // usage (15 + 30 + 15 + 30). 643 removePELs(overBMCInfoLimit, isBMCInfo, obmcLogIDs); 644 removePELs(overBMCNonInfoLimit, isBMCNonInfo, obmcLogIDs); 645 removePELs(overNonBMCInfoLimit, isNonBMCInfo, obmcLogIDs); 646 removePELs(overNonBMCNonInfoLimit, isNonBMCNonInfo, obmcLogIDs); 647 648 // After the above pruning check if there are still too many PELs, 649 // which can happen depending on PEL sizes. 650 if (_pelAttributes.size() > _maxNumPELs) 651 { 652 removePELs(tooManyPELsLimit, isAnyPEL, obmcLogIDs); 653 } 654 655 if (!obmcLogIDs.empty()) 656 { 657 std::string msg = "Number of PELs removed to save space: " + 658 std::to_string(obmcLogIDs.size()); 659 log<level::INFO>(msg.c_str()); 660 } 661 662 return obmcLogIDs; 663 } 664 665 void Repository::removePELs(IsOverLimitFunc& isOverLimit, 666 IsPELTypeFunc& isPELType, 667 std::vector<uint32_t>& removedBMCLogIDs) 668 { 669 if (!isOverLimit()) 670 { 671 return; 672 } 673 674 auto attributes = getAllPELAttributes(SortOrder::ascending); 675 676 // Make 4 passes on the PELs, stopping as soon as isOverLimit 677 // returns false. 678 // Pass 1: only delete HMC acked PELs 679 // Pass 2: only delete OS acked PELs 680 // Pass 3: only delete PHYP sent PELs 681 // Pass 4: delete all PELs 682 static const std::vector<std::function<bool(const PELAttributes& pel)>> 683 stateChecks{[](const auto& pel) { 684 return pel.hmcState == TransmissionState::acked; 685 }, 686 687 [](const auto& pel) { 688 return pel.hostState == TransmissionState::acked; 689 }, 690 691 [](const auto& pel) { 692 return pel.hostState == TransmissionState::sent; 693 }, 694 695 [](const auto& /*pel*/) { return true; }}; 696 697 for (const auto& stateCheck : stateChecks) 698 { 699 for (auto it = attributes.begin(); it != attributes.end();) 700 { 701 const auto& pel = it->get(); 702 if (isPELType(pel.second) && stateCheck(pel.second)) 703 { 704 auto removedID = pel.first.obmcID.id; 705 remove(pel.first); 706 707 removedBMCLogIDs.push_back(removedID); 708 709 attributes.erase(it); 710 711 if (!isOverLimit()) 712 { 713 break; 714 } 715 } 716 else 717 { 718 ++it; 719 } 720 } 721 722 if (!isOverLimit()) 723 { 724 break; 725 } 726 } 727 } 728 729 } // namespace pels 730 } // namespace openpower 731