1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "repository.hpp" 17 18 #include <sys/stat.h> 19 20 #include <fstream> 21 #include <phosphor-logging/log.hpp> 22 #include <xyz/openbmc_project/Common/File/error.hpp> 23 24 namespace openpower 25 { 26 namespace pels 27 { 28 29 namespace fs = std::filesystem; 30 using namespace phosphor::logging; 31 namespace file_error = sdbusplus::xyz::openbmc_project::Common::File::Error; 32 33 constexpr size_t warningPercentage = 95; 34 35 /** 36 * @brief Returns the amount of space the file uses on disk. 37 * 38 * This is different than just the regular size of the file. 39 * 40 * @param[in] file - The file to get the size of 41 * 42 * @return size_t The disk space the file uses 43 */ 44 size_t getFileDiskSize(const std::filesystem::path& file) 45 { 46 constexpr size_t statBlockSize = 512; 47 struct stat statData; 48 auto rc = stat(file.c_str(), &statData); 49 if (rc != 0) 50 { 51 auto e = errno; 52 std::string msg = "call to stat() failed on " + file.native() + 53 " with errno " + std::to_string(e); 54 log<level::ERR>(msg.c_str()); 55 abort(); 56 } 57 58 return statData.st_blocks * statBlockSize; 59 } 60 61 Repository::Repository(const std::filesystem::path& basePath, size_t repoSize, 62 size_t maxNumPELs) : 63 _logPath(basePath / "logs"), 64 _maxRepoSize(repoSize), _maxNumPELs(maxNumPELs) 65 { 66 if (!fs::exists(_logPath)) 67 { 68 fs::create_directories(_logPath); 69 } 70 71 restore(); 72 } 73 74 void Repository::restore() 75 { 76 for (auto& dirEntry : fs::directory_iterator(_logPath)) 77 { 78 try 79 { 80 if (!fs::is_regular_file(dirEntry.path())) 81 { 82 continue; 83 } 84 85 std::ifstream file{dirEntry.path()}; 86 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 87 std::istreambuf_iterator<char>()}; 88 file.close(); 89 90 PEL pel{data}; 91 if (pel.valid()) 92 { 93 // If the host hasn't acked it, reset the host state so 94 // it will get sent up again. 95 if (pel.hostTransmissionState() == TransmissionState::sent) 96 { 97 pel.setHostTransmissionState(TransmissionState::newPEL); 98 try 99 { 100 write(pel, dirEntry.path()); 101 } 102 catch (std::exception& e) 103 { 104 log<level::ERR>( 105 "Failed to save PEL after updating host state", 106 entry("PELID=0x%X", pel.id())); 107 } 108 } 109 110 PELAttributes attributes{dirEntry.path(), 111 getFileDiskSize(dirEntry.path()), 112 pel.privateHeader().creatorID(), 113 pel.userHeader().severity(), 114 pel.userHeader().actionFlags(), 115 pel.hostTransmissionState(), 116 pel.hmcTransmissionState()}; 117 118 using pelID = LogID::Pel; 119 using obmcID = LogID::Obmc; 120 _pelAttributes.emplace( 121 LogID(pelID(pel.id()), obmcID(pel.obmcLogID())), 122 attributes); 123 124 updateRepoStats(attributes, true); 125 } 126 else 127 { 128 log<level::ERR>( 129 "Found invalid PEL file while restoring. Removing.", 130 entry("FILENAME=%s", dirEntry.path().c_str())); 131 fs::remove(dirEntry.path()); 132 } 133 } 134 catch (std::exception& e) 135 { 136 log<level::ERR>("Hit exception while restoring PEL File", 137 entry("FILENAME=%s", dirEntry.path().c_str()), 138 entry("ERROR=%s", e.what())); 139 } 140 } 141 } 142 143 std::string Repository::getPELFilename(uint32_t pelID, const BCDTime& time) 144 { 145 char name[50]; 146 sprintf(name, "%.2X%.2X%.2X%.2X%.2X%.2X%.2X%.2X_%.8X", time.yearMSB, 147 time.yearLSB, time.month, time.day, time.hour, time.minutes, 148 time.seconds, time.hundredths, pelID); 149 return std::string{name}; 150 } 151 152 void Repository::add(std::unique_ptr<PEL>& pel) 153 { 154 pel->setHostTransmissionState(TransmissionState::newPEL); 155 pel->setHMCTransmissionState(TransmissionState::newPEL); 156 157 auto path = _logPath / getPELFilename(pel->id(), pel->commitTime()); 158 159 write(*(pel.get()), path); 160 161 PELAttributes attributes{path, 162 getFileDiskSize(path), 163 pel->privateHeader().creatorID(), 164 pel->userHeader().severity(), 165 pel->userHeader().actionFlags(), 166 pel->hostTransmissionState(), 167 pel->hmcTransmissionState()}; 168 169 using pelID = LogID::Pel; 170 using obmcID = LogID::Obmc; 171 _pelAttributes.emplace(LogID(pelID(pel->id()), obmcID(pel->obmcLogID())), 172 attributes); 173 174 _lastPelID = pel->id(); 175 176 updateRepoStats(attributes, true); 177 178 processAddCallbacks(*pel); 179 } 180 181 void Repository::write(const PEL& pel, const fs::path& path) 182 { 183 std::ofstream file{path, std::ios::binary}; 184 185 if (!file.good()) 186 { 187 // If this fails, the filesystem is probably full so it isn't like 188 // we could successfully create yet another error log here. 189 auto e = errno; 190 fs::remove(path); 191 log<level::ERR>("Unable to open PEL file for writing", 192 entry("ERRNO=%d", e), entry("PATH=%s", path.c_str())); 193 throw file_error::Open(); 194 } 195 196 auto data = pel.data(); 197 file.write(reinterpret_cast<const char*>(data.data()), data.size()); 198 199 if (file.fail()) 200 { 201 // Same note as above about not being able to create an error log 202 // for this case even if we wanted. 203 auto e = errno; 204 file.close(); 205 fs::remove(path); 206 log<level::ERR>("Unable to write PEL file", entry("ERRNO=%d", e), 207 entry("PATH=%s", path.c_str())); 208 throw file_error::Write(); 209 } 210 } 211 212 std::optional<Repository::LogID> Repository::remove(const LogID& id) 213 { 214 auto pel = findPEL(id); 215 if (pel == _pelAttributes.end()) 216 { 217 return std::nullopt; 218 } 219 220 LogID actualID = pel->first; 221 updateRepoStats(pel->second, false); 222 223 log<level::DEBUG>("Removing PEL from repository", 224 entry("PEL_ID=0x%X", actualID.pelID.id), 225 entry("OBMC_LOG_ID=%d", actualID.obmcID.id)); 226 fs::remove(pel->second.path); 227 _pelAttributes.erase(pel); 228 229 processDeleteCallbacks(actualID.pelID.id); 230 231 return actualID; 232 } 233 234 std::optional<std::vector<uint8_t>> Repository::getPELData(const LogID& id) 235 { 236 auto pel = findPEL(id); 237 if (pel != _pelAttributes.end()) 238 { 239 std::ifstream file{pel->second.path.c_str()}; 240 if (!file.good()) 241 { 242 auto e = errno; 243 log<level::ERR>("Unable to open PEL file", entry("ERRNO=%d", e), 244 entry("PATH=%s", pel->second.path.c_str())); 245 throw file_error::Open(); 246 } 247 248 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 249 std::istreambuf_iterator<char>()}; 250 return data; 251 } 252 253 return std::nullopt; 254 } 255 256 std::optional<sdbusplus::message::unix_fd> Repository::getPELFD(const LogID& id) 257 { 258 auto pel = findPEL(id); 259 if (pel != _pelAttributes.end()) 260 { 261 FILE* fp = fopen(pel->second.path.c_str(), "rb"); 262 263 if (fp == nullptr) 264 { 265 auto e = errno; 266 log<level::ERR>("Unable to open PEL File", entry("ERRNO=%d", e), 267 entry("PATH=%s", pel->second.path.c_str())); 268 throw file_error::Open(); 269 } 270 271 // Must leave the file open here. It will be closed by sdbusplus 272 // when it sends it back over D-Bus. 273 274 return fileno(fp); 275 } 276 return std::nullopt; 277 } 278 279 void Repository::for_each(ForEachFunc func) const 280 { 281 for (const auto& [id, attributes] : _pelAttributes) 282 { 283 std::ifstream file{attributes.path}; 284 285 if (!file.good()) 286 { 287 auto e = errno; 288 log<level::ERR>("Repository::for_each: Unable to open PEL file", 289 entry("ERRNO=%d", e), 290 entry("PATH=%s", attributes.path.c_str())); 291 continue; 292 } 293 294 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 295 std::istreambuf_iterator<char>()}; 296 file.close(); 297 298 PEL pel{data}; 299 300 try 301 { 302 if (func(pel)) 303 { 304 break; 305 } 306 } 307 catch (std::exception& e) 308 { 309 log<level::ERR>("Repository::for_each function exception", 310 entry("ERROR=%s", e.what())); 311 } 312 } 313 } 314 315 void Repository::processAddCallbacks(const PEL& pel) const 316 { 317 for (auto& [name, func] : _addSubscriptions) 318 { 319 try 320 { 321 func(pel); 322 } 323 catch (std::exception& e) 324 { 325 log<level::ERR>("PEL Repository add callback exception", 326 entry("NAME=%s", name.c_str()), 327 entry("ERROR=%s", e.what())); 328 } 329 } 330 } 331 332 void Repository::processDeleteCallbacks(uint32_t id) const 333 { 334 for (auto& [name, func] : _deleteSubscriptions) 335 { 336 try 337 { 338 func(id); 339 } 340 catch (std::exception& e) 341 { 342 log<level::ERR>("PEL Repository delete callback exception", 343 entry("NAME=%s", name.c_str()), 344 entry("ERROR=%s", e.what())); 345 } 346 } 347 } 348 349 std::optional<std::reference_wrapper<const Repository::PELAttributes>> 350 Repository::getPELAttributes(const LogID& id) const 351 { 352 auto pel = findPEL(id); 353 if (pel != _pelAttributes.end()) 354 { 355 return pel->second; 356 } 357 358 return std::nullopt; 359 } 360 361 void Repository::setPELHostTransState(uint32_t pelID, TransmissionState state) 362 { 363 LogID id{LogID::Pel{pelID}}; 364 auto attr = std::find_if(_pelAttributes.begin(), _pelAttributes.end(), 365 [&id](const auto& a) { return a.first == id; }); 366 367 if ((attr != _pelAttributes.end()) && (attr->second.hostState != state)) 368 { 369 PELUpdateFunc func = [state](PEL& pel) { 370 pel.setHostTransmissionState(state); 371 }; 372 373 try 374 { 375 updatePEL(attr->second.path, func); 376 377 attr->second.hostState = state; 378 } 379 catch (std::exception& e) 380 { 381 log<level::ERR>("Unable to update PEL host transmission state", 382 entry("PATH=%s", attr->second.path.c_str()), 383 entry("ERROR=%s", e.what())); 384 } 385 } 386 } 387 388 void Repository::setPELHMCTransState(uint32_t pelID, TransmissionState state) 389 { 390 LogID id{LogID::Pel{pelID}}; 391 auto attr = std::find_if(_pelAttributes.begin(), _pelAttributes.end(), 392 [&id](const auto& a) { return a.first == id; }); 393 394 if ((attr != _pelAttributes.end()) && (attr->second.hmcState != state)) 395 { 396 PELUpdateFunc func = [state](PEL& pel) { 397 pel.setHMCTransmissionState(state); 398 }; 399 400 try 401 { 402 updatePEL(attr->second.path, func); 403 404 attr->second.hmcState = state; 405 } 406 catch (std::exception& e) 407 { 408 log<level::ERR>("Unable to update PEL HMC transmission state", 409 entry("PATH=%s", attr->second.path.c_str()), 410 entry("ERROR=%s", e.what())); 411 } 412 } 413 } 414 415 void Repository::updatePEL(const fs::path& path, PELUpdateFunc updateFunc) 416 { 417 std::ifstream file{path}; 418 std::vector<uint8_t> data{std::istreambuf_iterator<char>(file), 419 std::istreambuf_iterator<char>()}; 420 file.close(); 421 422 PEL pel{data}; 423 424 if (pel.valid()) 425 { 426 updateFunc(pel); 427 428 write(pel, path); 429 } 430 else 431 { 432 throw std::runtime_error( 433 "Unable to read a valid PEL when trying to update it"); 434 } 435 } 436 437 bool Repository::isServiceableSev(const PELAttributes& pel) 438 { 439 auto sevType = static_cast<SeverityType>(pel.severity & 0xF0); 440 auto sevPVEntry = 441 pel_values::findByValue(pel.severity, pel_values::severityValues); 442 std::string sevName = std::get<pel_values::registryNamePos>(*sevPVEntry); 443 444 bool check1 = (sevType == SeverityType::predictive) || 445 (sevType == SeverityType::unrecoverable) || 446 (sevType == SeverityType::critical); 447 448 bool check2 = ((sevType == SeverityType::recovered) || 449 (sevName == "symptom_recovered")) && 450 !pel.actionFlags.test(hiddenFlagBit); 451 452 bool check3 = (sevName == "symptom_predictive") || 453 (sevName == "symptom_unrecoverable") || 454 (sevName == "symptom_critical"); 455 456 return check1 || check2 || check3; 457 } 458 459 void Repository::updateRepoStats(const PELAttributes& pel, bool pelAdded) 460 { 461 auto isServiceable = Repository::isServiceableSev(pel); 462 auto bmcPEL = CreatorID::openBMC == static_cast<CreatorID>(pel.creator); 463 464 auto adjustSize = [pelAdded, &pel](auto& runningSize) { 465 if (pelAdded) 466 { 467 runningSize += pel.sizeOnDisk; 468 } 469 else 470 { 471 runningSize = std::max(static_cast<int64_t>(runningSize) - 472 static_cast<int64_t>(pel.sizeOnDisk), 473 static_cast<int64_t>(0)); 474 } 475 }; 476 477 adjustSize(_sizes.total); 478 479 if (bmcPEL) 480 { 481 adjustSize(_sizes.bmc); 482 if (isServiceable) 483 { 484 adjustSize(_sizes.bmcServiceable); 485 } 486 else 487 { 488 adjustSize(_sizes.bmcInfo); 489 } 490 } 491 else 492 { 493 adjustSize(_sizes.nonBMC); 494 if (isServiceable) 495 { 496 adjustSize(_sizes.nonBMCServiceable); 497 } 498 else 499 { 500 adjustSize(_sizes.nonBMCInfo); 501 } 502 } 503 } 504 505 bool Repository::sizeWarning() const 506 { 507 return (_sizes.total > (_maxRepoSize * warningPercentage / 100)) || 508 (_pelAttributes.size() > _maxNumPELs); 509 } 510 511 std::vector<Repository::AttributesReference> 512 Repository::getAllPELAttributes(SortOrder order) const 513 { 514 std::vector<Repository::AttributesReference> attributes; 515 516 std::for_each( 517 _pelAttributes.begin(), _pelAttributes.end(), 518 [&attributes](auto& pelEntry) { attributes.push_back(pelEntry); }); 519 520 std::sort(attributes.begin(), attributes.end(), 521 [order](const auto& left, const auto& right) { 522 if (order == SortOrder::ascending) 523 { 524 return left.get().second.path < right.get().second.path; 525 } 526 return left.get().second.path > right.get().second.path; 527 }); 528 529 return attributes; 530 } 531 532 std::vector<uint32_t> Repository::prune() 533 { 534 std::vector<uint32_t> obmcLogIDs; 535 std::string msg = "Pruning PEL repository that takes up " + 536 std::to_string(_sizes.total) + " bytes and has " + 537 std::to_string(_pelAttributes.size()) + " PELs"; 538 log<level::INFO>(msg.c_str()); 539 540 // Set up the 5 functions to check if the PEL category 541 // is still over its limits. 542 543 // BMC informational PELs should only take up 15% 544 IsOverLimitFunc overBMCInfoLimit = [this]() { 545 return _sizes.bmcInfo > _maxRepoSize * 15 / 100; 546 }; 547 548 // BMC non informational PELs should only take up 30% 549 IsOverLimitFunc overBMCNonInfoLimit = [this]() { 550 return _sizes.bmcServiceable > _maxRepoSize * 30 / 100; 551 }; 552 553 // Non BMC informational PELs should only take up 15% 554 IsOverLimitFunc overNonBMCInfoLimit = [this]() { 555 return _sizes.nonBMCInfo > _maxRepoSize * 15 / 100; 556 }; 557 558 // Non BMC non informational PELs should only take up 15% 559 IsOverLimitFunc overNonBMCNonInfoLimit = [this]() { 560 return _sizes.nonBMCServiceable > _maxRepoSize * 30 / 100; 561 }; 562 563 // Bring the total number of PELs down to 80% of the max 564 IsOverLimitFunc tooManyPELsLimit = [this]() { 565 return _pelAttributes.size() > _maxNumPELs * 80 / 100; 566 }; 567 568 // Set up the functions to determine which category a PEL is in. 569 // TODO: Return false in these functions if a PEL caused a guard record. 570 571 // A BMC informational PEL 572 IsPELTypeFunc isBMCInfo = [](const PELAttributes& pel) { 573 return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) && 574 !Repository::isServiceableSev(pel); 575 }; 576 577 // A BMC non informational PEL 578 IsPELTypeFunc isBMCNonInfo = [](const PELAttributes& pel) { 579 return (CreatorID::openBMC == static_cast<CreatorID>(pel.creator)) && 580 Repository::isServiceableSev(pel); 581 }; 582 583 // A non BMC informational PEL 584 IsPELTypeFunc isNonBMCInfo = [](const PELAttributes& pel) { 585 return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) && 586 !Repository::isServiceableSev(pel); 587 }; 588 589 // A non BMC non informational PEL 590 IsPELTypeFunc isNonBMCNonInfo = [](const PELAttributes& pel) { 591 return (CreatorID::openBMC != static_cast<CreatorID>(pel.creator)) && 592 Repository::isServiceableSev(pel); 593 }; 594 595 // When counting PELs, count every PEL 596 IsPELTypeFunc isAnyPEL = [](const PELAttributes& /*pel*/) { return true; }; 597 598 // Check all 4 categories, which will result in at most 90% 599 // usage (15 + 30 + 15 + 30). 600 removePELs(overBMCInfoLimit, isBMCInfo, obmcLogIDs); 601 removePELs(overBMCNonInfoLimit, isBMCNonInfo, obmcLogIDs); 602 removePELs(overNonBMCInfoLimit, isNonBMCInfo, obmcLogIDs); 603 removePELs(overNonBMCNonInfoLimit, isNonBMCNonInfo, obmcLogIDs); 604 605 // After the above pruning check if there are still too many PELs, 606 // which can happen depending on PEL sizes. 607 if (_pelAttributes.size() > _maxNumPELs) 608 { 609 removePELs(tooManyPELsLimit, isAnyPEL, obmcLogIDs); 610 } 611 612 if (!obmcLogIDs.empty()) 613 { 614 std::string msg = "Number of PELs removed to save space: " + 615 std::to_string(obmcLogIDs.size()); 616 log<level::INFO>(msg.c_str()); 617 } 618 619 return obmcLogIDs; 620 } 621 622 void Repository::removePELs(IsOverLimitFunc& isOverLimit, 623 IsPELTypeFunc& isPELType, 624 std::vector<uint32_t>& removedBMCLogIDs) 625 { 626 if (!isOverLimit()) 627 { 628 return; 629 } 630 631 auto attributes = getAllPELAttributes(SortOrder::ascending); 632 633 // Make 4 passes on the PELs, stopping as soon as isOverLimit 634 // returns false. 635 // Pass 1: only delete HMC acked PELs 636 // Pass 2: only delete OS acked PELs 637 // Pass 3: only delete PHYP sent PELs 638 // Pass 4: delete all PELs 639 static const std::vector<std::function<bool(const PELAttributes& pel)>> 640 stateChecks{[](const auto& pel) { 641 return pel.hmcState == TransmissionState::acked; 642 }, 643 644 [](const auto& pel) { 645 return pel.hostState == TransmissionState::acked; 646 }, 647 648 [](const auto& pel) { 649 return pel.hostState == TransmissionState::sent; 650 }, 651 652 [](const auto& /*pel*/) { return true; }}; 653 654 for (const auto& stateCheck : stateChecks) 655 { 656 for (auto it = attributes.begin(); it != attributes.end();) 657 { 658 const auto& pel = it->get(); 659 if (isPELType(pel.second) && stateCheck(pel.second)) 660 { 661 auto removedID = pel.first.obmcID.id; 662 remove(pel.first); 663 664 removedBMCLogIDs.push_back(removedID); 665 666 attributes.erase(it); 667 668 if (!isOverLimit()) 669 { 670 break; 671 } 672 } 673 else 674 { 675 ++it; 676 } 677 } 678 679 if (!isOverLimit()) 680 { 681 break; 682 } 683 } 684 } 685 686 } // namespace pels 687 } // namespace openpower 688