1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "manager.hpp" 17 18 #include "additional_data.hpp" 19 #include "json_utils.hpp" 20 #include "pel.hpp" 21 #include "service_indicators.hpp" 22 23 #include <fmt/format.h> 24 #include <sys/inotify.h> 25 #include <unistd.h> 26 27 #include <filesystem> 28 #include <fstream> 29 #include <xyz/openbmc_project/Common/error.hpp> 30 #include <xyz/openbmc_project/Logging/Create/server.hpp> 31 32 namespace openpower 33 { 34 namespace pels 35 { 36 37 using namespace phosphor::logging; 38 namespace fs = std::filesystem; 39 namespace rg = openpower::pels::message; 40 41 namespace common_error = sdbusplus::xyz::openbmc_project::Common::Error; 42 43 using Create = sdbusplus::xyz::openbmc_project::Logging::server::Create; 44 45 namespace additional_data 46 { 47 constexpr auto rawPEL = "RAWPEL"; 48 constexpr auto esel = "ESEL"; 49 constexpr auto error = "ERROR_NAME"; 50 } // namespace additional_data 51 52 constexpr auto defaultLogMessage = "xyz.openbmc_project.Logging.Error.Default"; 53 54 Manager::~Manager() 55 { 56 if (_pelFileDeleteFD != -1) 57 { 58 if (_pelFileDeleteWatchFD != -1) 59 { 60 inotify_rm_watch(_pelFileDeleteFD, _pelFileDeleteWatchFD); 61 } 62 close(_pelFileDeleteFD); 63 } 64 } 65 66 void Manager::create(const std::string& message, uint32_t obmcLogID, 67 uint64_t timestamp, Entry::Level severity, 68 const std::vector<std::string>& additionalData, 69 const std::vector<std::string>& associations, 70 const FFDCEntries& ffdc) 71 { 72 AdditionalData ad{additionalData}; 73 74 // If a PEL was passed in via a filename or in an ESEL, 75 // use that. Otherwise, create one. 76 auto rawPelPath = ad.getValue(additional_data::rawPEL); 77 if (rawPelPath) 78 { 79 addRawPEL(*rawPelPath, obmcLogID); 80 } 81 else 82 { 83 auto esel = ad.getValue(additional_data::esel); 84 if (esel) 85 { 86 addESELPEL(*esel, obmcLogID); 87 } 88 else 89 { 90 createPEL(message, obmcLogID, timestamp, severity, additionalData, 91 associations, ffdc); 92 } 93 } 94 95 setEntryPath(obmcLogID); 96 } 97 98 void Manager::addRawPEL(const std::string& rawPelPath, uint32_t obmcLogID) 99 { 100 if (fs::exists(rawPelPath)) 101 { 102 std::ifstream file(rawPelPath, std::ios::in | std::ios::binary); 103 104 auto data = std::vector<uint8_t>(std::istreambuf_iterator<char>(file), 105 std::istreambuf_iterator<char>()); 106 if (file.fail()) 107 { 108 log<level::ERR>("Filesystem error reading a raw PEL", 109 entry("PELFILE=%s", rawPelPath.c_str()), 110 entry("OBMCLOGID=%d", obmcLogID)); 111 // TODO, Decide what to do here. Maybe nothing. 112 return; 113 } 114 115 file.close(); 116 117 addPEL(data, obmcLogID); 118 } 119 else 120 { 121 log<level::ERR>("Raw PEL file from BMC event log does not exist", 122 entry("PELFILE=%s", (rawPelPath).c_str()), 123 entry("OBMCLOGID=%d", obmcLogID)); 124 } 125 } 126 127 void Manager::addPEL(std::vector<uint8_t>& pelData, uint32_t obmcLogID) 128 { 129 130 auto pel = std::make_unique<openpower::pels::PEL>(pelData, obmcLogID); 131 if (pel->valid()) 132 { 133 // PELs created by others still need these fields set by us. 134 pel->assignID(); 135 pel->setCommitTime(); 136 137 // Update System Info to Extended User Data 138 pel->updateSysInfoInExtendedUserDataSection(*_dataIface); 139 140 try 141 { 142 log<level::DEBUG>( 143 fmt::format("Adding external PEL {:#x} (BMC ID {}) to repo", 144 pel->id(), obmcLogID) 145 .c_str()); 146 147 _repo.add(pel); 148 149 if (_repo.sizeWarning()) 150 { 151 scheduleRepoPrune(); 152 } 153 154 // Activate any resulting service indicators if necessary 155 auto policy = service_indicators::getPolicy(*_dataIface); 156 policy->activate(*pel); 157 } 158 catch (std::exception& e) 159 { 160 // Probably a full or r/o filesystem, not much we can do. 161 log<level::ERR>("Unable to add PEL to Repository", 162 entry("PEL_ID=0x%X", pel->id())); 163 } 164 165 // Check if firmware should quiesce system due to error 166 checkPelAndQuiesce(pel); 167 } 168 else 169 { 170 log<level::ERR>("Invalid PEL received from the host", 171 entry("OBMCLOGID=%d", obmcLogID)); 172 173 AdditionalData ad; 174 ad.add("PLID", getNumberString("0x%08X", pel->plid())); 175 ad.add("OBMC_LOG_ID", std::to_string(obmcLogID)); 176 ad.add("PEL_SIZE", std::to_string(pelData.size())); 177 178 std::string asciiString; 179 auto src = pel->primarySRC(); 180 if (src) 181 { 182 asciiString = (*src)->asciiString(); 183 } 184 185 ad.add("SRC", asciiString); 186 187 _eventLogger.log("org.open_power.Logging.Error.BadHostPEL", 188 Entry::Level::Error, ad); 189 190 // Save it to a file for debug in the lab. Just keep the latest. 191 // Not adding it to the PEL because it could already be max size 192 // and don't want to truncate an already invalid PEL. 193 std::ofstream pelFile{getPELRepoPath() / "badPEL"}; 194 pelFile.write(reinterpret_cast<const char*>(pelData.data()), 195 pelData.size()); 196 } 197 } 198 199 void Manager::addESELPEL(const std::string& esel, uint32_t obmcLogID) 200 { 201 std::vector<uint8_t> data; 202 203 log<level::DEBUG>("Adding PEL from ESEL", 204 entry("OBMC_LOG_ID=%d", obmcLogID)); 205 206 try 207 { 208 data = std::move(eselToRawData(esel)); 209 } 210 catch (std::exception& e) 211 { 212 // Try to add it below anyway, so it follows the usual bad data path. 213 log<level::ERR>("Problems converting ESEL string to a byte vector"); 214 } 215 216 addPEL(data, obmcLogID); 217 } 218 219 std::vector<uint8_t> Manager::eselToRawData(const std::string& esel) 220 { 221 std::vector<uint8_t> data; 222 std::string byteString; 223 224 // As the eSEL string looks like: "50 48 00 ab ..." there are 3 225 // characters per raw byte, and since the actual PEL data starts 226 // at the 16th byte, the code will grab the PEL data starting at 227 // offset 48 in the string. 228 static constexpr size_t pelStart = 16 * 3; 229 230 if (esel.size() <= pelStart) 231 { 232 log<level::ERR>("ESEL data too short", 233 entry("ESEL_SIZE=%d", esel.size())); 234 235 throw std::length_error("ESEL data too short"); 236 } 237 238 for (size_t i = pelStart; i < esel.size(); i += 3) 239 { 240 if (i + 1 < esel.size()) 241 { 242 byteString = esel.substr(i, 2); 243 data.push_back(std::stoi(byteString, nullptr, 16)); 244 } 245 else 246 { 247 log<level::ERR>("ESEL data too short", 248 entry("ESEL_SIZE=%d", esel.size())); 249 throw std::length_error("ESEL data too short"); 250 } 251 } 252 253 return data; 254 } 255 256 void Manager::erase(uint32_t obmcLogID) 257 { 258 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 259 260 _repo.remove(id); 261 } 262 263 bool Manager::isDeleteProhibited(uint32_t /*obmcLogID*/) 264 { 265 return false; 266 } 267 268 PelFFDC Manager::convertToPelFFDC(const FFDCEntries& ffdc) 269 { 270 PelFFDC pelFFDC; 271 272 std::for_each(ffdc.begin(), ffdc.end(), [&pelFFDC](const auto& f) { 273 PelFFDCfile pf; 274 pf.subType = std::get<ffdcSubtypePos>(f); 275 pf.version = std::get<ffdcVersionPos>(f); 276 pf.fd = std::get<ffdcFDPos>(f); 277 278 switch (std::get<ffdcFormatPos>(f)) 279 { 280 case Create::FFDCFormat::JSON: 281 pf.format = UserDataFormat::json; 282 break; 283 case Create::FFDCFormat::CBOR: 284 pf.format = UserDataFormat::cbor; 285 break; 286 case Create::FFDCFormat::Text: 287 pf.format = UserDataFormat::text; 288 break; 289 case Create::FFDCFormat::Custom: 290 pf.format = UserDataFormat::custom; 291 break; 292 } 293 294 pelFFDC.push_back(pf); 295 }); 296 297 return pelFFDC; 298 } 299 300 void Manager::createPEL(const std::string& message, uint32_t obmcLogID, 301 uint64_t timestamp, 302 phosphor::logging::Entry::Level severity, 303 const std::vector<std::string>& additionalData, 304 const std::vector<std::string>& /*associations*/, 305 const FFDCEntries& ffdc) 306 { 307 auto entry = _registry.lookup(message, rg::LookupType::name); 308 auto pelFFDC = convertToPelFFDC(ffdc); 309 AdditionalData ad{additionalData}; 310 std::string msg; 311 312 if (!entry) 313 { 314 // Instead, get the default entry that means there is no 315 // other matching entry. This error will still use the 316 // AdditionalData values of the original error, and this 317 // code will add the error message value that wasn't found 318 // to this AD. This way, there will at least be a PEL, 319 // possibly with callouts, to allow users to debug the 320 // issue that caused the error even without its own PEL. 321 msg = "Event not found in PEL message registry: " + message; 322 log<level::INFO>(msg.c_str()); 323 324 entry = _registry.lookup(defaultLogMessage, rg::LookupType::name); 325 if (!entry) 326 { 327 log<level::ERR>("Default event not found in PEL message registry"); 328 return; 329 } 330 331 ad.add(additional_data::error, message); 332 } 333 334 auto pel = std::make_unique<openpower::pels::PEL>( 335 *entry, obmcLogID, timestamp, severity, ad, pelFFDC, *_dataIface); 336 337 _repo.add(pel); 338 339 if (_repo.sizeWarning()) 340 { 341 scheduleRepoPrune(); 342 } 343 344 auto src = pel->primarySRC(); 345 if (src) 346 { 347 auto msg = 348 fmt::format("Created PEL {:#x} (BMC ID {}) with SRC {}", pel->id(), 349 pel->obmcLogID(), (*src)->asciiString()); 350 while (msg.back() == ' ') 351 { 352 msg.pop_back(); 353 } 354 log<level::INFO>(msg.c_str()); 355 } 356 357 // Activate any resulting service indicators if necessary 358 auto policy = service_indicators::getPolicy(*_dataIface); 359 policy->activate(*pel); 360 361 // Check if firmware should quiesce system due to error 362 checkPelAndQuiesce(pel); 363 } 364 365 sdbusplus::message::unix_fd Manager::getPEL(uint32_t pelID) 366 { 367 Repository::LogID id{Repository::LogID::Pel(pelID)}; 368 std::optional<int> fd; 369 370 log<level::DEBUG>("getPEL", entry("PEL_ID=0x%X", pelID)); 371 372 try 373 { 374 fd = _repo.getPELFD(id); 375 } 376 catch (std::exception& e) 377 { 378 throw common_error::InternalFailure(); 379 } 380 381 if (!fd) 382 { 383 throw common_error::InvalidArgument(); 384 } 385 386 scheduleFDClose(*fd); 387 388 return *fd; 389 } 390 391 void Manager::scheduleFDClose(int fd) 392 { 393 _fdCloserEventSource = std::make_unique<sdeventplus::source::Defer>( 394 _event, std::bind(std::mem_fn(&Manager::closeFD), this, fd, 395 std::placeholders::_1)); 396 } 397 398 void Manager::closeFD(int fd, sdeventplus::source::EventBase& /*source*/) 399 { 400 close(fd); 401 _fdCloserEventSource.reset(); 402 } 403 404 std::vector<uint8_t> Manager::getPELFromOBMCID(uint32_t obmcLogID) 405 { 406 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 407 std::optional<std::vector<uint8_t>> data; 408 409 log<level::DEBUG>("getPELFromOBMCID", entry("OBMC_LOG_ID=%d", obmcLogID)); 410 411 try 412 { 413 data = _repo.getPELData(id); 414 } 415 catch (std::exception& e) 416 { 417 throw common_error::InternalFailure(); 418 } 419 420 if (!data) 421 { 422 throw common_error::InvalidArgument(); 423 } 424 425 return *data; 426 } 427 428 void Manager::hostAck(uint32_t pelID) 429 { 430 Repository::LogID id{Repository::LogID::Pel(pelID)}; 431 432 log<level::DEBUG>("HostAck", entry("PEL_ID=0x%X", pelID)); 433 434 if (!_repo.hasPEL(id)) 435 { 436 throw common_error::InvalidArgument(); 437 } 438 439 if (_hostNotifier) 440 { 441 _hostNotifier->ackPEL(pelID); 442 } 443 } 444 445 void Manager::hostReject(uint32_t pelID, RejectionReason reason) 446 { 447 Repository::LogID id{Repository::LogID::Pel(pelID)}; 448 449 log<level::DEBUG>("HostReject", entry("PEL_ID=0x%X", pelID), 450 entry("REASON=%d", static_cast<int>(reason))); 451 452 if (!_repo.hasPEL(id)) 453 { 454 throw common_error::InvalidArgument(); 455 } 456 457 if (reason == RejectionReason::BadPEL) 458 { 459 AdditionalData data; 460 data.add("BAD_ID", getNumberString("0x%08X", pelID)); 461 _eventLogger.log("org.open_power.Logging.Error.SentBadPELToHost", 462 Entry::Level::Informational, data); 463 if (_hostNotifier) 464 { 465 _hostNotifier->setBadPEL(pelID); 466 } 467 } 468 else if ((reason == RejectionReason::HostFull) && _hostNotifier) 469 { 470 _hostNotifier->setHostFull(pelID); 471 } 472 } 473 474 void Manager::scheduleRepoPrune() 475 { 476 _repoPrunerEventSource = std::make_unique<sdeventplus::source::Defer>( 477 _event, std::bind(std::mem_fn(&Manager::pruneRepo), this, 478 std::placeholders::_1)); 479 } 480 481 void Manager::pruneRepo(sdeventplus::source::EventBase& /*source*/) 482 { 483 auto idsToDelete = _repo.prune(); 484 485 // Remove the OpenBMC event logs for the PELs that were just removed. 486 std::for_each(idsToDelete.begin(), idsToDelete.end(), 487 [this](auto id) { this->_logManager.erase(id); }); 488 489 _repoPrunerEventSource.reset(); 490 } 491 492 void Manager::setupPELDeleteWatch() 493 { 494 _pelFileDeleteFD = inotify_init1(IN_NONBLOCK); 495 if (-1 == _pelFileDeleteFD) 496 { 497 auto e = errno; 498 std::string msg = 499 "inotify_init1 failed with errno " + std::to_string(e); 500 log<level::ERR>(msg.c_str()); 501 abort(); 502 } 503 504 _pelFileDeleteWatchFD = inotify_add_watch( 505 _pelFileDeleteFD, _repo.repoPath().c_str(), IN_DELETE); 506 if (-1 == _pelFileDeleteWatchFD) 507 { 508 auto e = errno; 509 std::string msg = 510 "inotify_add_watch failed with error " + std::to_string(e); 511 log<level::ERR>(msg.c_str()); 512 abort(); 513 } 514 515 _pelFileDeleteEventSource = std::make_unique<sdeventplus::source::IO>( 516 _event, _pelFileDeleteFD, EPOLLIN, 517 std::bind(std::mem_fn(&Manager::pelFileDeleted), this, 518 std::placeholders::_1, std::placeholders::_2, 519 std::placeholders::_3)); 520 } 521 522 void Manager::pelFileDeleted(sdeventplus::source::IO& /*io*/, int /*fd*/, 523 uint32_t revents) 524 { 525 if (!(revents & EPOLLIN)) 526 { 527 return; 528 } 529 530 // An event for 1 PEL uses 48B. When all PELs are deleted at once, 531 // as many events as there is room for can be handled in one callback. 532 // A size of 2000 will allow 41 to be processed, with additional 533 // callbacks being needed to process the remaining ones. 534 std::array<uint8_t, 2000> data{}; 535 auto bytesRead = read(_pelFileDeleteFD, data.data(), data.size()); 536 if (bytesRead < 0) 537 { 538 auto e = errno; 539 std::string msg = "Failed reading data from inotify event, errno = " + 540 std::to_string(e); 541 log<level::ERR>(msg.c_str()); 542 abort(); 543 } 544 545 auto offset = 0; 546 while (offset < bytesRead) 547 { 548 auto event = reinterpret_cast<inotify_event*>(&data[offset]); 549 if (event->mask & IN_DELETE) 550 { 551 std::string filename{event->name}; 552 553 // Get the PEL ID from the filename and tell the 554 // repo it's been removed, and then delete the BMC 555 // event log if it's there. 556 auto pos = filename.find_first_of('_'); 557 if (pos != std::string::npos) 558 { 559 try 560 { 561 auto idString = filename.substr(pos + 1); 562 auto pelID = std::stoul(idString, nullptr, 16); 563 564 Repository::LogID id{Repository::LogID::Pel(pelID)}; 565 auto removedLogID = _repo.remove(id); 566 if (removedLogID) 567 { 568 _logManager.erase(removedLogID->obmcID.id); 569 } 570 } 571 catch (const std::exception& e) 572 { 573 log<level::INFO>("Could not find PEL ID from its filename", 574 entry("FILENAME=%s", filename.c_str())); 575 } 576 } 577 } 578 579 offset += offsetof(inotify_event, name) + event->len; 580 } 581 } 582 583 std::tuple<uint32_t, uint32_t> Manager::createPELWithFFDCFiles( 584 std::string message, Entry::Level severity, 585 std::map<std::string, std::string> additionalData, 586 std::vector<std::tuple< 587 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat, 588 uint8_t, uint8_t, sdbusplus::message::unix_fd>> 589 fFDC) 590 { 591 _logManager.createWithFFDC(message, severity, additionalData, fFDC); 592 593 return {_logManager.lastEntryID(), _repo.lastPelID()}; 594 } 595 596 void Manager::checkPelAndQuiesce(std::unique_ptr<openpower::pels::PEL>& pel) 597 { 598 if ((pel->userHeader().severity() == 599 static_cast<uint8_t>(SeverityType::nonError)) || 600 (pel->userHeader().severity() == 601 static_cast<uint8_t>(SeverityType::recovered))) 602 { 603 log<level::DEBUG>( 604 "PEL severity informational or recovered. no quiesce needed"); 605 return; 606 } 607 if (!_logManager.isQuiesceOnErrorEnabled()) 608 { 609 log<level::DEBUG>("QuiesceOnHwError not enabled, no quiesce needed"); 610 return; 611 } 612 613 // Now check if it has any type of callout 614 if (pel->isCalloutPresent()) 615 { 616 log<level::INFO>( 617 "QuiesceOnHwError enabled, PEL severity not nonError or recovered, " 618 "and callout is present"); 619 620 _logManager.quiesceOnError(pel->obmcLogID()); 621 } 622 } 623 624 void Manager::setEntryPath(uint32_t obmcLogID) 625 { 626 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 627 if (auto attributes = _repo.getPELAttributes(id); attributes) 628 { 629 auto& attr = attributes.value().get(); 630 auto entry = _logManager.entries.find(obmcLogID); 631 if (entry != _logManager.entries.end()) 632 { 633 entry->second->path(attr.path); 634 } 635 } 636 } 637 638 } // namespace pels 639 } // namespace openpower 640