1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "manager.hpp" 17 18 #include "additional_data.hpp" 19 #include "json_utils.hpp" 20 #include "pel.hpp" 21 #include "service_indicators.hpp" 22 23 #include <fmt/format.h> 24 #include <sys/inotify.h> 25 #include <unistd.h> 26 27 #include <filesystem> 28 #include <fstream> 29 #include <xyz/openbmc_project/Common/error.hpp> 30 #include <xyz/openbmc_project/Logging/Create/server.hpp> 31 32 namespace openpower 33 { 34 namespace pels 35 { 36 37 using namespace phosphor::logging; 38 namespace fs = std::filesystem; 39 namespace rg = openpower::pels::message; 40 41 namespace common_error = sdbusplus::xyz::openbmc_project::Common::Error; 42 43 using Create = sdbusplus::xyz::openbmc_project::Logging::server::Create; 44 45 namespace additional_data 46 { 47 constexpr auto rawPEL = "RAWPEL"; 48 constexpr auto esel = "ESEL"; 49 constexpr auto error = "ERROR_NAME"; 50 } // namespace additional_data 51 52 constexpr auto defaultLogMessage = "xyz.openbmc_project.Logging.Error.Default"; 53 54 Manager::~Manager() 55 { 56 if (_pelFileDeleteFD != -1) 57 { 58 if (_pelFileDeleteWatchFD != -1) 59 { 60 inotify_rm_watch(_pelFileDeleteFD, _pelFileDeleteWatchFD); 61 } 62 close(_pelFileDeleteFD); 63 } 64 } 65 66 void Manager::create(const std::string& message, uint32_t obmcLogID, 67 uint64_t timestamp, Entry::Level severity, 68 const std::vector<std::string>& additionalData, 69 const std::vector<std::string>& associations, 70 const FFDCEntries& ffdc) 71 { 72 AdditionalData ad{additionalData}; 73 74 // If a PEL was passed in via a filename or in an ESEL, 75 // use that. Otherwise, create one. 76 auto rawPelPath = ad.getValue(additional_data::rawPEL); 77 if (rawPelPath) 78 { 79 addRawPEL(*rawPelPath, obmcLogID); 80 } 81 else 82 { 83 auto esel = ad.getValue(additional_data::esel); 84 if (esel) 85 { 86 addESELPEL(*esel, obmcLogID); 87 } 88 else 89 { 90 createPEL(message, obmcLogID, timestamp, severity, additionalData, 91 associations, ffdc); 92 } 93 } 94 } 95 96 void Manager::addRawPEL(const std::string& rawPelPath, uint32_t obmcLogID) 97 { 98 if (fs::exists(rawPelPath)) 99 { 100 std::ifstream file(rawPelPath, std::ios::in | std::ios::binary); 101 102 auto data = std::vector<uint8_t>(std::istreambuf_iterator<char>(file), 103 std::istreambuf_iterator<char>()); 104 if (file.fail()) 105 { 106 log<level::ERR>("Filesystem error reading a raw PEL", 107 entry("PELFILE=%s", rawPelPath.c_str()), 108 entry("OBMCLOGID=%d", obmcLogID)); 109 // TODO, Decide what to do here. Maybe nothing. 110 return; 111 } 112 113 file.close(); 114 115 addPEL(data, obmcLogID); 116 } 117 else 118 { 119 log<level::ERR>("Raw PEL file from BMC event log does not exist", 120 entry("PELFILE=%s", (rawPelPath).c_str()), 121 entry("OBMCLOGID=%d", obmcLogID)); 122 } 123 } 124 125 void Manager::addPEL(std::vector<uint8_t>& pelData, uint32_t obmcLogID) 126 { 127 128 auto pel = std::make_unique<openpower::pels::PEL>(pelData, obmcLogID); 129 if (pel->valid()) 130 { 131 // PELs created by others still need these fields set by us. 132 pel->assignID(); 133 pel->setCommitTime(); 134 135 try 136 { 137 log<level::INFO>( 138 fmt::format("Adding external PEL {:#x} (BMC ID {}) to repo", 139 pel->id(), obmcLogID) 140 .c_str()); 141 142 _repo.add(pel); 143 144 if (_repo.sizeWarning()) 145 { 146 scheduleRepoPrune(); 147 } 148 149 // Activate any resulting service indicators if necessary 150 auto policy = service_indicators::getPolicy(*_dataIface); 151 policy->activate(*pel); 152 } 153 catch (std::exception& e) 154 { 155 // Probably a full or r/o filesystem, not much we can do. 156 log<level::ERR>("Unable to add PEL to Repository", 157 entry("PEL_ID=0x%X", pel->id())); 158 } 159 160 // Check if firmware should quiesce system due to error 161 checkPelAndQuiesce(pel); 162 } 163 else 164 { 165 log<level::ERR>("Invalid PEL received from the host", 166 entry("OBMCLOGID=%d", obmcLogID)); 167 168 AdditionalData ad; 169 ad.add("PLID", getNumberString("0x%08X", pel->plid())); 170 ad.add("OBMC_LOG_ID", std::to_string(obmcLogID)); 171 ad.add("PEL_SIZE", std::to_string(pelData.size())); 172 173 std::string asciiString; 174 auto src = pel->primarySRC(); 175 if (src) 176 { 177 asciiString = (*src)->asciiString(); 178 } 179 180 ad.add("SRC", asciiString); 181 182 _eventLogger.log("org.open_power.Logging.Error.BadHostPEL", 183 Entry::Level::Error, ad); 184 185 // Save it to a file for debug in the lab. Just keep the latest. 186 // Not adding it to the PEL because it could already be max size 187 // and don't want to truncate an already invalid PEL. 188 std::ofstream pelFile{getPELRepoPath() / "badPEL"}; 189 pelFile.write(reinterpret_cast<const char*>(pelData.data()), 190 pelData.size()); 191 } 192 } 193 194 void Manager::addESELPEL(const std::string& esel, uint32_t obmcLogID) 195 { 196 std::vector<uint8_t> data; 197 198 log<level::DEBUG>("Adding PEL from ESEL", 199 entry("OBMC_LOG_ID=%d", obmcLogID)); 200 201 try 202 { 203 data = std::move(eselToRawData(esel)); 204 } 205 catch (std::exception& e) 206 { 207 // Try to add it below anyway, so it follows the usual bad data path. 208 log<level::ERR>("Problems converting ESEL string to a byte vector"); 209 } 210 211 addPEL(data, obmcLogID); 212 } 213 214 std::vector<uint8_t> Manager::eselToRawData(const std::string& esel) 215 { 216 std::vector<uint8_t> data; 217 std::string byteString; 218 219 // As the eSEL string looks like: "50 48 00 ab ..." there are 3 220 // characters per raw byte, and since the actual PEL data starts 221 // at the 16th byte, the code will grab the PEL data starting at 222 // offset 48 in the string. 223 static constexpr size_t pelStart = 16 * 3; 224 225 if (esel.size() <= pelStart) 226 { 227 log<level::ERR>("ESEL data too short", 228 entry("ESEL_SIZE=%d", esel.size())); 229 230 throw std::length_error("ESEL data too short"); 231 } 232 233 for (size_t i = pelStart; i < esel.size(); i += 3) 234 { 235 if (i + 1 < esel.size()) 236 { 237 byteString = esel.substr(i, 2); 238 data.push_back(std::stoi(byteString, nullptr, 16)); 239 } 240 else 241 { 242 log<level::ERR>("ESEL data too short", 243 entry("ESEL_SIZE=%d", esel.size())); 244 throw std::length_error("ESEL data too short"); 245 } 246 } 247 248 return data; 249 } 250 251 void Manager::erase(uint32_t obmcLogID) 252 { 253 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 254 255 _repo.remove(id); 256 } 257 258 bool Manager::isDeleteProhibited(uint32_t obmcLogID) 259 { 260 return false; 261 } 262 263 PelFFDC Manager::convertToPelFFDC(const FFDCEntries& ffdc) 264 { 265 PelFFDC pelFFDC; 266 267 std::for_each(ffdc.begin(), ffdc.end(), [&pelFFDC](const auto& f) { 268 PelFFDCfile pf; 269 pf.subType = std::get<ffdcSubtypePos>(f); 270 pf.version = std::get<ffdcVersionPos>(f); 271 pf.fd = std::get<ffdcFDPos>(f); 272 273 switch (std::get<ffdcFormatPos>(f)) 274 { 275 case Create::FFDCFormat::JSON: 276 pf.format = UserDataFormat::json; 277 break; 278 case Create::FFDCFormat::CBOR: 279 pf.format = UserDataFormat::cbor; 280 break; 281 case Create::FFDCFormat::Text: 282 pf.format = UserDataFormat::text; 283 break; 284 case Create::FFDCFormat::Custom: 285 pf.format = UserDataFormat::custom; 286 break; 287 } 288 289 pelFFDC.push_back(pf); 290 }); 291 292 return pelFFDC; 293 } 294 295 void Manager::createPEL(const std::string& message, uint32_t obmcLogID, 296 uint64_t timestamp, 297 phosphor::logging::Entry::Level severity, 298 const std::vector<std::string>& additionalData, 299 const std::vector<std::string>& associations, 300 const FFDCEntries& ffdc) 301 { 302 auto entry = _registry.lookup(message, rg::LookupType::name); 303 auto pelFFDC = convertToPelFFDC(ffdc); 304 AdditionalData ad{additionalData}; 305 std::string msg; 306 307 if (!entry) 308 { 309 // Instead, get the default entry that means there is no 310 // other matching entry. This error will still use the 311 // AdditionalData values of the original error, and this 312 // code will add the error message value that wasn't found 313 // to this AD. This way, there will at least be a PEL, 314 // possibly with callouts, to allow users to debug the 315 // issue that caused the error even without its own PEL. 316 msg = "Event not found in PEL message registry: " + message; 317 log<level::INFO>(msg.c_str()); 318 319 entry = _registry.lookup(defaultLogMessage, rg::LookupType::name); 320 if (!entry) 321 { 322 log<level::ERR>("Default event not found in PEL message registry"); 323 return; 324 } 325 326 ad.add(additional_data::error, message); 327 } 328 329 auto pel = std::make_unique<openpower::pels::PEL>( 330 *entry, obmcLogID, timestamp, severity, ad, pelFFDC, *_dataIface); 331 332 _repo.add(pel); 333 334 if (_repo.sizeWarning()) 335 { 336 scheduleRepoPrune(); 337 } 338 339 auto src = pel->primarySRC(); 340 if (src) 341 { 342 auto msg = 343 fmt::format("Created PEL {:#x} (BMC ID {}) with SRC {}", pel->id(), 344 pel->obmcLogID(), (*src)->asciiString()); 345 while (msg.back() == ' ') 346 { 347 msg.pop_back(); 348 } 349 log<level::INFO>(msg.c_str()); 350 } 351 352 // Activate any resulting service indicators if necessary 353 auto policy = service_indicators::getPolicy(*_dataIface); 354 policy->activate(*pel); 355 356 // Check if firmware should quiesce system due to error 357 checkPelAndQuiesce(pel); 358 } 359 360 sdbusplus::message::unix_fd Manager::getPEL(uint32_t pelID) 361 { 362 Repository::LogID id{Repository::LogID::Pel(pelID)}; 363 std::optional<int> fd; 364 365 log<level::DEBUG>("getPEL", entry("PEL_ID=0x%X", pelID)); 366 367 try 368 { 369 fd = _repo.getPELFD(id); 370 } 371 catch (std::exception& e) 372 { 373 throw common_error::InternalFailure(); 374 } 375 376 if (!fd) 377 { 378 throw common_error::InvalidArgument(); 379 } 380 381 scheduleFDClose(*fd); 382 383 return *fd; 384 } 385 386 void Manager::scheduleFDClose(int fd) 387 { 388 _fdCloserEventSource = std::make_unique<sdeventplus::source::Defer>( 389 _event, std::bind(std::mem_fn(&Manager::closeFD), this, fd, 390 std::placeholders::_1)); 391 } 392 393 void Manager::closeFD(int fd, sdeventplus::source::EventBase& source) 394 { 395 close(fd); 396 _fdCloserEventSource.reset(); 397 } 398 399 std::vector<uint8_t> Manager::getPELFromOBMCID(uint32_t obmcLogID) 400 { 401 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 402 std::optional<std::vector<uint8_t>> data; 403 404 log<level::DEBUG>("getPELFromOBMCID", entry("OBMC_LOG_ID=%d", obmcLogID)); 405 406 try 407 { 408 data = _repo.getPELData(id); 409 } 410 catch (std::exception& e) 411 { 412 throw common_error::InternalFailure(); 413 } 414 415 if (!data) 416 { 417 throw common_error::InvalidArgument(); 418 } 419 420 return *data; 421 } 422 423 void Manager::hostAck(uint32_t pelID) 424 { 425 Repository::LogID id{Repository::LogID::Pel(pelID)}; 426 427 log<level::DEBUG>("HostAck", entry("PEL_ID=0x%X", pelID)); 428 429 if (!_repo.hasPEL(id)) 430 { 431 throw common_error::InvalidArgument(); 432 } 433 434 if (_hostNotifier) 435 { 436 _hostNotifier->ackPEL(pelID); 437 } 438 } 439 440 void Manager::hostReject(uint32_t pelID, RejectionReason reason) 441 { 442 Repository::LogID id{Repository::LogID::Pel(pelID)}; 443 444 log<level::DEBUG>("HostReject", entry("PEL_ID=0x%X", pelID), 445 entry("REASON=%d", static_cast<int>(reason))); 446 447 if (!_repo.hasPEL(id)) 448 { 449 throw common_error::InvalidArgument(); 450 } 451 452 if (reason == RejectionReason::BadPEL) 453 { 454 AdditionalData data; 455 data.add("BAD_ID", getNumberString("0x%08X", pelID)); 456 _eventLogger.log("org.open_power.Logging.Error.SentBadPELToHost", 457 Entry::Level::Informational, data); 458 if (_hostNotifier) 459 { 460 _hostNotifier->setBadPEL(pelID); 461 } 462 } 463 else if ((reason == RejectionReason::HostFull) && _hostNotifier) 464 { 465 _hostNotifier->setHostFull(pelID); 466 } 467 } 468 469 void Manager::scheduleRepoPrune() 470 { 471 _repoPrunerEventSource = std::make_unique<sdeventplus::source::Defer>( 472 _event, std::bind(std::mem_fn(&Manager::pruneRepo), this, 473 std::placeholders::_1)); 474 } 475 476 void Manager::pruneRepo(sdeventplus::source::EventBase& source) 477 { 478 auto idsToDelete = _repo.prune(); 479 480 // Remove the OpenBMC event logs for the PELs that were just removed. 481 std::for_each(idsToDelete.begin(), idsToDelete.end(), 482 [this](auto id) { this->_logManager.erase(id); }); 483 484 _repoPrunerEventSource.reset(); 485 } 486 487 void Manager::setupPELDeleteWatch() 488 { 489 _pelFileDeleteFD = inotify_init1(IN_NONBLOCK); 490 if (-1 == _pelFileDeleteFD) 491 { 492 auto e = errno; 493 std::string msg = 494 "inotify_init1 failed with errno " + std::to_string(e); 495 log<level::ERR>(msg.c_str()); 496 abort(); 497 } 498 499 _pelFileDeleteWatchFD = inotify_add_watch( 500 _pelFileDeleteFD, _repo.repoPath().c_str(), IN_DELETE); 501 if (-1 == _pelFileDeleteWatchFD) 502 { 503 auto e = errno; 504 std::string msg = 505 "inotify_add_watch failed with error " + std::to_string(e); 506 log<level::ERR>(msg.c_str()); 507 abort(); 508 } 509 510 _pelFileDeleteEventSource = std::make_unique<sdeventplus::source::IO>( 511 _event, _pelFileDeleteFD, EPOLLIN, 512 std::bind(std::mem_fn(&Manager::pelFileDeleted), this, 513 std::placeholders::_1, std::placeholders::_2, 514 std::placeholders::_3)); 515 } 516 517 void Manager::pelFileDeleted(sdeventplus::source::IO& io, int fd, 518 uint32_t revents) 519 { 520 if (!(revents & EPOLLIN)) 521 { 522 return; 523 } 524 525 // An event for 1 PEL uses 48B. When all PELs are deleted at once, 526 // as many events as there is room for can be handled in one callback. 527 // A size of 2000 will allow 41 to be processed, with additional 528 // callbacks being needed to process the remaining ones. 529 std::array<uint8_t, 2000> data; 530 auto bytesRead = read(_pelFileDeleteFD, data.data(), data.size()); 531 if (bytesRead < 0) 532 { 533 auto e = errno; 534 std::string msg = "Failed reading data from inotify event, errno = " + 535 std::to_string(e); 536 log<level::ERR>(msg.c_str()); 537 abort(); 538 } 539 540 auto offset = 0; 541 while (offset < bytesRead) 542 { 543 auto event = reinterpret_cast<inotify_event*>(&data[offset]); 544 if (event->mask & IN_DELETE) 545 { 546 std::string filename{event->name}; 547 548 // Get the PEL ID from the filename and tell the 549 // repo it's been removed, and then delete the BMC 550 // event log if it's there. 551 auto pos = filename.find_first_of('_'); 552 if (pos != std::string::npos) 553 { 554 try 555 { 556 auto idString = filename.substr(pos + 1); 557 auto pelID = std::stoul(idString, nullptr, 16); 558 559 Repository::LogID id{Repository::LogID::Pel(pelID)}; 560 auto removedLogID = _repo.remove(id); 561 if (removedLogID) 562 { 563 _logManager.erase(removedLogID->obmcID.id); 564 } 565 } 566 catch (const std::exception& e) 567 { 568 log<level::INFO>("Could not find PEL ID from its filename", 569 entry("FILENAME=%s", filename.c_str())); 570 } 571 } 572 } 573 574 offset += offsetof(inotify_event, name) + event->len; 575 } 576 } 577 578 std::tuple<uint32_t, uint32_t> Manager::createPELWithFFDCFiles( 579 std::string message, Entry::Level severity, 580 std::map<std::string, std::string> additionalData, 581 std::vector<std::tuple< 582 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat, 583 uint8_t, uint8_t, sdbusplus::message::unix_fd>> 584 fFDC) 585 { 586 _logManager.createWithFFDC(message, severity, additionalData, fFDC); 587 588 return {_logManager.lastEntryID(), _repo.lastPelID()}; 589 } 590 591 void Manager::checkPelAndQuiesce(std::unique_ptr<openpower::pels::PEL>& pel) 592 { 593 if (pel->userHeader().severity() == 594 static_cast<uint8_t>(SeverityType::nonError)) 595 { 596 log<level::DEBUG>("PEL severity informational. no quiesce needed"); 597 return; 598 } 599 if (!_logManager.isQuiesceOnErrorEnabled()) 600 { 601 log<level::DEBUG>("QuiesceOnHwError not enabled, no quiesce needed"); 602 return; 603 } 604 605 // Now check if it has any type of callout 606 if (pel->isCalloutPresent()) 607 { 608 log<level::INFO>("QuiesceOnHwError enabled, PEL severity not nonError, " 609 "and callout is present"); 610 611 _logManager.quiesceOnError(pel->obmcLogID()); 612 } 613 } 614 615 } // namespace pels 616 } // namespace openpower 617