1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "manager.hpp" 17 18 #include "additional_data.hpp" 19 #include "json_utils.hpp" 20 #include "pel.hpp" 21 #include "service_indicators.hpp" 22 23 #include <fmt/format.h> 24 #include <sys/inotify.h> 25 #include <unistd.h> 26 27 #include <filesystem> 28 #include <fstream> 29 #include <xyz/openbmc_project/Common/error.hpp> 30 #include <xyz/openbmc_project/Logging/Create/server.hpp> 31 32 namespace openpower 33 { 34 namespace pels 35 { 36 37 using namespace phosphor::logging; 38 namespace fs = std::filesystem; 39 namespace rg = openpower::pels::message; 40 41 namespace common_error = sdbusplus::xyz::openbmc_project::Common::Error; 42 43 using Create = sdbusplus::xyz::openbmc_project::Logging::server::Create; 44 45 namespace additional_data 46 { 47 constexpr auto rawPEL = "RAWPEL"; 48 constexpr auto esel = "ESEL"; 49 constexpr auto error = "ERROR_NAME"; 50 } // namespace additional_data 51 52 constexpr auto defaultLogMessage = "xyz.openbmc_project.Logging.Error.Default"; 53 54 Manager::~Manager() 55 { 56 if (_pelFileDeleteFD != -1) 57 { 58 if (_pelFileDeleteWatchFD != -1) 59 { 60 inotify_rm_watch(_pelFileDeleteFD, _pelFileDeleteWatchFD); 61 } 62 close(_pelFileDeleteFD); 63 } 64 } 65 66 void Manager::create(const std::string& message, uint32_t obmcLogID, 67 uint64_t timestamp, Entry::Level severity, 68 const std::vector<std::string>& additionalData, 69 const std::vector<std::string>& associations, 70 const FFDCEntries& ffdc) 71 { 72 AdditionalData ad{additionalData}; 73 74 // If a PEL was passed in via a filename or in an ESEL, 75 // use that. Otherwise, create one. 76 auto rawPelPath = ad.getValue(additional_data::rawPEL); 77 if (rawPelPath) 78 { 79 addRawPEL(*rawPelPath, obmcLogID); 80 } 81 else 82 { 83 auto esel = ad.getValue(additional_data::esel); 84 if (esel) 85 { 86 addESELPEL(*esel, obmcLogID); 87 } 88 else 89 { 90 createPEL(message, obmcLogID, timestamp, severity, additionalData, 91 associations, ffdc); 92 } 93 } 94 95 setEntryPath(obmcLogID); 96 } 97 98 void Manager::addRawPEL(const std::string& rawPelPath, uint32_t obmcLogID) 99 { 100 if (fs::exists(rawPelPath)) 101 { 102 std::ifstream file(rawPelPath, std::ios::in | std::ios::binary); 103 104 auto data = std::vector<uint8_t>(std::istreambuf_iterator<char>(file), 105 std::istreambuf_iterator<char>()); 106 if (file.fail()) 107 { 108 log<level::ERR>("Filesystem error reading a raw PEL", 109 entry("PELFILE=%s", rawPelPath.c_str()), 110 entry("OBMCLOGID=%d", obmcLogID)); 111 // TODO, Decide what to do here. Maybe nothing. 112 return; 113 } 114 115 file.close(); 116 117 addPEL(data, obmcLogID); 118 } 119 else 120 { 121 log<level::ERR>("Raw PEL file from BMC event log does not exist", 122 entry("PELFILE=%s", (rawPelPath).c_str()), 123 entry("OBMCLOGID=%d", obmcLogID)); 124 } 125 } 126 127 void Manager::addPEL(std::vector<uint8_t>& pelData, uint32_t obmcLogID) 128 { 129 130 auto pel = std::make_unique<openpower::pels::PEL>(pelData, obmcLogID); 131 if (pel->valid()) 132 { 133 // PELs created by others still need these fields set by us. 134 pel->assignID(); 135 pel->setCommitTime(); 136 137 try 138 { 139 log<level::DEBUG>( 140 fmt::format("Adding external PEL {:#x} (BMC ID {}) to repo", 141 pel->id(), obmcLogID) 142 .c_str()); 143 144 _repo.add(pel); 145 146 if (_repo.sizeWarning()) 147 { 148 scheduleRepoPrune(); 149 } 150 151 // Activate any resulting service indicators if necessary 152 auto policy = service_indicators::getPolicy(*_dataIface); 153 policy->activate(*pel); 154 } 155 catch (std::exception& e) 156 { 157 // Probably a full or r/o filesystem, not much we can do. 158 log<level::ERR>("Unable to add PEL to Repository", 159 entry("PEL_ID=0x%X", pel->id())); 160 } 161 162 // Check if firmware should quiesce system due to error 163 checkPelAndQuiesce(pel); 164 } 165 else 166 { 167 log<level::ERR>("Invalid PEL received from the host", 168 entry("OBMCLOGID=%d", obmcLogID)); 169 170 AdditionalData ad; 171 ad.add("PLID", getNumberString("0x%08X", pel->plid())); 172 ad.add("OBMC_LOG_ID", std::to_string(obmcLogID)); 173 ad.add("PEL_SIZE", std::to_string(pelData.size())); 174 175 std::string asciiString; 176 auto src = pel->primarySRC(); 177 if (src) 178 { 179 asciiString = (*src)->asciiString(); 180 } 181 182 ad.add("SRC", asciiString); 183 184 _eventLogger.log("org.open_power.Logging.Error.BadHostPEL", 185 Entry::Level::Error, ad); 186 187 // Save it to a file for debug in the lab. Just keep the latest. 188 // Not adding it to the PEL because it could already be max size 189 // and don't want to truncate an already invalid PEL. 190 std::ofstream pelFile{getPELRepoPath() / "badPEL"}; 191 pelFile.write(reinterpret_cast<const char*>(pelData.data()), 192 pelData.size()); 193 } 194 } 195 196 void Manager::addESELPEL(const std::string& esel, uint32_t obmcLogID) 197 { 198 std::vector<uint8_t> data; 199 200 log<level::DEBUG>("Adding PEL from ESEL", 201 entry("OBMC_LOG_ID=%d", obmcLogID)); 202 203 try 204 { 205 data = std::move(eselToRawData(esel)); 206 } 207 catch (std::exception& e) 208 { 209 // Try to add it below anyway, so it follows the usual bad data path. 210 log<level::ERR>("Problems converting ESEL string to a byte vector"); 211 } 212 213 addPEL(data, obmcLogID); 214 } 215 216 std::vector<uint8_t> Manager::eselToRawData(const std::string& esel) 217 { 218 std::vector<uint8_t> data; 219 std::string byteString; 220 221 // As the eSEL string looks like: "50 48 00 ab ..." there are 3 222 // characters per raw byte, and since the actual PEL data starts 223 // at the 16th byte, the code will grab the PEL data starting at 224 // offset 48 in the string. 225 static constexpr size_t pelStart = 16 * 3; 226 227 if (esel.size() <= pelStart) 228 { 229 log<level::ERR>("ESEL data too short", 230 entry("ESEL_SIZE=%d", esel.size())); 231 232 throw std::length_error("ESEL data too short"); 233 } 234 235 for (size_t i = pelStart; i < esel.size(); i += 3) 236 { 237 if (i + 1 < esel.size()) 238 { 239 byteString = esel.substr(i, 2); 240 data.push_back(std::stoi(byteString, nullptr, 16)); 241 } 242 else 243 { 244 log<level::ERR>("ESEL data too short", 245 entry("ESEL_SIZE=%d", esel.size())); 246 throw std::length_error("ESEL data too short"); 247 } 248 } 249 250 return data; 251 } 252 253 void Manager::erase(uint32_t obmcLogID) 254 { 255 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 256 257 _repo.remove(id); 258 } 259 260 bool Manager::isDeleteProhibited(uint32_t /*obmcLogID*/) 261 { 262 return false; 263 } 264 265 PelFFDC Manager::convertToPelFFDC(const FFDCEntries& ffdc) 266 { 267 PelFFDC pelFFDC; 268 269 std::for_each(ffdc.begin(), ffdc.end(), [&pelFFDC](const auto& f) { 270 PelFFDCfile pf; 271 pf.subType = std::get<ffdcSubtypePos>(f); 272 pf.version = std::get<ffdcVersionPos>(f); 273 pf.fd = std::get<ffdcFDPos>(f); 274 275 switch (std::get<ffdcFormatPos>(f)) 276 { 277 case Create::FFDCFormat::JSON: 278 pf.format = UserDataFormat::json; 279 break; 280 case Create::FFDCFormat::CBOR: 281 pf.format = UserDataFormat::cbor; 282 break; 283 case Create::FFDCFormat::Text: 284 pf.format = UserDataFormat::text; 285 break; 286 case Create::FFDCFormat::Custom: 287 pf.format = UserDataFormat::custom; 288 break; 289 } 290 291 pelFFDC.push_back(pf); 292 }); 293 294 return pelFFDC; 295 } 296 297 void Manager::createPEL(const std::string& message, uint32_t obmcLogID, 298 uint64_t timestamp, 299 phosphor::logging::Entry::Level severity, 300 const std::vector<std::string>& additionalData, 301 const std::vector<std::string>& /*associations*/, 302 const FFDCEntries& ffdc) 303 { 304 auto entry = _registry.lookup(message, rg::LookupType::name); 305 auto pelFFDC = convertToPelFFDC(ffdc); 306 AdditionalData ad{additionalData}; 307 std::string msg; 308 309 if (!entry) 310 { 311 // Instead, get the default entry that means there is no 312 // other matching entry. This error will still use the 313 // AdditionalData values of the original error, and this 314 // code will add the error message value that wasn't found 315 // to this AD. This way, there will at least be a PEL, 316 // possibly with callouts, to allow users to debug the 317 // issue that caused the error even without its own PEL. 318 msg = "Event not found in PEL message registry: " + message; 319 log<level::INFO>(msg.c_str()); 320 321 entry = _registry.lookup(defaultLogMessage, rg::LookupType::name); 322 if (!entry) 323 { 324 log<level::ERR>("Default event not found in PEL message registry"); 325 return; 326 } 327 328 ad.add(additional_data::error, message); 329 } 330 331 auto pel = std::make_unique<openpower::pels::PEL>( 332 *entry, obmcLogID, timestamp, severity, ad, pelFFDC, *_dataIface); 333 334 _repo.add(pel); 335 336 if (_repo.sizeWarning()) 337 { 338 scheduleRepoPrune(); 339 } 340 341 auto src = pel->primarySRC(); 342 if (src) 343 { 344 auto msg = 345 fmt::format("Created PEL {:#x} (BMC ID {}) with SRC {}", pel->id(), 346 pel->obmcLogID(), (*src)->asciiString()); 347 while (msg.back() == ' ') 348 { 349 msg.pop_back(); 350 } 351 log<level::INFO>(msg.c_str()); 352 } 353 354 // Activate any resulting service indicators if necessary 355 auto policy = service_indicators::getPolicy(*_dataIface); 356 policy->activate(*pel); 357 358 // Check if firmware should quiesce system due to error 359 checkPelAndQuiesce(pel); 360 } 361 362 sdbusplus::message::unix_fd Manager::getPEL(uint32_t pelID) 363 { 364 Repository::LogID id{Repository::LogID::Pel(pelID)}; 365 std::optional<int> fd; 366 367 log<level::DEBUG>("getPEL", entry("PEL_ID=0x%X", pelID)); 368 369 try 370 { 371 fd = _repo.getPELFD(id); 372 } 373 catch (std::exception& e) 374 { 375 throw common_error::InternalFailure(); 376 } 377 378 if (!fd) 379 { 380 throw common_error::InvalidArgument(); 381 } 382 383 scheduleFDClose(*fd); 384 385 return *fd; 386 } 387 388 void Manager::scheduleFDClose(int fd) 389 { 390 _fdCloserEventSource = std::make_unique<sdeventplus::source::Defer>( 391 _event, std::bind(std::mem_fn(&Manager::closeFD), this, fd, 392 std::placeholders::_1)); 393 } 394 395 void Manager::closeFD(int fd, sdeventplus::source::EventBase& /*source*/) 396 { 397 close(fd); 398 _fdCloserEventSource.reset(); 399 } 400 401 std::vector<uint8_t> Manager::getPELFromOBMCID(uint32_t obmcLogID) 402 { 403 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 404 std::optional<std::vector<uint8_t>> data; 405 406 log<level::DEBUG>("getPELFromOBMCID", entry("OBMC_LOG_ID=%d", obmcLogID)); 407 408 try 409 { 410 data = _repo.getPELData(id); 411 } 412 catch (std::exception& e) 413 { 414 throw common_error::InternalFailure(); 415 } 416 417 if (!data) 418 { 419 throw common_error::InvalidArgument(); 420 } 421 422 return *data; 423 } 424 425 void Manager::hostAck(uint32_t pelID) 426 { 427 Repository::LogID id{Repository::LogID::Pel(pelID)}; 428 429 log<level::DEBUG>("HostAck", entry("PEL_ID=0x%X", pelID)); 430 431 if (!_repo.hasPEL(id)) 432 { 433 throw common_error::InvalidArgument(); 434 } 435 436 if (_hostNotifier) 437 { 438 _hostNotifier->ackPEL(pelID); 439 } 440 } 441 442 void Manager::hostReject(uint32_t pelID, RejectionReason reason) 443 { 444 Repository::LogID id{Repository::LogID::Pel(pelID)}; 445 446 log<level::DEBUG>("HostReject", entry("PEL_ID=0x%X", pelID), 447 entry("REASON=%d", static_cast<int>(reason))); 448 449 if (!_repo.hasPEL(id)) 450 { 451 throw common_error::InvalidArgument(); 452 } 453 454 if (reason == RejectionReason::BadPEL) 455 { 456 AdditionalData data; 457 data.add("BAD_ID", getNumberString("0x%08X", pelID)); 458 _eventLogger.log("org.open_power.Logging.Error.SentBadPELToHost", 459 Entry::Level::Informational, data); 460 if (_hostNotifier) 461 { 462 _hostNotifier->setBadPEL(pelID); 463 } 464 } 465 else if ((reason == RejectionReason::HostFull) && _hostNotifier) 466 { 467 _hostNotifier->setHostFull(pelID); 468 } 469 } 470 471 void Manager::scheduleRepoPrune() 472 { 473 _repoPrunerEventSource = std::make_unique<sdeventplus::source::Defer>( 474 _event, std::bind(std::mem_fn(&Manager::pruneRepo), this, 475 std::placeholders::_1)); 476 } 477 478 void Manager::pruneRepo(sdeventplus::source::EventBase& /*source*/) 479 { 480 auto idsToDelete = _repo.prune(); 481 482 // Remove the OpenBMC event logs for the PELs that were just removed. 483 std::for_each(idsToDelete.begin(), idsToDelete.end(), 484 [this](auto id) { this->_logManager.erase(id); }); 485 486 _repoPrunerEventSource.reset(); 487 } 488 489 void Manager::setupPELDeleteWatch() 490 { 491 _pelFileDeleteFD = inotify_init1(IN_NONBLOCK); 492 if (-1 == _pelFileDeleteFD) 493 { 494 auto e = errno; 495 std::string msg = 496 "inotify_init1 failed with errno " + std::to_string(e); 497 log<level::ERR>(msg.c_str()); 498 abort(); 499 } 500 501 _pelFileDeleteWatchFD = inotify_add_watch( 502 _pelFileDeleteFD, _repo.repoPath().c_str(), IN_DELETE); 503 if (-1 == _pelFileDeleteWatchFD) 504 { 505 auto e = errno; 506 std::string msg = 507 "inotify_add_watch failed with error " + std::to_string(e); 508 log<level::ERR>(msg.c_str()); 509 abort(); 510 } 511 512 _pelFileDeleteEventSource = std::make_unique<sdeventplus::source::IO>( 513 _event, _pelFileDeleteFD, EPOLLIN, 514 std::bind(std::mem_fn(&Manager::pelFileDeleted), this, 515 std::placeholders::_1, std::placeholders::_2, 516 std::placeholders::_3)); 517 } 518 519 void Manager::pelFileDeleted(sdeventplus::source::IO& /*io*/, int /*fd*/, 520 uint32_t revents) 521 { 522 if (!(revents & EPOLLIN)) 523 { 524 return; 525 } 526 527 // An event for 1 PEL uses 48B. When all PELs are deleted at once, 528 // as many events as there is room for can be handled in one callback. 529 // A size of 2000 will allow 41 to be processed, with additional 530 // callbacks being needed to process the remaining ones. 531 std::array<uint8_t, 2000> data; 532 auto bytesRead = read(_pelFileDeleteFD, data.data(), data.size()); 533 if (bytesRead < 0) 534 { 535 auto e = errno; 536 std::string msg = "Failed reading data from inotify event, errno = " + 537 std::to_string(e); 538 log<level::ERR>(msg.c_str()); 539 abort(); 540 } 541 542 auto offset = 0; 543 while (offset < bytesRead) 544 { 545 auto event = reinterpret_cast<inotify_event*>(&data[offset]); 546 if (event->mask & IN_DELETE) 547 { 548 std::string filename{event->name}; 549 550 // Get the PEL ID from the filename and tell the 551 // repo it's been removed, and then delete the BMC 552 // event log if it's there. 553 auto pos = filename.find_first_of('_'); 554 if (pos != std::string::npos) 555 { 556 try 557 { 558 auto idString = filename.substr(pos + 1); 559 auto pelID = std::stoul(idString, nullptr, 16); 560 561 Repository::LogID id{Repository::LogID::Pel(pelID)}; 562 auto removedLogID = _repo.remove(id); 563 if (removedLogID) 564 { 565 _logManager.erase(removedLogID->obmcID.id); 566 } 567 } 568 catch (const std::exception& e) 569 { 570 log<level::INFO>("Could not find PEL ID from its filename", 571 entry("FILENAME=%s", filename.c_str())); 572 } 573 } 574 } 575 576 offset += offsetof(inotify_event, name) + event->len; 577 } 578 } 579 580 std::tuple<uint32_t, uint32_t> Manager::createPELWithFFDCFiles( 581 std::string message, Entry::Level severity, 582 std::map<std::string, std::string> additionalData, 583 std::vector<std::tuple< 584 sdbusplus::xyz::openbmc_project::Logging::server::Create::FFDCFormat, 585 uint8_t, uint8_t, sdbusplus::message::unix_fd>> 586 fFDC) 587 { 588 _logManager.createWithFFDC(message, severity, additionalData, fFDC); 589 590 return {_logManager.lastEntryID(), _repo.lastPelID()}; 591 } 592 593 void Manager::checkPelAndQuiesce(std::unique_ptr<openpower::pels::PEL>& pel) 594 { 595 if (pel->userHeader().severity() == 596 static_cast<uint8_t>(SeverityType::nonError)) 597 { 598 log<level::DEBUG>("PEL severity informational. no quiesce needed"); 599 return; 600 } 601 if (!_logManager.isQuiesceOnErrorEnabled()) 602 { 603 log<level::DEBUG>("QuiesceOnHwError not enabled, no quiesce needed"); 604 return; 605 } 606 607 // Now check if it has any type of callout 608 if (pel->isCalloutPresent()) 609 { 610 log<level::INFO>("QuiesceOnHwError enabled, PEL severity not nonError, " 611 "and callout is present"); 612 613 _logManager.quiesceOnError(pel->obmcLogID()); 614 } 615 } 616 617 void Manager::setEntryPath(uint32_t obmcLogID) 618 { 619 Repository::LogID id{Repository::LogID::Obmc(obmcLogID)}; 620 if (auto attributes = _repo.getPELAttributes(id); attributes) 621 { 622 auto& attr = attributes.value().get(); 623 auto entry = _logManager.entries.find(obmcLogID); 624 if (entry != _logManager.entries.end()) 625 { 626 entry->second->path(attr.path); 627 } 628 } 629 } 630 631 } // namespace pels 632 } // namespace openpower 633