extern "C"
{
#include <libpdbg.h>
#include <libpdbg_sbe.h>
}

#include "create_pel.hpp"
#include "sbe_consts.hpp"
#include "sbe_dump_collector.hpp"
#include "sbe_type.hpp"

#include <libphal.H>
#include <phal_exception.H>

#include <phosphor-logging/elog-errors.hpp>
#include <phosphor-logging/lg2.hpp>
#include <phosphor-logging/log.hpp>
#include <sbe_consts.hpp>
#include <xyz/openbmc_project/Common/File/error.hpp>
#include <xyz/openbmc_project/Common/error.hpp>

#include <cstdint>
#include <filesystem>
#include <format>
#include <fstream>
#include <future>
#include <iomanip>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace openpower::dump::sbe_chipop
{

using namespace phosphor::logging;
using namespace openpower::dump::SBE;

/**
 * @brief Collect a dump from every enabled, functional "proc" target and
 *        store the resulting files under @p path.
 *
 * For hostboot dumps (SBE_DUMP_TYPE_HOSTBOOT) a thread-stop is attempted on
 * each processor first; a processor is left out when executeThreadStop()
 * returns false. Collection is then run once per clock state (SBE_CLOCK_ON
 * followed by SBE_CLOCK_OFF), waiting for all per-target tasks of one state
 * before starting the next.
 *
 * @param[in] type        - Dump type forwarded to the dump chip-op
 * @param[in] id          - Dump id, used in the output file names
 * @param[in] failingUnit - Failing unit identifier forwarded to collection
 * @param[in] path        - Directory the dump files are written to
 *
 * @throws std::runtime_error if @p path is empty after all collection
 *         attempts. std::filesystem::is_empty() itself throws
 *         std::filesystem::filesystem_error (a std::runtime_error) when the
 *         path cannot be inspected.
 */
void SbeDumpCollector::collectDump(uint8_t type, uint32_t id,
                                   uint64_t failingUnit,
                                   const std::filesystem::path& path)
{
    // Routine progress message: logged at info level, consistent with the
    // completion messages below.
    lg2::info("Starting dump collection: type:{TYPE} id:{ID} "
              "failingUnit:{FAILINGUNIT}, path:{PATH}",
              "TYPE", type, "ID", id, "FAILINGUNIT", failingUnit, "PATH",
              path.string());

    initializePdbg();

    std::vector<struct pdbg_target*> targets;

    struct pdbg_target* target = nullptr;
    pdbg_for_each_class_target("proc", target)
    {
        if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
            !openpower::phal::pdbg::isTgtFunctional(target))
        {
            continue;
        }

        bool includeTarget = true;
        // if the dump type is hostboot then call stop instructions
        if (type == SBE_DUMP_TYPE_HOSTBOOT)
        {
            includeTarget = executeThreadStop(target);
        }
        if (includeTarget)
        {
            targets.push_back(target);
        }
    }

    const std::vector<uint8_t> clockStates = {SBE_CLOCK_ON, SBE_CLOCK_OFF};
    for (auto cstate : clockStates)
    {
        auto futures = spawnDumpCollectionProcesses(type, id, path, failingUnit,
                                                    cstate, targets);

        // Wait for all asynchronous tasks to complete before moving on to
        // the next clock state.
        for (auto& future : futures)
        {
            try
            {
                future.wait();
            }
            catch (const std::exception& e)
            {
                lg2::error("Failed to collect dump from SBE ErrorMsg({ERROR})",
                           "ERROR", e);
            }
        }
        lg2::info(
            "Dump collection completed for clock state({CSTATE}): type({TYPE}) "
            "id({ID}) failingUnit({FAILINGUNIT}), path({PATH})",
            "CSTATE", cstate, "TYPE", type, "ID", id, "FAILINGUNIT",
            failingUnit, "PATH", path.string());
    }

    // Nothing at all was written: report the overall collection as failed.
    if (std::filesystem::is_empty(path))
    {
        lg2::error("Failed to collect the dump");
        throw std::runtime_error("Failed to collect the dump");
    }
    lg2::info("Dump collection completed");
}

/**
 * @brief Initialize the pdbg layer through the libphal helper.
 */
void SbeDumpCollector::initializePdbg()
{
    openpower::phal::pdbg::init();
}

/**
 * @brief Launch one asynchronous collection task per usable target.
 *
 * Targets that are no longer enabled or functional are skipped. Each task
 * calls collectDumpFromSBE() and logs (rather than propagates) any
 * std::exception it raises, so one failing processor does not prevent the
 * others from being collected.
 *
 * @param[in] type        - Dump type
 * @param[in] id          - Dump id
 * @param[in] path        - Directory the dump files are written to
 * @param[in] failingUnit - Failing unit identifier
 * @param[in] cstate      - Clock state to collect for
 * @param[in] targets     - Candidate processor targets
 *
 * @return Futures for the launched tasks; the caller is expected to wait on
 *         them.
 */
std::vector<std::future<void>> SbeDumpCollector::spawnDumpCollectionProcesses(
    uint8_t type, uint32_t id, const std::filesystem::path& path,
    uint64_t failingUnit, uint8_t cstate,
    const std::vector<struct pdbg_target*>& targets)
{
    std::vector<std::future<void>> futures;
    futures.reserve(targets.size());

    for (auto target : targets)
    {
        if (pdbg_target_probe(target) != PDBG_TARGET_ENABLED ||
            !openpower::phal::pdbg::isTgtFunctional(target))
        {
            continue;
        }

        // `path` is captured by value so the task owns its own copy.
        auto future =
            std::async(std::launch::async,
                       [this, target, path, id, type, cstate, failingUnit]() {
                           try
                           {
                               this->collectDumpFromSBE(target, path, id, type,
                                                        cstate, failingUnit);
                           }
                           catch (const std::exception& e)
                           {
                               // Include the exception text so the cause of
                               // the failure can be diagnosed from the log.
                               lg2::error(
                                   "Failed to collect dump from SBE on "
                                   "Proc-({PROCINDEX}) ErrorMsg({ERROR})",
                                   "PROCINDEX", pdbg_target_index(target),
                                   "ERROR", e);
                           }
                       });

        futures.push_back(std::move(future));
    }

    return futures;
}

/**
 * @brief Create a PEL for a failed chip-op and, for timeouts, request an SBE
 *        dump.
 *
 * The event name is taken from sbeTypeAttributes for @p sbeType: the
 * chipOpFailure entry by default, or chipOpTimeout when the error type is
 * SBE_CMD_TIMEOUT. Exactly one PEL is created per call. All exceptions are
 * caught and logged here; nothing propagates to the caller.
 *
 * @param[in] sbeError - Error reported by the chip-op
 * @param[in] chipPos  - Position of the chip the chip-op was issued to
 * @param[in] sbeType  - SBE type used to look up the event names
 * @param[in] cmdClass - Command class of the failed chip-op
 * @param[in] cmdType  - Command type of the failed chip-op
 */
void SbeDumpCollector::logErrorAndCreatePEL(
    const openpower::phal::sbeError_t& sbeError, uint64_t chipPos,
    SBETypes sbeType, uint32_t cmdClass, uint32_t cmdType)
{
    try
    {
        std::string event = sbeTypeAttributes.at(sbeType).chipOpFailure;
        auto dumpIsRequired = false;

        if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
        {
            event = sbeTypeAttributes.at(sbeType).chipOpTimeout;
            dumpIsRequired = true;
        }

        openpower::dump::pel::FFDCData pelAdditionalData = {
            {"SRC6", std::format("{:X}{:X}", chipPos, (cmdClass | cmdType))}};

        // Create the PEL once and keep its id; a second call with the same
        // arguments would produce a duplicate log entry.
        auto logId = openpower::dump::pel::createSbeErrorPEL(event, sbeError,
                                                             pelAdditionalData);

        // Request SBE Dump if required
        if (dumpIsRequired)
        {
            util::requestSBEDump(chipPos, logId, sbeType);
        }
    }
    catch (const std::out_of_range& e)
    {
        lg2::error("Unknown SBE Type({SBETYPE}) ErrorMsg({ERROR})", "SBETYPE",
                   sbeType, "ERROR", e);
    }
    catch (const std::exception& e)
    {
        lg2::error("SBE Dump request failed, chip position({CHIPPOS}), "
                   "Error: {ERROR}",
                   "CHIPPOS", chipPos, "ERROR", e);
    }
}

/**
 * @brief Collect the dump from one chip and write it to a file.
 *
 * On SBE_CHIPOP_NOT_ALLOWED the request is skipped with an info message. Any
 * other sbeError_t is logged, turned into a PEL via logErrorAndCreatePEL()
 * and the function returns without writing a file.
 *
 * @param[in] chip        - Target to collect from
 * @param[in] path        - Directory the dump file is written to
 * @param[in] id          - Dump id
 * @param[in] type        - Dump type
 * @param[in] clockState  - Clock state to collect for
 * @param[in] failingUnit - Failing unit identifier
 *
 * @throws std::out_of_range if sbeTypeAttributes has no entry for the chip's
 *         SBE type.
 */
void SbeDumpCollector::collectDumpFromSBE(struct pdbg_target* chip,
                                          const std::filesystem::path& path,
                                          uint32_t id, uint8_t type,
                                          uint8_t clockState,
                                          uint64_t failingUnit)
{
    auto chipPos = pdbg_target_index(chip);
    SBETypes sbeType = getSBEType(chip);
    auto chipName = sbeTypeAttributes.at(sbeType).chipName;
    lg2::info(
        "Collecting dump from proc({PROC}): path({PATH}) id({ID}) "
        "type({TYPE}) clockState({CLOCKSTATE}) failingUnit({FAILINGUNIT})",
        "PROC", chipPos, "PATH", path.string(), "ID", id, "TYPE", type,
        "CLOCKSTATE", clockState, "FAILINGUNIT", failingUnit);

    util::DumpDataPtr dataPtr;
    uint32_t len = 0;
    uint8_t collectFastArray =
        checkFastarrayCollectionNeeded(clockState, type, failingUnit, chipPos);

    try
    {
        openpower::phal::sbe::getDump(chip, type, clockState, collectFastArray,
                                      dataPtr.getPtr(), &len);
    }
    catch (const openpower::phal::sbeError_t& sbeError)
    {
        if (sbeError.errType() ==
            openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
        {
            // SBE is not ready to accept chip-ops,
            // Skip the request, no additional error handling required.
            lg2::info("Collect dump: Skipping ({ERROR}) dump({TYPE}) "
                      "on proc({PROC}) clock state({CLOCKSTATE})",
                      "ERROR", sbeError, "TYPE", type, "PROC", chipPos,
                      "CLOCKSTATE", clockState);
            return;
        }

        lg2::error("Error in collecting dump dump type({TYPE}), "
                   "clockstate({CLOCKSTATE}), chip type({CHIPTYPE}) "
                   "position({POSITION}), "
                   "collectFastArray({COLLECTFASTARRAY}) error({ERROR})",
                   "TYPE", type, "CLOCKSTATE", clockState, "CHIPTYPE", chipName,
                   "POSITION", chipPos, "COLLECTFASTARRAY", collectFastArray,
                   "ERROR", sbeError);
        logErrorAndCreatePEL(sbeError, chipPos, sbeType, SBEFIFO_CMD_CLASS_DUMP,
                             SBEFIFO_CMD_GET_DUMP);
        return;
    }

    // Node number is passed as 0 here.
    writeDumpFile(path, id, clockState, 0, chipName, chipPos, dataPtr, len);
}

/**
 * @brief Write collected dump data to a file under @p path.
 *
 * The file name has the form
 * `<id, zero-padded to 8>.SbeDataClocks<On|Off>.node<nodeNum>.<chipName><chipPos>`.
 * Open and write failures are logged and reported, then the function returns
 * normally so that dumps collected from other SBEs can still be packaged.
 *
 * @param[in] path       - Directory the dump file is written to
 * @param[in] id         - Dump id
 * @param[in] clockState - Clock state the data was collected for
 * @param[in] nodeNum    - Node number used in the file name
 * @param[in] chipName   - Chip name used in the file name
 * @param[in] chipPos    - Chip position used in the file name
 * @param[in] dataPtr    - Holder of the dump data
 * @param[in] len        - Number of bytes to write
 */
void SbeDumpCollector::writeDumpFile(
    const std::filesystem::path& path, const uint32_t id,
    const uint8_t clockState, const uint8_t nodeNum,
    const std::string& chipName, const uint8_t chipPos,
    util::DumpDataPtr& dataPtr, const uint32_t len)
{
    using namespace sdbusplus::xyz::openbmc_project::Common::Error;

    // Construct the filename
    std::ostringstream filenameBuilder;
    filenameBuilder << std::setw(8) << std::setfill('0') << id
                    << ".SbeDataClocks"
                    << (clockState == SBE_CLOCK_ON ? "On" : "Off") << ".node"
                    << static_cast<int>(nodeNum) << "." << chipName
                    << static_cast<int>(chipPos);

    auto dumpPath = path / filenameBuilder.str();

    // Attempt to open the file
    std::ofstream outfile(dumpPath, std::ios::out | std::ios::binary);
    if (!outfile)
    {
        using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
        using metadata = xyz::openbmc_project::Common::File::Open;
        // Unable to open the file for writing
        auto err = errno;
        lg2::error("Error opening file to write dump, "
                   "errno({ERRNO}), filepath({FILEPATH})",
                   "ERRNO", err, "FILEPATH", dumpPath.string());

        report<Open>(metadata::ERRNO(err), metadata::PATH(dumpPath.c_str()));
        // Just return here, so that the dumps collected from other
        // SBEs can be packaged.
        return;
    }

    // Write to the file
    try
    {
        // Streams do not throw by default. Enable exceptions so a failed
        // write reaches the handler below instead of being logged as a
        // success.
        outfile.exceptions(std::ofstream::failbit | std::ofstream::badbit);

        outfile.write(reinterpret_cast<const char*>(dataPtr.getData()), len);
        // Flush while exceptions are armed so buffered-write failures are
        // detected here rather than silently dropped by the destructor.
        outfile.flush();

        lg2::info("Successfully wrote dump file "
                  "path=({PATH}) size=({SIZE})",
                  "PATH", dumpPath.string(), "SIZE", len);
    }
    catch (const std::ofstream::failure& oe)
    {
        using namespace sdbusplus::xyz::openbmc_project::Common::File::Error;
        using metadata = xyz::openbmc_project::Common::File::Write;

        lg2::error(
            "Failed to write to dump file, "
            "errorMsg({ERROR}), error({ERRORCODE}), filepath({FILEPATH})",
            "ERROR", oe, "ERRORCODE", oe.code().value(), "FILEPATH",
            dumpPath.string());
        report<Write>(metadata::ERRNO(oe.code().value()),
                      metadata::PATH(dumpPath.c_str()));
        // Just return here so dumps collected from other SBEs can be
        // packaged.
    }
}

/**
 * @brief Issue the thread-stop chip-op on a processor and decide whether the
 *        processor should be included in dump collection.
 *
 * @param[in] target - Processor target to stop
 *
 * @return true when the stop succeeded or failed with an error other than
 *         SBE_CHIPOP_NOT_ALLOWED / SBE_CMD_TIMEOUT; false for those two
 *         error types. For every error except SBE_CHIPOP_NOT_ALLOWED a PEL
 *         is created via logErrorAndCreatePEL().
 */
bool SbeDumpCollector::executeThreadStop(struct pdbg_target* target)
{
    try
    {
        openpower::phal::sbe::threadStopProc(target);
        return true;
    }
    catch (const openpower::phal::sbeError_t& sbeError)
    {
        uint64_t chipPos = pdbg_target_index(target);
        if (sbeError.errType() ==
            openpower::phal::exception::SBE_CHIPOP_NOT_ALLOWED)
        {
            lg2::info("SBE is not ready to accept chip-op: Skipping "
                      "stop instruction on proc-({POSITION}) error({ERROR}) ",
                      "POSITION", chipPos, "ERROR", sbeError);
            return false; // Do not include the target for dump collection
        }

        lg2::error("Stop instructions failed on "
                   "proc-({POSITION}) error({ERROR}) ",
                   "POSITION", chipPos, "ERROR", sbeError);

        logErrorAndCreatePEL(sbeError, chipPos, SBETypes::PROC,
                             SBEFIFO_CMD_CLASS_INSTRUCTION,
                             SBEFIFO_CMD_CONTROL_INSN);
        // For TIMEOUT, log the error and skip adding the processor for dump
        // collection
        if (sbeError.errType() == openpower::phal::exception::SBE_CMD_TIMEOUT)
        {
            return false;
        }
    }
    // Include the target for dump collection for SBE_CMD_FAILED or any other
    // non-critical errors
    return true;
}

} // namespace openpower::dump::sbe_chipop