1 #pragma once 2 3 extern "C" 4 { 5 #include <libpdbg.h> 6 #include <libpdbg_sbe.h> 7 } 8 9 #include "dump_utils.hpp" 10 #include "sbe_consts.hpp" 11 #include "sbe_type.hpp" 12 13 #include <phal_exception.H> 14 15 #include <cstdint> 16 #include <filesystem> 17 #include <future> 18 #include <vector> 19 20 namespace openpower::dump::sbe_chipop 21 { 22 23 using TargetMap = 24 std::map<struct pdbg_target*, std::vector<struct pdbg_target*>>; 25 26 /** 27 * @class SbeDumpCollector 28 * @brief Manages the collection of dumps from SBEs on failure. 29 * 30 * This class provides functionalities to orchestrate the collection of 31 * diagnostic dumps from Self Boot Engines across multiple processors 32 * in response to failures or for diagnostic purposes. 33 */ 34 class SbeDumpCollector 35 { 36 public: 37 /** 38 * @brief Constructs a new SbeDumpCollector object. 39 */ 40 SbeDumpCollector() = default; 41 42 /** 43 * @brief Destroys the SbeDumpCollector object. 44 */ 45 ~SbeDumpCollector() = default; 46 47 /** 48 * @brief Drives all type of dump collection process from SBEs. 49 * 50 * Triggers SBE, Hardware/Hostboot dump collection process from SBEs. 51 * Internally calls private method collectHWHBDump(for Hardware/Hostboot 52 * dump) or collectSBEDump(for SBE dump) based on the parameter type's value 53 * 54 * @param type The type of dump which needs to be collected. 55 * @param id ID of the collected dump. 56 * @param failingUnit ID of the failing unit from which the dump is 57 * collected. 58 * @param path Path where the collected dump will be stored. 59 */ 60 void collectDump(uint8_t type, uint32_t id, uint32_t failingUnit, 61 const std::filesystem::path& path); 62 63 private: 64 /** 65 * @brief Orchestrates the collection of dumps from all available SBEs. 66 * 67 * Initiates the process of collecting diagnostic dumps from SBEs. This 68 * involves identifying available processors, initiating the dump 69 * collection process, and managing the collected dump files. 70 * 71 * @param type The type of dump to collect. 72 * @param id A unique identifier for the dump collection operation. 73 * @param failingUnit The identifier of the failing unit prompting the dump 74 * collection. 75 * @param path The filesystem path where collected dumps should be stored. 76 */ 77 void collectHWHBDump(uint8_t type, uint32_t id, uint64_t failingUnit, 78 const std::filesystem::path& path); 79 80 /** 81 * @brief Execute HWPs to collect SBE dump. 82 * 83 * @param[in] id Id of the dump. 84 * @param[in] failingUnit Id of proc containing failing SBE. 85 * @param[in] dumpPath Path to stored the dump files. 86 * @param[in] sbeTypeId ID for SBE type i.e.; Odyssey or normal memory chip 87 * 0xA-->Normal SBE type, 88 * 0xB-->Odyssey SBE type Exceptions: PDBG_INIT_FAIL for any pdbg init 89 * related failure. 90 */ 91 void collectSBEDump(uint32_t id, uint32_t failingUnit, 92 const std::filesystem::path& dumpPath, 93 const int sbeTypeId); 94 95 /** 96 * @brief Collects a dump from a single SBE. 97 * 98 * Executes the low-level operations required to collect a diagnostic 99 * dump from the specified SBE. 100 * 101 * @param chip A pointer to the pdbg_target structure representing the SBE. 102 * @param path The filesystem path where the dump should be stored. 103 * @param id The unique identifier for this dump collection operation. 104 * @param type The type of dump to collect. 105 * @param clockState The clock state of the SBE during dump collection. 106 * @param failingUnit The identifier of the failing unit. 107 */ 108 void collectDumpFromSBE(struct pdbg_target* chip, 109 const std::filesystem::path& path, uint32_t id, 110 uint8_t type, uint8_t clockState, 111 uint64_t failingUnit); 112 113 /** 114 * @brief Initializes the PDBG library. 115 * 116 * Prepares the PDBG library for interacting with processor targets. This 117 * must be called before any PDBG-related operations are performed. 118 */ 119 void initializePdbg(); 120 121 /** 122 * @brief Launches asynchronous dump collection tasks for a set of targets. 123 * 124 * This method initiates the dump collection process asynchronously for each 125 * target provided in the `targets` vector. It launches a separate 126 * asynchronous task for each target, where each task calls 127 * `collectDumpFromSBE` with the specified parameters, including the clock 128 * state. 129 * 130 * @param type The type of the dump to collect. This could be a hardware 131 * dump, software dump, etc., as defined by the SBE dump type enumeration. 132 * @param id A unique identifier for the dump collection operation. This ID 133 * is used to tag the collected dump for identification. 134 * @param path The filesystem path where the collected dumps should be 135 * stored. Each dump file will be stored under this directory. 136 * @param failingUnit The identifier of the unit or component that is 137 * failing or suspected to be the cause of the issue prompting the dump 138 * collection. This is used for diagnostic purposes. 139 * @param cstate The clock state during the dump collection. This parameter 140 * dictates whether the dump should be collected with the 141 * clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF). 142 * @param targetMap A map of `pdbg_target*` representing the targets from 143 * which dumps should be collected. The key is the proc target with the 144 * list of ocmb targets associated with the proc. 145 * 146 * @return A vector of `std::future<void>` objects. Each future represents 147 * the completion state of an asynchronous dump collection task. The caller 148 * can wait on these futures to determine when all dump collection 149 * tasks have completed. Exceptions thrown by the asynchronous tasks are 150 * captured by the futures and can be rethrown when the futures are 151 * accessed. 152 */ 153 std::vector<std::future<void>> spawnDumpCollectionProcesses( 154 uint8_t type, uint32_t id, const std::filesystem::path& path, 155 uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap); 156 157 /** @brief This function creates the new dump file in dump file name 158 * format and then writes the contents into it. 159 * @param path - Path to dump file 160 * @param id - A unique id assigned to dump to be collected 161 * @param clockState - Clock state, ON or Off 162 * @param nodeNum - Node containing the chip 163 * @param chipName - Name of the chip 164 * @param chipPos - Chip position of the failing unit 165 * @param dataPtr - Content to write to file 166 * @param len - Length of the content 167 */ 168 void writeDumpFile(const std::filesystem::path& path, const uint32_t id, 169 const uint8_t clockState, const uint8_t nodeNum, 170 const std::string& chipName, const uint8_t chipPos, 171 util::DumpDataPtr& dataPtr, const uint32_t len); 172 173 /** 174 * @brief Determines if fastarray collection is needed based on dump type 175 * and unit. 176 * 177 * @param clockState The current state of the clock. 178 * @param type The type of the dump being collected. 179 * @param failingUnit The ID of the failing unit. 180 * @param chipPos The position of the chip for which the dump is being 181 * collected. 182 * 183 * @return uint8_t - Returns 1 if fastarray collection is needed, 0 184 * otherwise. 185 */ checkFastarrayCollectionNeeded(const uint8_t clockState,const uint8_t type,uint64_t failingUnit,const uint8_t chipPos) const186 inline uint8_t checkFastarrayCollectionNeeded( 187 const uint8_t clockState, const uint8_t type, uint64_t failingUnit, 188 const uint8_t chipPos) const 189 { 190 using namespace openpower::dump::SBE; 191 192 return (clockState == SBE_CLOCK_OFF && 193 (type == SBE_DUMP_TYPE_HOSTBOOT || 194 (type == SBE_DUMP_TYPE_HARDWARE && chipPos == failingUnit))) 195 ? 1 196 : 0; 197 } 198 199 /** 200 * Logs an error and creates a PEL for SBE chip-op failures. 201 * 202 * @param sbeError - An error object encapsulating details about the SBE 203 * error. 204 * @param chipPos - The position of the chip where the error occurred. 205 * @param sbeType - The type of SBE, used to determine the event log 206 * message. 207 * @param cmdClass - The command class associated with the SBE operation. 208 * @param cmdType - The specific type of command within the command class. 209 * @param path - Dump collection path. 210 * 211 */ 212 bool logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError, 213 uint64_t chipPos, SBETypes sbeType, 214 uint32_t cmdClass, uint32_t cmdType, 215 const std::filesystem::path& path); 216 217 /** 218 * Determines the type of SBE for a given chip target. 219 * 220 * @param chip - A pointer to a pdbg_target structure representing the chip. 221 * @return The SBE type for the given chip target. 222 */ getSBEType(struct pdbg_target * chip)223 inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip) 224 { 225 if (is_ody_ocmb_chip(chip)) 226 { 227 return SBETypes::OCMB; 228 } 229 return SBETypes::PROC; 230 } 231 232 /** 233 * @brief Executes thread stop on a processor target 234 * 235 * If the Self Boot Engine (SBE) is not ready to accept chip operations 236 * (chip-ops), it logs the condition and excludes the processor from the 237 * dump collection process. For critical errors, such as a timeout during 238 * the stop operation, it logs the error and again excludes the processor. 239 * In case of SBE command failure or non-critical errors, it continues with 240 * the dump collection process. 241 * 242 * @param target Pointer to the pdbg target structure representing the 243 * processor to perform the thread stop on. 244 * @param path Dump collection path 245 * @return true If the thread stop was successful or in case of non-critical 246 * errors where dump collection can proceed. 247 * @return false If the SBE is not ready for chip-ops or in case of critical 248 * errors like timeouts, indicating the processor should be 249 * excluded from the dump collection. 250 */ 251 bool executeThreadStop(struct pdbg_target* target, 252 const std::filesystem::path& path); 253 254 /** 255 * @brief Add Failure log information to info.yaml file 256 * @param logId - Error Log Id 257 * @param src - Reason Code of PEL 258 * @param chipName - Resource Name 259 * @param chipPos - Resource number 260 * @param path - Dump collection path 261 */ 262 void addLogDataToDump(uint32_t logId, std::string src, std::string chipName, 263 uint64_t chipPos, const std::filesystem::path& path); 264 }; 265 } // namespace openpower::dump::sbe_chipop 266