1858d1aafSDhruvaraj Subhashchandran #pragma once 2858d1aafSDhruvaraj Subhashchandran 3858d1aafSDhruvaraj Subhashchandran extern "C" 4858d1aafSDhruvaraj Subhashchandran { 5858d1aafSDhruvaraj Subhashchandran #include <libpdbg.h> 6858d1aafSDhruvaraj Subhashchandran #include <libpdbg_sbe.h> 7858d1aafSDhruvaraj Subhashchandran } 8858d1aafSDhruvaraj Subhashchandran 9a699e31eSDhruvaraj Subhashchandran #include "dump_utils.hpp" 10a699e31eSDhruvaraj Subhashchandran #include "sbe_consts.hpp" 116feeebd6SDhruvaraj Subhashchandran #include "sbe_type.hpp" 126feeebd6SDhruvaraj Subhashchandran 136feeebd6SDhruvaraj Subhashchandran #include <phal_exception.H> 14a699e31eSDhruvaraj Subhashchandran 15858d1aafSDhruvaraj Subhashchandran #include <cstdint> 16858d1aafSDhruvaraj Subhashchandran #include <filesystem> 17858d1aafSDhruvaraj Subhashchandran #include <future> 18858d1aafSDhruvaraj Subhashchandran #include <vector> 19858d1aafSDhruvaraj Subhashchandran 20858d1aafSDhruvaraj Subhashchandran namespace openpower::dump::sbe_chipop 21858d1aafSDhruvaraj Subhashchandran { 22858d1aafSDhruvaraj Subhashchandran 23858d1aafSDhruvaraj Subhashchandran /** 24858d1aafSDhruvaraj Subhashchandran * @class SbeDumpCollector 25858d1aafSDhruvaraj Subhashchandran * @brief Manages the collection of dumps from SBEs on failure. 26858d1aafSDhruvaraj Subhashchandran * 27858d1aafSDhruvaraj Subhashchandran * This class provides functionalities to orchestrate the collection of 28858d1aafSDhruvaraj Subhashchandran * diagnostic dumps from Self Boot Engines across multiple processors 29858d1aafSDhruvaraj Subhashchandran * in response to failures or for diagnostic purposes. 30858d1aafSDhruvaraj Subhashchandran */ 31858d1aafSDhruvaraj Subhashchandran class SbeDumpCollector 32858d1aafSDhruvaraj Subhashchandran { 33858d1aafSDhruvaraj Subhashchandran public: 34858d1aafSDhruvaraj Subhashchandran /** 35858d1aafSDhruvaraj Subhashchandran * @brief Constructs a new SbeDumpCollector object. 36858d1aafSDhruvaraj Subhashchandran */ 37858d1aafSDhruvaraj Subhashchandran SbeDumpCollector() = default; 38858d1aafSDhruvaraj Subhashchandran 39858d1aafSDhruvaraj Subhashchandran /** 40858d1aafSDhruvaraj Subhashchandran * @brief Destroys the SbeDumpCollector object. 41858d1aafSDhruvaraj Subhashchandran */ 42858d1aafSDhruvaraj Subhashchandran ~SbeDumpCollector() = default; 43858d1aafSDhruvaraj Subhashchandran 44858d1aafSDhruvaraj Subhashchandran /** 45858d1aafSDhruvaraj Subhashchandran * @brief Orchestrates the collection of dumps from all available SBEs. 46858d1aafSDhruvaraj Subhashchandran * 47858d1aafSDhruvaraj Subhashchandran * Initiates the process of collecting diagnostic dumps from SBEs. This 48858d1aafSDhruvaraj Subhashchandran * involves identifying available processors, initiating the dump 49858d1aafSDhruvaraj Subhashchandran * collection process, and managing the collected dump files. 50858d1aafSDhruvaraj Subhashchandran * 51858d1aafSDhruvaraj Subhashchandran * @param type The type of dump to collect. 52858d1aafSDhruvaraj Subhashchandran * @param id A unique identifier for the dump collection operation. 53858d1aafSDhruvaraj Subhashchandran * @param failingUnit The identifier of the failing unit prompting the dump 54858d1aafSDhruvaraj Subhashchandran * collection. 55858d1aafSDhruvaraj Subhashchandran * @param path The filesystem path where collected dumps should be stored. 56858d1aafSDhruvaraj Subhashchandran */ 57858d1aafSDhruvaraj Subhashchandran void collectDump(uint8_t type, uint32_t id, uint64_t failingUnit, 58858d1aafSDhruvaraj Subhashchandran const std::filesystem::path& path); 59858d1aafSDhruvaraj Subhashchandran 60858d1aafSDhruvaraj Subhashchandran private: 61858d1aafSDhruvaraj Subhashchandran /** 62858d1aafSDhruvaraj Subhashchandran * @brief Collects a dump from a single SBE. 63858d1aafSDhruvaraj Subhashchandran * 64858d1aafSDhruvaraj Subhashchandran * Executes the low-level operations required to collect a diagnostic 65858d1aafSDhruvaraj Subhashchandran * dump from the specified SBE. 66858d1aafSDhruvaraj Subhashchandran * 67858d1aafSDhruvaraj Subhashchandran * @param chip A pointer to the pdbg_target structure representing the SBE. 68858d1aafSDhruvaraj Subhashchandran * @param path The filesystem path where the dump should be stored. 69858d1aafSDhruvaraj Subhashchandran * @param id The unique identifier for this dump collection operation. 70858d1aafSDhruvaraj Subhashchandran * @param type The type of dump to collect. 71858d1aafSDhruvaraj Subhashchandran * @param clockState The clock state of the SBE during dump collection. 72858d1aafSDhruvaraj Subhashchandran * @param failingUnit The identifier of the failing unit. 73858d1aafSDhruvaraj Subhashchandran */ 74858d1aafSDhruvaraj Subhashchandran void collectDumpFromSBE(struct pdbg_target* chip, 75858d1aafSDhruvaraj Subhashchandran const std::filesystem::path& path, uint32_t id, 76858d1aafSDhruvaraj Subhashchandran uint8_t type, uint8_t clockState, 77858d1aafSDhruvaraj Subhashchandran uint64_t failingUnit); 78858d1aafSDhruvaraj Subhashchandran 79858d1aafSDhruvaraj Subhashchandran /** 80858d1aafSDhruvaraj Subhashchandran * @brief Initializes the PDBG library. 81858d1aafSDhruvaraj Subhashchandran * 82858d1aafSDhruvaraj Subhashchandran * Prepares the PDBG library for interacting with processor targets. This 83858d1aafSDhruvaraj Subhashchandran * must be called before any PDBG-related operations are performed. 84858d1aafSDhruvaraj Subhashchandran */ 85858d1aafSDhruvaraj Subhashchandran void initializePdbg(); 86858d1aafSDhruvaraj Subhashchandran 87858d1aafSDhruvaraj Subhashchandran /** 88858d1aafSDhruvaraj Subhashchandran * @brief Launches asynchronous dump collection tasks for a set of targets. 89858d1aafSDhruvaraj Subhashchandran * 90858d1aafSDhruvaraj Subhashchandran * This method initiates the dump collection process asynchronously for each 91858d1aafSDhruvaraj Subhashchandran * target provided in the `targets` vector. It launches a separate 92858d1aafSDhruvaraj Subhashchandran * asynchronous task for each target, where each task calls 93858d1aafSDhruvaraj Subhashchandran * `collectDumpFromSBE` with the specified parameters, including the clock 94858d1aafSDhruvaraj Subhashchandran * state. 95858d1aafSDhruvaraj Subhashchandran * 96858d1aafSDhruvaraj Subhashchandran * @param type The type of the dump to collect. This could be a hardware 97858d1aafSDhruvaraj Subhashchandran * dump, software dump, etc., as defined by the SBE dump type enumeration. 98858d1aafSDhruvaraj Subhashchandran * @param id A unique identifier for the dump collection operation. This ID 99858d1aafSDhruvaraj Subhashchandran * is used to tag the collected dump for identification. 100858d1aafSDhruvaraj Subhashchandran * @param path The filesystem path where the collected dumps should be 101858d1aafSDhruvaraj Subhashchandran * stored. Each dump file will be stored under this directory. 102858d1aafSDhruvaraj Subhashchandran * @param failingUnit The identifier of the unit or component that is 103858d1aafSDhruvaraj Subhashchandran * failing or suspected to be the cause of the issue prompting the dump 104858d1aafSDhruvaraj Subhashchandran * collection. This is used for diagnostic purposes. 105858d1aafSDhruvaraj Subhashchandran * @param cstate The clock state during the dump collection. This parameter 106858d1aafSDhruvaraj Subhashchandran * dictates whether the dump should be collected with the 107858d1aafSDhruvaraj Subhashchandran * clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF). 108858d1aafSDhruvaraj Subhashchandran * @param targets A vector of `pdbg_target*` representing the targets from 109858d1aafSDhruvaraj Subhashchandran * which dumps should be collected. Each target corresponds to a physical or 110858d1aafSDhruvaraj Subhashchandran * logical component in the system, such as a processor or an SBE. 111858d1aafSDhruvaraj Subhashchandran * 112858d1aafSDhruvaraj Subhashchandran * @return A vector of `std::future<void>` objects. Each future represents 113858d1aafSDhruvaraj Subhashchandran * the completion state of an asynchronous dump collection task. The caller 114858d1aafSDhruvaraj Subhashchandran * can wait on these futures to determine when all dump collection 115858d1aafSDhruvaraj Subhashchandran * tasks have completed. Exceptions thrown by the asynchronous tasks are 116858d1aafSDhruvaraj Subhashchandran * captured by the futures and can be rethrown when the futures are 117858d1aafSDhruvaraj Subhashchandran * accessed. 118858d1aafSDhruvaraj Subhashchandran */ 119858d1aafSDhruvaraj Subhashchandran std::vector<std::future<void>> spawnDumpCollectionProcesses( 120858d1aafSDhruvaraj Subhashchandran uint8_t type, uint32_t id, const std::filesystem::path& path, 121858d1aafSDhruvaraj Subhashchandran uint64_t failingUnit, uint8_t cstate, 122858d1aafSDhruvaraj Subhashchandran const std::vector<struct pdbg_target*>& targets); 123a699e31eSDhruvaraj Subhashchandran 124a699e31eSDhruvaraj Subhashchandran /** @brief This function creates the new dump file in dump file name 125a699e31eSDhruvaraj Subhashchandran * format and then writes the contents into it. 126a699e31eSDhruvaraj Subhashchandran * @param path - Path to dump file 127a699e31eSDhruvaraj Subhashchandran * @param id - A unique id assigned to dump to be collected 128a699e31eSDhruvaraj Subhashchandran * @param clockState - Clock state, ON or Off 129a699e31eSDhruvaraj Subhashchandran * @param nodeNum - Node containing the chip 130a699e31eSDhruvaraj Subhashchandran * @param chipName - Name of the chip 131a699e31eSDhruvaraj Subhashchandran * @param chipPos - Chip position of the failing unit 132a699e31eSDhruvaraj Subhashchandran * @param dataPtr - Content to write to file 133a699e31eSDhruvaraj Subhashchandran * @param len - Length of the content 134a699e31eSDhruvaraj Subhashchandran */ 135a699e31eSDhruvaraj Subhashchandran void writeDumpFile(const std::filesystem::path& path, const uint32_t id, 136a699e31eSDhruvaraj Subhashchandran const uint8_t clockState, const uint8_t nodeNum, 1376feeebd6SDhruvaraj Subhashchandran const std::string& chipName, const uint8_t chipPos, 138a699e31eSDhruvaraj Subhashchandran util::DumpDataPtr& dataPtr, const uint32_t len); 139a699e31eSDhruvaraj Subhashchandran 140a699e31eSDhruvaraj Subhashchandran /** 141a699e31eSDhruvaraj Subhashchandran * @brief Determines if fastarray collection is needed based on dump type 142a699e31eSDhruvaraj Subhashchandran * and unit. 143a699e31eSDhruvaraj Subhashchandran * 144a699e31eSDhruvaraj Subhashchandran * @param clockState The current state of the clock. 145a699e31eSDhruvaraj Subhashchandran * @param type The type of the dump being collected. 146a699e31eSDhruvaraj Subhashchandran * @param failingUnit The ID of the failing unit. 147a699e31eSDhruvaraj Subhashchandran * @param chipPos The position of the chip for which the dump is being 148a699e31eSDhruvaraj Subhashchandran * collected. 149a699e31eSDhruvaraj Subhashchandran * 150a699e31eSDhruvaraj Subhashchandran * @return uint8_t - Returns 1 if fastarray collection is needed, 0 151a699e31eSDhruvaraj Subhashchandran * otherwise. 152a699e31eSDhruvaraj Subhashchandran */ 153a699e31eSDhruvaraj Subhashchandran inline uint8_t checkFastarrayCollectionNeeded(const uint8_t clockState, 154a699e31eSDhruvaraj Subhashchandran const uint8_t type, 155a699e31eSDhruvaraj Subhashchandran uint64_t failingUnit, 156a699e31eSDhruvaraj Subhashchandran const uint8_t chipPos) const 157a699e31eSDhruvaraj Subhashchandran { 158a699e31eSDhruvaraj Subhashchandran using namespace openpower::dump::SBE; 159a699e31eSDhruvaraj Subhashchandran 160a699e31eSDhruvaraj Subhashchandran return (clockState == SBE_CLOCK_OFF && 161a699e31eSDhruvaraj Subhashchandran (type == SBE_DUMP_TYPE_HOSTBOOT || 162a699e31eSDhruvaraj Subhashchandran (type == SBE_DUMP_TYPE_HARDWARE && chipPos == failingUnit))) 163a699e31eSDhruvaraj Subhashchandran ? 1 164a699e31eSDhruvaraj Subhashchandran : 0; 165a699e31eSDhruvaraj Subhashchandran } 1666feeebd6SDhruvaraj Subhashchandran 1676feeebd6SDhruvaraj Subhashchandran /** 1686feeebd6SDhruvaraj Subhashchandran * Logs an error and creates a PEL for SBE chip-op failures. 1696feeebd6SDhruvaraj Subhashchandran * 1706feeebd6SDhruvaraj Subhashchandran * @param sbeError - An error object encapsulating details about the SBE 1716feeebd6SDhruvaraj Subhashchandran * error. 1726feeebd6SDhruvaraj Subhashchandran * @param chipPos - The position of the chip where the error occurred. 1736feeebd6SDhruvaraj Subhashchandran * @param sbeType - The type of SBE, used to determine the event log 1746feeebd6SDhruvaraj Subhashchandran * message. 1756feeebd6SDhruvaraj Subhashchandran * @param cmdClass - The command class associated with the SBE operation. 1766feeebd6SDhruvaraj Subhashchandran * @param cmdType - The specific type of command within the command class. 1776feeebd6SDhruvaraj Subhashchandran * 1786feeebd6SDhruvaraj Subhashchandran */ 1796feeebd6SDhruvaraj Subhashchandran void logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError, 1806feeebd6SDhruvaraj Subhashchandran uint64_t chipPos, SBETypes sbeType, 1816feeebd6SDhruvaraj Subhashchandran uint32_t cmdClass, uint32_t cmdType); 1826feeebd6SDhruvaraj Subhashchandran 1836feeebd6SDhruvaraj Subhashchandran /** 1846feeebd6SDhruvaraj Subhashchandran * Determines the type of SBE for a given chip target. 1856feeebd6SDhruvaraj Subhashchandran * 1866feeebd6SDhruvaraj Subhashchandran * @param chip - A pointer to a pdbg_target structure representing the chip. 1876feeebd6SDhruvaraj Subhashchandran * @return The SBE type for the given chip target. 1886feeebd6SDhruvaraj Subhashchandran */ 1896feeebd6SDhruvaraj Subhashchandran inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip) 1906feeebd6SDhruvaraj Subhashchandran { 1916feeebd6SDhruvaraj Subhashchandran return SBETypes::PROC; 1926feeebd6SDhruvaraj Subhashchandran } 193*f9f65b82SDhruvaraj Subhashchandran 194*f9f65b82SDhruvaraj Subhashchandran /** 195*f9f65b82SDhruvaraj Subhashchandran * @brief Executes thread stop on a processor target 196*f9f65b82SDhruvaraj Subhashchandran * 197*f9f65b82SDhruvaraj Subhashchandran * If the Self Boot Engine (SBE) is not ready to accept chip operations 198*f9f65b82SDhruvaraj Subhashchandran * (chip-ops), it logs the condition and excludes the processor from the 199*f9f65b82SDhruvaraj Subhashchandran * dump collection process. For critical errors, such as a timeout during 200*f9f65b82SDhruvaraj Subhashchandran * the stop operation, it logs the error and again excludes the processor. 201*f9f65b82SDhruvaraj Subhashchandran * In case of SBE command failure or non-critical errors, it continues with 202*f9f65b82SDhruvaraj Subhashchandran * the dump collection process. 203*f9f65b82SDhruvaraj Subhashchandran * 204*f9f65b82SDhruvaraj Subhashchandran * @param target Pointer to the pdbg target structure representing the 205*f9f65b82SDhruvaraj Subhashchandran * processor to perform the thread stop on. 206*f9f65b82SDhruvaraj Subhashchandran * @return true If the thread stop was successful or in case of non-critical 207*f9f65b82SDhruvaraj Subhashchandran * errors where dump collection can proceed. 208*f9f65b82SDhruvaraj Subhashchandran * @return false If the SBE is not ready for chip-ops or in case of critical 209*f9f65b82SDhruvaraj Subhashchandran * errors like timeouts, indicating the processor should be 210*f9f65b82SDhruvaraj Subhashchandran * excluded from the dump collection. 211*f9f65b82SDhruvaraj Subhashchandran */ 212*f9f65b82SDhruvaraj Subhashchandran bool executeThreadStop(struct pdbg_target* target); 213858d1aafSDhruvaraj Subhashchandran }; 214858d1aafSDhruvaraj Subhashchandran 215858d1aafSDhruvaraj Subhashchandran } // namespace openpower::dump::sbe_chipop 216