1 #pragma once
2 
3 extern "C"
4 {
5 #include <libpdbg.h>
6 #include <libpdbg_sbe.h>
7 }
8 
9 #include "dump_utils.hpp"
10 #include "sbe_consts.hpp"
11 #include "sbe_type.hpp"
12 
13 #include <phal_exception.H>
14 
15 #include <cstdint>
16 #include <filesystem>
17 #include <future>
18 #include <vector>
19 
20 namespace openpower::dump::sbe_chipop
21 {
22 
23 /**
24  * @class SbeDumpCollector
25  * @brief Manages the collection of dumps from SBEs on failure.
26  *
27  * This class provides functionalities to orchestrate the collection of
28  * diagnostic dumps from Self Boot Engines across multiple processors
29  * in response to failures or for diagnostic purposes.
30  */
31 class SbeDumpCollector
32 {
33   public:
34     /**
35      * @brief Constructs a new SbeDumpCollector object.
36      */
37     SbeDumpCollector() = default;
38 
39     /**
40      * @brief Destroys the SbeDumpCollector object.
41      */
42     ~SbeDumpCollector() = default;
43 
44     /**
45      * @brief Orchestrates the collection of dumps from all available SBEs.
46      *
47      * Initiates the process of collecting diagnostic dumps from SBEs. This
48      * involves identifying available processors, initiating the dump
49      * collection process, and managing the collected dump files.
50      *
51      * @param type The type of dump to collect.
52      * @param id A unique identifier for the dump collection operation.
53      * @param failingUnit The identifier of the failing unit prompting the dump
54      * collection.
55      * @param path The filesystem path where collected dumps should be stored.
56      */
57     void collectDump(uint8_t type, uint32_t id, uint64_t failingUnit,
58                      const std::filesystem::path& path);
59 
60   private:
61     /**
62      * @brief Collects a dump from a single SBE.
63      *
64      * Executes the low-level operations required to collect a diagnostic
65      * dump from the specified SBE.
66      *
67      * @param chip A pointer to the pdbg_target structure representing the SBE.
68      * @param path The filesystem path where the dump should be stored.
69      * @param id The unique identifier for this dump collection operation.
70      * @param type The type of dump to collect.
71      * @param clockState The clock state of the SBE during dump collection.
72      * @param failingUnit The identifier of the failing unit.
73      */
74     void collectDumpFromSBE(struct pdbg_target* chip,
75                             const std::filesystem::path& path, uint32_t id,
76                             uint8_t type, uint8_t clockState,
77                             uint64_t failingUnit);
78 
79     /**
80      * @brief Initializes the PDBG library.
81      *
82      * Prepares the PDBG library for interacting with processor targets. This
83      * must be called before any PDBG-related operations are performed.
84      */
85     void initializePdbg();
86 
87     /**
88      * @brief Launches asynchronous dump collection tasks for a set of targets.
89      *
90      * This method initiates the dump collection process asynchronously for each
91      * target provided in the `targets` vector. It launches a separate
92      * asynchronous task for each target, where each task calls
93      * `collectDumpFromSBE` with the specified parameters, including the clock
94      * state.
95      *
96      * @param type The type of the dump to collect. This could be a hardware
97      * dump, software dump, etc., as defined by the SBE dump type enumeration.
98      * @param id A unique identifier for the dump collection operation. This ID
99      * is used to tag the collected dump for identification.
100      * @param path The filesystem path where the collected dumps should be
101      * stored. Each dump file will be stored under this directory.
102      * @param failingUnit The identifier of the unit or component that is
103      * failing or suspected to be the cause of the issue prompting the dump
104      * collection. This is used for diagnostic purposes.
105      * @param cstate The clock state during the dump collection. This parameter
106      *               dictates whether the dump should be collected with the
107      * clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF).
108      * @param targets A vector of `pdbg_target*` representing the targets from
109      * which dumps should be collected. Each target corresponds to a physical or
110      * logical component in the system, such as a processor or an SBE.
111      *
112      * @return A vector of `std::future<void>` objects. Each future represents
113      * the completion state of an asynchronous dump collection task. The caller
114      *         can wait on these futures to determine when all dump collection
115      * tasks have completed. Exceptions thrown by the asynchronous tasks are
116      * captured by the futures and can be rethrown when the futures are
117      * accessed.
118      */
119     std::vector<std::future<void>> spawnDumpCollectionProcesses(
120         uint8_t type, uint32_t id, const std::filesystem::path& path,
121         uint64_t failingUnit, uint8_t cstate,
122         const std::vector<struct pdbg_target*>& targets);
123 
124     /** @brief This function creates the new dump file in dump file name
125      * format and then writes the contents into it.
126      *  @param path - Path to dump file
127      *  @param id - A unique id assigned to dump to be collected
128      *  @param clockState - Clock state, ON or Off
129      *  @param nodeNum - Node containing the chip
130      *  @param chipName - Name of the chip
131      *  @param chipPos - Chip position of the failing unit
132      *  @param dataPtr - Content to write to file
133      *  @param len - Length of the content
134      */
135     void writeDumpFile(const std::filesystem::path& path, const uint32_t id,
136                        const uint8_t clockState, const uint8_t nodeNum,
137                        const std::string& chipName, const uint8_t chipPos,
138                        util::DumpDataPtr& dataPtr, const uint32_t len);
139 
140     /**
141      * @brief Determines if fastarray collection is needed based on dump type
142      * and unit.
143      *
144      * @param clockState The current state of the clock.
145      * @param type The type of the dump being collected.
146      * @param failingUnit The ID of the failing unit.
147      * @param chipPos The position of the chip for which the dump is being
148      * collected.
149      *
150      * @return uint8_t - Returns 1 if fastarray collection is needed, 0
151      * otherwise.
152      */
153     inline uint8_t checkFastarrayCollectionNeeded(const uint8_t clockState,
154                                                   const uint8_t type,
155                                                   uint64_t failingUnit,
156                                                   const uint8_t chipPos) const
157     {
158         using namespace openpower::dump::SBE;
159 
160         return (clockState == SBE_CLOCK_OFF &&
161                 (type == SBE_DUMP_TYPE_HOSTBOOT ||
162                  (type == SBE_DUMP_TYPE_HARDWARE && chipPos == failingUnit)))
163                    ? 1
164                    : 0;
165     }
166 
167     /**
168      * Logs an error and creates a PEL for SBE chip-op failures.
169      *
170      * @param sbeError - An error object encapsulating details about the SBE
171      * error.
172      * @param chipPos - The position of the chip where the error occurred.
173      * @param sbeType - The type of SBE, used to determine the event log
174      * message.
175      * @param cmdClass - The command class associated with the SBE operation.
176      * @param cmdType - The specific type of command within the command class.
177      *
178      */
179     void logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError,
180                               uint64_t chipPos, SBETypes sbeType,
181                               uint32_t cmdClass, uint32_t cmdType);
182 
183     /**
184      * Determines the type of SBE for a given chip target.
185      *
186      * @param chip - A pointer to a pdbg_target structure representing the chip.
187      * @return The SBE type for the given chip target.
188      */
189     inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip)
190     {
191         return SBETypes::PROC;
192     }
193 
194     /**
195      * @brief Executes thread stop on a processor target
196      *
197      * If the Self Boot Engine (SBE) is not ready to accept chip operations
198      * (chip-ops), it logs the condition and excludes the processor from the
199      * dump collection process. For critical errors, such as a timeout during
200      * the stop operation, it logs the error and again excludes the processor.
201      * In case of SBE command failure or non-critical errors, it continues with
202      * the dump collection process.
203      *
204      * @param target Pointer to the pdbg target structure representing the
205      *               processor to perform the thread stop on.
206      * @return true If the thread stop was successful or in case of non-critical
207      *              errors where dump collection can proceed.
208      * @return false If the SBE is not ready for chip-ops or in case of critical
209      *               errors like timeouts, indicating the processor should be
210      *               excluded from the dump collection.
211      */
212     bool executeThreadStop(struct pdbg_target* target);
213 };
214 
215 } // namespace openpower::dump::sbe_chipop
216