// xref: /openbmc/openpower-debug-collector/dump/sbe_dump_collector.hpp (revision 540521edd78007b78f8e6df4b38ca71496862f25)
1 #pragma once
2 
extern "C"
{
#include <libpdbg.h>
#include <libpdbg_sbe.h>
}

#include "dump_utils.hpp"
#include "sbe_consts.hpp"
#include "sbe_type.hpp"

#include <phal_exception.H>

#include <cstdint>
#include <filesystem>
#include <future>
#include <map>
#include <string>
#include <vector>
19 
20 namespace openpower::dump::sbe_chipop
21 {
22 
/**
 * @brief Associates a processor target with its OCMB targets.
 *
 * The key is the proc `pdbg_target`; the mapped value is the list of ocmb
 * `pdbg_target`s associated with that proc.
 */
using TargetMap =
    std::map<struct pdbg_target*, std::vector<struct pdbg_target*>>;
25 
26 /**
27  * @class SbeDumpCollector
28  * @brief Manages the collection of dumps from SBEs on failure.
29  *
30  * This class provides functionalities to orchestrate the collection of
31  * diagnostic dumps from Self Boot Engines across multiple processors
32  * in response to failures or for diagnostic purposes.
33  */
34 class SbeDumpCollector
35 {
36   public:
37     /**
38      * @brief Constructs a new SbeDumpCollector object.
39      */
40     SbeDumpCollector() = default;
41 
42     /**
43      * @brief Destroys the SbeDumpCollector object.
44      */
45     ~SbeDumpCollector() = default;
46 
47     /**
48      * @brief Orchestrates the collection of dumps from all available SBEs.
49      *
50      * Initiates the process of collecting diagnostic dumps from SBEs. This
51      * involves identifying available processors, initiating the dump
52      * collection process, and managing the collected dump files.
53      *
54      * @param type The type of dump to collect.
55      * @param id A unique identifier for the dump collection operation.
56      * @param failingUnit The identifier of the failing unit prompting the dump
57      * collection.
58      * @param path The filesystem path where collected dumps should be stored.
59      */
60     void collectDump(uint8_t type, uint32_t id, uint64_t failingUnit,
61                      const std::filesystem::path& path);
62 
63   private:
64     /**
65      * @brief Collects a dump from a single SBE.
66      *
67      * Executes the low-level operations required to collect a diagnostic
68      * dump from the specified SBE.
69      *
70      * @param chip A pointer to the pdbg_target structure representing the SBE.
71      * @param path The filesystem path where the dump should be stored.
72      * @param id The unique identifier for this dump collection operation.
73      * @param type The type of dump to collect.
74      * @param clockState The clock state of the SBE during dump collection.
75      * @param failingUnit The identifier of the failing unit.
76      */
77     void collectDumpFromSBE(struct pdbg_target* chip,
78                             const std::filesystem::path& path, uint32_t id,
79                             uint8_t type, uint8_t clockState,
80                             uint64_t failingUnit);
81 
82     /**
83      * @brief Initializes the PDBG library.
84      *
85      * Prepares the PDBG library for interacting with processor targets. This
86      * must be called before any PDBG-related operations are performed.
87      */
88     void initializePdbg();
89 
90     /**
91      * @brief Launches asynchronous dump collection tasks for a set of targets.
92      *
93      * This method initiates the dump collection process asynchronously for each
94      * target provided in the `targets` vector. It launches a separate
95      * asynchronous task for each target, where each task calls
96      * `collectDumpFromSBE` with the specified parameters, including the clock
97      * state.
98      *
99      * @param type The type of the dump to collect. This could be a hardware
100      * dump, software dump, etc., as defined by the SBE dump type enumeration.
101      * @param id A unique identifier for the dump collection operation. This ID
102      * is used to tag the collected dump for identification.
103      * @param path The filesystem path where the collected dumps should be
104      * stored. Each dump file will be stored under this directory.
105      * @param failingUnit The identifier of the unit or component that is
106      * failing or suspected to be the cause of the issue prompting the dump
107      * collection. This is used for diagnostic purposes.
108      * @param cstate The clock state during the dump collection. This parameter
109      *               dictates whether the dump should be collected with the
110      * clocks running (SBE_CLOCK_ON) or with the clocks stopped (SBE_CLOCK_OFF).
111      * @param targetMap A map of `pdbg_target*` representing the targets from
112      * which dumps should be collected. The key is the proc target with the
113      * list of ocmb targets associated with the proc.
114      *
115      * @return A vector of `std::future<void>` objects. Each future represents
116      * the completion state of an asynchronous dump collection task. The caller
117      *         can wait on these futures to determine when all dump collection
118      * tasks have completed. Exceptions thrown by the asynchronous tasks are
119      * captured by the futures and can be rethrown when the futures are
120      * accessed.
121      */
122     std::vector<std::future<void>> spawnDumpCollectionProcesses(
123         uint8_t type, uint32_t id, const std::filesystem::path& path,
124         uint64_t failingUnit, uint8_t cstate, const TargetMap& targetMap);
125 
126     /** @brief This function creates the new dump file in dump file name
127      * format and then writes the contents into it.
128      *  @param path - Path to dump file
129      *  @param id - A unique id assigned to dump to be collected
130      *  @param clockState - Clock state, ON or Off
131      *  @param nodeNum - Node containing the chip
132      *  @param chipName - Name of the chip
133      *  @param chipPos - Chip position of the failing unit
134      *  @param dataPtr - Content to write to file
135      *  @param len - Length of the content
136      */
137     void writeDumpFile(const std::filesystem::path& path, const uint32_t id,
138                        const uint8_t clockState, const uint8_t nodeNum,
139                        const std::string& chipName, const uint8_t chipPos,
140                        util::DumpDataPtr& dataPtr, const uint32_t len);
141 
142     /**
143      * @brief Determines if fastarray collection is needed based on dump type
144      * and unit.
145      *
146      * @param clockState The current state of the clock.
147      * @param type The type of the dump being collected.
148      * @param failingUnit The ID of the failing unit.
149      * @param chipPos The position of the chip for which the dump is being
150      * collected.
151      *
152      * @return uint8_t - Returns 1 if fastarray collection is needed, 0
153      * otherwise.
154      */
checkFastarrayCollectionNeeded(const uint8_t clockState,const uint8_t type,uint64_t failingUnit,const uint8_t chipPos) const155     inline uint8_t checkFastarrayCollectionNeeded(
156         const uint8_t clockState, const uint8_t type, uint64_t failingUnit,
157         const uint8_t chipPos) const
158     {
159         using namespace openpower::dump::SBE;
160 
161         return (clockState == SBE_CLOCK_OFF &&
162                 (type == SBE_DUMP_TYPE_HOSTBOOT ||
163                  (type == SBE_DUMP_TYPE_HARDWARE && chipPos == failingUnit)))
164                    ? 1
165                    : 0;
166     }
167 
168     /**
169      * Logs an error and creates a PEL for SBE chip-op failures.
170      *
171      * @param sbeError - An error object encapsulating details about the SBE
172      * error.
173      * @param chipPos - The position of the chip where the error occurred.
174      * @param sbeType - The type of SBE, used to determine the event log
175      * message.
176      * @param cmdClass - The command class associated with the SBE operation.
177      * @param cmdType - The specific type of command within the command class.
178      *
179      */
180     bool logErrorAndCreatePEL(const openpower::phal::sbeError_t& sbeError,
181                               uint64_t chipPos, SBETypes sbeType,
182                               uint32_t cmdClass, uint32_t cmdType);
183 
184     /**
185      * Determines the type of SBE for a given chip target.
186      *
187      * @param chip - A pointer to a pdbg_target structure representing the chip.
188      * @return The SBE type for the given chip target.
189      */
getSBEType(struct pdbg_target * chip)190     inline SBETypes getSBEType([[maybe_unused]] struct pdbg_target* chip)
191     {
192         if (is_ody_ocmb_chip(chip))
193         {
194             return SBETypes::OCMB;
195         }
196         return SBETypes::PROC;
197     }
198 
199     /**
200      * @brief Executes thread stop on a processor target
201      *
202      * If the Self Boot Engine (SBE) is not ready to accept chip operations
203      * (chip-ops), it logs the condition and excludes the processor from the
204      * dump collection process. For critical errors, such as a timeout during
205      * the stop operation, it logs the error and again excludes the processor.
206      * In case of SBE command failure or non-critical errors, it continues with
207      * the dump collection process.
208      *
209      * @param target Pointer to the pdbg target structure representing the
210      *               processor to perform the thread stop on.
211      * @return true If the thread stop was successful or in case of non-critical
212      *              errors where dump collection can proceed.
213      * @return false If the SBE is not ready for chip-ops or in case of critical
214      *               errors like timeouts, indicating the processor should be
215      *               excluded from the dump collection.
216      */
217     bool executeThreadStop(struct pdbg_target* target);
218 };
219 
220 } // namespace openpower::dump::sbe_chipop
221