1d84ed6e9SZane Shelley #include <assert.h>
29fb7393eSZane Shelley #include <unistd.h>
387eabc65SBen Tyner 
4ebff0d37SZane Shelley #include <analyzer/analyzer_main.hpp>
5a9b44344SZane Shelley #include <analyzer/ras-data/ras-data-parser.hpp>
64ed4be56SZane Shelley #include <analyzer/service_data.hpp>
77029e525SBen Tyner #include <attn/attn_dump.hpp>
80205f3b3SBen Tyner #include <hei_main.hpp>
9f4bd5ff6SZane Shelley #include <util/pdbg.hpp>
10d84ed6e9SZane Shelley #include <util/trace.hpp>
110205f3b3SBen Tyner 
120205f3b3SBen Tyner namespace analyzer
130205f3b3SBen Tyner {
14f4bd5ff6SZane Shelley //------------------------------------------------------------------------------
15b1ebfcb1SBen Tyner 
16f4bd5ff6SZane Shelley // Forward references for externally defined functions.
1787eabc65SBen Tyner 
/**
 * @brief Gets the list of active chips and initializes the isolator.
 *
 * Declared here only; the definition lives in another translation unit.
 *
 * @param o_chips The returned list of active chips.
 */
void initializeIsolator(std::vector<libhei::Chip>& o_chips);
2387eabc65SBen Tyner 
/**
 * @brief  Filters the signatures gathered during isolation for the root cause
 *         attention.
 *
 * Declared here only; the definition lives in another translation unit.
 *
 * @param  i_type      The type of analysis to perform. See enum for details.
 * @param  i_isoData   The data gathered during isolation (for FFDC).
 * @param  o_rootCause The returned root cause signature.
 * @param  i_rasData   The RAS data parser.
 * @return True, if root cause has been found. False, otherwise.
 */
bool filterRootCause(AnalysisType i_type,
                     const libhei::IsolationData& i_isoData,
                     libhei::Signature& o_rootCause,
                     const RasDataParser& i_rasData);
3665fefb2cSZane Shelley 
/**
 * @brief  Creates and submits a PEL using the given data.
 *
 * Declared here only; the definition lives in another translation unit.
 *
 * @param  i_servData Data regarding service actions gathered during analysis.
 * @return The platform log ID. Will return zero if no PEL is generated.
 */
uint32_t commitPel(const ServiceData& i_servData);
43d3b9bac9SZane Shelley 
44d84ed6e9SZane Shelley //------------------------------------------------------------------------------
45d84ed6e9SZane Shelley 
__attn(libhei::AttentionType_t i_type)46ebff0d37SZane Shelley const char* __attn(libhei::AttentionType_t i_type)
472f263181SZane Shelley {
482f263181SZane Shelley     const char* str = "";
49ebff0d37SZane Shelley     switch (i_type)
502f263181SZane Shelley     {
51*adda0540SZane Shelley         case libhei::ATTN_TYPE_CHIP_CS:
52*adda0540SZane Shelley             str = "CHIP_CS";
532f263181SZane Shelley             break;
542f263181SZane Shelley         case libhei::ATTN_TYPE_UNIT_CS:
552f263181SZane Shelley             str = "UNIT_CS";
562f263181SZane Shelley             break;
572f263181SZane Shelley         case libhei::ATTN_TYPE_RECOVERABLE:
582f263181SZane Shelley             str = "RECOVERABLE";
592f263181SZane Shelley             break;
602f263181SZane Shelley         case libhei::ATTN_TYPE_SP_ATTN:
612f263181SZane Shelley             str = "SP_ATTN";
622f263181SZane Shelley             break;
632f263181SZane Shelley         case libhei::ATTN_TYPE_HOST_ATTN:
642f263181SZane Shelley             str = "HOST_ATTN";
652f263181SZane Shelley             break;
662f263181SZane Shelley         default:
67ebff0d37SZane Shelley             trace::err("Unsupported attention type: %u", i_type);
682f263181SZane Shelley             assert(0);
692f263181SZane Shelley     }
702f263181SZane Shelley     return str;
712f263181SZane Shelley }
722f263181SZane Shelley 
732f263181SZane Shelley //------------------------------------------------------------------------------
742f263181SZane Shelley 
__analysisType(AnalysisType i_type)75ebff0d37SZane Shelley const char* __analysisType(AnalysisType i_type)
76ebff0d37SZane Shelley {
77ebff0d37SZane Shelley     const char* str = "";
78ebff0d37SZane Shelley     switch (i_type)
79ebff0d37SZane Shelley     {
80ebff0d37SZane Shelley         case AnalysisType::SYSTEM_CHECKSTOP:
81ebff0d37SZane Shelley             str = "SYSTEM_CHECKSTOP";
82ebff0d37SZane Shelley             break;
83ebff0d37SZane Shelley         case AnalysisType::TERMINATE_IMMEDIATE:
84ebff0d37SZane Shelley             str = "TERMINATE_IMMEDIATE";
85ebff0d37SZane Shelley             break;
86ebff0d37SZane Shelley         case AnalysisType::MANUAL:
87ebff0d37SZane Shelley             str = "MANUAL";
88ebff0d37SZane Shelley             break;
89ebff0d37SZane Shelley         default:
90ebff0d37SZane Shelley             trace::err("Unsupported analysis type: %u", i_type);
91ebff0d37SZane Shelley             assert(0);
92ebff0d37SZane Shelley     }
93ebff0d37SZane Shelley     return str;
94ebff0d37SZane Shelley }
95ebff0d37SZane Shelley 
96ebff0d37SZane Shelley //------------------------------------------------------------------------------
97ebff0d37SZane Shelley 
analyzeHardware(AnalysisType i_type,attn::DumpParameters & o_dump)98ebff0d37SZane Shelley uint32_t analyzeHardware(AnalysisType i_type, attn::DumpParameters& o_dump)
9987eabc65SBen Tyner {
100611b3442SZane Shelley     uint32_t o_plid = 0; // default, zero indicates PEL was not created
10187eabc65SBen Tyner 
102e5411f0fSZane Shelley     if (!util::pdbg::queryHardwareAnalysisSupported())
103e5411f0fSZane Shelley     {
104e5411f0fSZane Shelley         trace::err("Hardware error analysis is not supported on this system");
105611b3442SZane Shelley         return o_plid;
106e5411f0fSZane Shelley     }
107e5411f0fSZane Shelley 
108ebff0d37SZane Shelley     trace::inf(">>> enter analyzeHardware(%s)", __analysisType(i_type));
1092f263181SZane Shelley 
110171a2e04SZane Shelley     // Initialize the isolator and get all of the chips to be analyzed.
111f4bd5ff6SZane Shelley     trace::inf("Initializing the isolator...");
112171a2e04SZane Shelley     std::vector<libhei::Chip> chips;
113f4bd5ff6SZane Shelley     initializeIsolator(chips);
1142e994bcdSZane Shelley 
115097a71adSZane Shelley     // Isolate attentions.
116f4bd5ff6SZane Shelley     trace::inf("Isolating errors: # of chips=%u", chips.size());
117097a71adSZane Shelley     libhei::IsolationData isoData{};
118f4bd5ff6SZane Shelley     libhei::isolate(chips, isoData);
11987eabc65SBen Tyner 
12065fefb2cSZane Shelley     // For debug, trace out the original list of signatures before filtering.
12165fefb2cSZane Shelley     for (const auto& sig : isoData.getSignatureList())
12265fefb2cSZane Shelley     {
12365fefb2cSZane Shelley         trace::inf("Signature: %s 0x%0" PRIx32 " %s",
12465fefb2cSZane Shelley                    util::pdbg::getPath(sig.getChip()), sig.toUint32(),
12565fefb2cSZane Shelley                    __attn(sig.getAttnType()));
12665fefb2cSZane Shelley     }
12765fefb2cSZane Shelley 
128e5411f0fSZane Shelley     // Filter for root cause attention.
129e5411f0fSZane Shelley     libhei::Signature rootCause{};
1301a4f0e70SCaleb Palmer     RasDataParser rasData{};
1315836f4a6SZane Shelley     bool attnFound = false;
1325836f4a6SZane Shelley     try
1335836f4a6SZane Shelley     {
1345836f4a6SZane Shelley         attnFound = filterRootCause(i_type, isoData, rootCause, rasData);
1355836f4a6SZane Shelley     }
1365836f4a6SZane Shelley     catch (const std::exception& e)
1375836f4a6SZane Shelley     {
1385836f4a6SZane Shelley         trace::err("Exception caught during root cause filtering");
1395836f4a6SZane Shelley         trace::err(e.what());
1405836f4a6SZane Shelley         attnFound = false; // just in case
1415836f4a6SZane Shelley     }
142e5411f0fSZane Shelley 
143b7879d3dSZane Shelley     // If a root cause attention was found, or if this was a system checkstop,
144b7879d3dSZane Shelley     // generate a PEL.
145b7879d3dSZane Shelley     if (attnFound || AnalysisType::SYSTEM_CHECKSTOP == i_type)
146e5411f0fSZane Shelley     {
147b7879d3dSZane Shelley         if (attnFound)
148e5411f0fSZane Shelley         {
149e5411f0fSZane Shelley             trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
150e5411f0fSZane Shelley                        util::pdbg::getPath(rootCause.getChip()),
151e5411f0fSZane Shelley                        rootCause.toUint32(), __attn(rootCause.getAttnType()));
152b7879d3dSZane Shelley         }
153b7879d3dSZane Shelley         else
154b7879d3dSZane Shelley         {
155b7879d3dSZane Shelley             // This is bad. Analysis should have found a root cause attention
156b7879d3dSZane Shelley             // for a system checkstop. Issues could range from code bugs to SCOM
157b7879d3dSZane Shelley             // errors. Regardless, generate a PEL with FFDC to assist with
158b7879d3dSZane Shelley             // debug.
159b7879d3dSZane Shelley             trace::err("System checkstop with no root cause attention");
160b7879d3dSZane Shelley             rootCause = libhei::Signature{}; // just in case
161b7879d3dSZane Shelley         }
162e5411f0fSZane Shelley 
163b7879d3dSZane Shelley         // Start building the service data.
16462adf5c2SZane Shelley         ServiceData servData{rootCause, i_type, isoData};
165b7879d3dSZane Shelley 
166b7879d3dSZane Shelley         // Apply any service actions, if needed. Note that there are no
167b7879d3dSZane Shelley         // resolutions for manual analysis.
168b7879d3dSZane Shelley         if (AnalysisType::MANUAL != i_type)
169b7879d3dSZane Shelley         {
170b7879d3dSZane Shelley             if (attnFound)
171b7879d3dSZane Shelley             {
1722fbd267eSZane Shelley                 try
1732fbd267eSZane Shelley                 {
174b7879d3dSZane Shelley                     // Resolve the root cause attention.
175a9b44344SZane Shelley                     rasData.getResolution(rootCause)->resolve(servData);
176b7879d3dSZane Shelley                 }
1772fbd267eSZane Shelley                 catch (const std::exception& e)
1782fbd267eSZane Shelley                 {
1792fbd267eSZane Shelley                     trace::err("Exception caught during root cause analysis");
1802fbd267eSZane Shelley                     trace::err(e.what());
1812fbd267eSZane Shelley 
1822fbd267eSZane Shelley                     // We'll still want to create a PEL for the FFDC, but
1832fbd267eSZane Shelley                     // since the analysis failed, we need to callout Level 2
1842fbd267eSZane Shelley                     // Support.
1852fbd267eSZane Shelley                     servData.calloutProcedure(callout::Procedure::NEXTLVL,
1862fbd267eSZane Shelley                                               callout::Priority::HIGH);
1872fbd267eSZane Shelley                 }
1882fbd267eSZane Shelley             }
189b7879d3dSZane Shelley             else
190b7879d3dSZane Shelley             {
1912fbd267eSZane Shelley                 // Analysis failed so callout the Level 2 Support.
1928af56854SZane Shelley                 servData.calloutProcedure(callout::Procedure::NEXTLVL,
1938af56854SZane Shelley                                           callout::Priority::HIGH);
194b7879d3dSZane Shelley             }
195b7879d3dSZane Shelley         }
196e5411f0fSZane Shelley 
197e5411f0fSZane Shelley         // Create and commit a PEL.
198c1e1c000SBen Tyner         o_plid = commitPel(servData);
1997029e525SBen Tyner 
200611b3442SZane Shelley         if (0 == o_plid)
201611b3442SZane Shelley         {
202611b3442SZane Shelley             trace::err("Failed to create PEL");
203611b3442SZane Shelley         }
204611b3442SZane Shelley         else
205611b3442SZane Shelley         {
206611b3442SZane Shelley             trace::inf("PEL created: PLID=0x%0" PRIx32, o_plid);
207bf3326fbSZane Shelley 
20804f010abSZane Shelley             // Gather/return information needed for dump. A hardware dump will
209611b3442SZane Shelley             // always be used for system checkstop attenions. Software dumps
210611b3442SZane Shelley             // will be reserved for MP-IPLs during TI analysis.
211611b3442SZane Shelley             // TODO: Need ID from root cause. At the moment, HUID does not exist
212611b3442SZane Shelley             //       in devtree. Will need a better ID definition.
213ebff0d37SZane Shelley             o_dump.unitId = 0;
214ebff0d37SZane Shelley             o_dump.dumpType = attn::DumpType::Hardware;
215e5411f0fSZane Shelley         }
216611b3442SZane Shelley     }
217b7879d3dSZane Shelley     else
218b7879d3dSZane Shelley     {
219b7879d3dSZane Shelley         // It is possible for TI handling, or manually initiated analysis via
220b7879d3dSZane Shelley         // the command line, that there will not be an active attention. In
221b7879d3dSZane Shelley         // which case, we will do nothing and let the caller of this function
222b7879d3dSZane Shelley         // determine if this is the expected behavior.
223b7879d3dSZane Shelley         trace::inf("No active attentions found");
224b7879d3dSZane Shelley     }
22587eabc65SBen Tyner 
226097a71adSZane Shelley     // All done, clean up the isolator.
227f4bd5ff6SZane Shelley     trace::inf("Uninitializing isolator...");
228097a71adSZane Shelley     libhei::uninitialize();
229b1ebfcb1SBen Tyner 
2302f263181SZane Shelley     trace::inf("<<< exit analyzeHardware()");
2312f263181SZane Shelley 
232611b3442SZane Shelley     return o_plid;
2330205f3b3SBen Tyner }
2340205f3b3SBen Tyner 
235eea45427SBen Tyner //------------------------------------------------------------------------------
236eea45427SBen Tyner 
2370205f3b3SBen Tyner } // namespace analyzer
238