xref: /openbmc/openpower-hw-diags/analyzer/filter-root-cause.cpp (revision 34b0ce19b894e4e1b56d33134898a5919d8e2e48)
165fefb2cSZane Shelley #include <assert.h>
265fefb2cSZane Shelley 
31a4f0e70SCaleb Palmer #include <analyzer/analyzer_main.hpp>
41a4f0e70SCaleb Palmer #include <analyzer/ras-data/ras-data-parser.hpp>
565fefb2cSZane Shelley #include <hei_main.hpp>
619df3706SZane Shelley #include <hei_util.hpp>
7f4792d68SZane Shelley #include <util/pdbg.hpp>
865fefb2cSZane Shelley 
965fefb2cSZane Shelley #include <algorithm>
1065fefb2cSZane Shelley #include <limits>
1165fefb2cSZane Shelley #include <string>
1265fefb2cSZane Shelley 
1365fefb2cSZane Shelley namespace analyzer
1465fefb2cSZane Shelley {
1565fefb2cSZane Shelley //------------------------------------------------------------------------------
1665fefb2cSZane Shelley 
__findRcsOscError(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause)17a7369f86SZane Shelley bool __findRcsOscError(const std::vector<libhei::Signature>& i_list,
18a7369f86SZane Shelley                        libhei::Signature& o_rootCause)
19a7369f86SZane Shelley {
20a7369f86SZane Shelley     // TODO: Consider returning all of them instead of one as root cause.
21a7369f86SZane Shelley     auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
2219df3706SZane Shelley         return (libhei::hash<libhei::NodeId_t>("TP_LOCAL_FIR") == t.getId() &&
23a7369f86SZane Shelley                 (42 == t.getBit() || 43 == t.getBit()));
24a7369f86SZane Shelley     });
25a7369f86SZane Shelley 
26a7369f86SZane Shelley     if (i_list.end() != itr)
27a7369f86SZane Shelley     {
28a7369f86SZane Shelley         o_rootCause = *itr;
29a7369f86SZane Shelley         return true;
30a7369f86SZane Shelley     }
31a7369f86SZane Shelley 
32a7369f86SZane Shelley     return false;
33a7369f86SZane Shelley }
34a7369f86SZane Shelley 
35a7369f86SZane Shelley //------------------------------------------------------------------------------
36a7369f86SZane Shelley 
__findPllUnlock(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause)37a7369f86SZane Shelley bool __findPllUnlock(const std::vector<libhei::Signature>& i_list,
38a7369f86SZane Shelley                      libhei::Signature& o_rootCause)
39a7369f86SZane Shelley {
40c62813d4SZane Shelley     using namespace util::pdbg;
41c62813d4SZane Shelley 
42a7369f86SZane Shelley     // TODO: Consider returning all of them instead of one as root cause.
43c62813d4SZane Shelley 
44c62813d4SZane Shelley     auto nodeId = libhei::hash<libhei::NodeId_t>("PLL_UNLOCK");
45c62813d4SZane Shelley 
46c62813d4SZane Shelley     // First, look for any PLL unlock attentions reported by a processsor chip.
47c62813d4SZane Shelley     auto itr1 = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
48c62813d4SZane Shelley         return (nodeId == t.getId() &&
49c62813d4SZane Shelley                 TYPE_PROC == getTrgtType(getTrgt(t.getChip())));
50a7369f86SZane Shelley     });
51a7369f86SZane Shelley 
52c62813d4SZane Shelley     if (i_list.end() != itr1)
53a7369f86SZane Shelley     {
54c62813d4SZane Shelley         o_rootCause = *itr1;
55c62813d4SZane Shelley         return true;
56c62813d4SZane Shelley     }
57c62813d4SZane Shelley 
58c62813d4SZane Shelley     // Then, look for any PLL unlock attentions reported by an OCMB chip. This
59c62813d4SZane Shelley     // is specifically for Odyssey, which are the only OCMBs that would report
60c62813d4SZane Shelley     // PLL unlock attentions.
61c62813d4SZane Shelley     auto itr2 = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
62c62813d4SZane Shelley         return (nodeId == t.getId() &&
63c62813d4SZane Shelley                 TYPE_OCMB == getTrgtType(getTrgt(t.getChip())));
64c62813d4SZane Shelley     });
65c62813d4SZane Shelley 
66c62813d4SZane Shelley     if (i_list.end() != itr2)
67c62813d4SZane Shelley     {
68c62813d4SZane Shelley         o_rootCause = *itr2;
69a7369f86SZane Shelley         return true;
70a7369f86SZane Shelley     }
71a7369f86SZane Shelley 
72a7369f86SZane Shelley     return false;
73a7369f86SZane Shelley }
74a7369f86SZane Shelley 
75a7369f86SZane Shelley //------------------------------------------------------------------------------
76a7369f86SZane Shelley 
__findMemoryChannelFailure(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)77f4792d68SZane Shelley bool __findMemoryChannelFailure(const std::vector<libhei::Signature>& i_list,
781a4f0e70SCaleb Palmer                                 libhei::Signature& o_rootCause,
791a4f0e70SCaleb Palmer                                 const RasDataParser& i_rasData)
80f4792d68SZane Shelley {
81f4792d68SZane Shelley     using namespace util::pdbg;
82f4792d68SZane Shelley 
8319df3706SZane Shelley     using func = libhei::NodeId_t (*)(const std::string& i_str);
8419df3706SZane Shelley     func __hash = libhei::hash<libhei::NodeId_t>;
8519df3706SZane Shelley 
86adda0540SZane Shelley     static const auto mc_dstl_fir = __hash("MC_DSTL_FIR");
87adda0540SZane Shelley     static const auto mc_ustl_fir = __hash("MC_USTL_FIR");
8819df3706SZane Shelley     static const auto mc_omi_dl_err_rpt = __hash("MC_OMI_DL_ERR_RPT");
89f4792d68SZane Shelley 
90adda0540SZane Shelley     // First, look for any chip checkstops from the connected OCMBs.
91adda0540SZane Shelley     for (const auto& s : i_list)
92f4792d68SZane Shelley     {
93adda0540SZane Shelley         if (TYPE_OCMB != getTrgtType(getTrgt(s.getChip())))
941a4f0e70SCaleb Palmer         {
95adda0540SZane Shelley             continue; // OCMBs only
96adda0540SZane Shelley         }
97adda0540SZane Shelley 
98adda0540SZane Shelley         // TODO: The chip data for Explorer chips currently report chip
99adda0540SZane Shelley         //       checkstops as unit checkstops. Once the chip data has been
100adda0540SZane Shelley         //       updated, the check for unit checkstops here will need to be
101adda0540SZane Shelley         //       removed.
102adda0540SZane Shelley         if (libhei::ATTN_TYPE_CHIP_CS == s.getAttnType() ||
103adda0540SZane Shelley             libhei::ATTN_TYPE_UNIT_CS == s.getAttnType())
104adda0540SZane Shelley         {
1051a4f0e70SCaleb Palmer             o_rootCause = s;
106adda0540SZane Shelley             return true;
1071a4f0e70SCaleb Palmer         }
108adda0540SZane Shelley     }
109adda0540SZane Shelley 
110adda0540SZane Shelley     // Now, look for any channel failure attentions on the processor side of the
111adda0540SZane Shelley     // memory bus.
112adda0540SZane Shelley     for (const auto& s : i_list)
113adda0540SZane Shelley     {
114adda0540SZane Shelley         if (TYPE_PROC != getTrgtType(getTrgt(s.getChip())))
115adda0540SZane Shelley         {
116adda0540SZane Shelley             continue; // processors only
117adda0540SZane Shelley         }
118adda0540SZane Shelley 
119adda0540SZane Shelley         // Any unit checkstop attentions that originated from the MC_DSTL_FIR or
120adda0540SZane Shelley         // MC_USTLFIR are considered a channel failure attention.
121adda0540SZane Shelley         // TODO: The "channel failure" designation is actually configurable via
122adda0540SZane Shelley         //       other registers. We just happen to expect anything that is
123adda0540SZane Shelley         //       configured to channel failure to also be configured to unit
124adda0540SZane Shelley         //       checkstop. Eventually, we will need some mechanism to check the
125adda0540SZane Shelley         //       configuration registers for a more accurate analysis.
126adda0540SZane Shelley         if (libhei::ATTN_TYPE_UNIT_CS == s.getAttnType() &&
127adda0540SZane Shelley             (mc_dstl_fir == s.getId() || mc_ustl_fir == s.getId()) &&
128adda0540SZane Shelley             !i_rasData.isFlagSet(s,
129adda0540SZane Shelley                                  RasDataParser::RasDataFlags::ATTN_FROM_OCMB))
130adda0540SZane Shelley         {
131adda0540SZane Shelley             o_rootCause = s;
132adda0540SZane Shelley             return true;
133adda0540SZane Shelley         }
134adda0540SZane Shelley         // Any signatures from MC_OMI_DL_ERR_RPT feed into the only bits in
135adda0540SZane Shelley         // MC_OMI_DL_FIR that are hardwired to channel failure.
1361a4f0e70SCaleb Palmer         else if (mc_omi_dl_err_rpt == s.getId())
1371a4f0e70SCaleb Palmer         {
1381a4f0e70SCaleb Palmer             o_rootCause = s;
1391a4f0e70SCaleb Palmer             return true;
1401a4f0e70SCaleb Palmer         }
1411a4f0e70SCaleb Palmer     }
142f4792d68SZane Shelley 
143f4792d68SZane Shelley     return false; // default, nothing found
144f4792d68SZane Shelley }
145f4792d68SZane Shelley 
146f4792d68SZane Shelley //------------------------------------------------------------------------------
147f4792d68SZane Shelley 
148f4792d68SZane Shelley // Will query if a signature is a potential system checkstop root cause.
149f4792d68SZane Shelley // attention. Note that this function excludes memory channel failure attentions
150ed3ab8f9SZane Shelley // which are checked in __findMemoryChannelFailure().
__findCsRootCause(const libhei::Signature & i_signature,const RasDataParser & i_rasData)1511a4f0e70SCaleb Palmer bool __findCsRootCause(const libhei::Signature& i_signature,
1521a4f0e70SCaleb Palmer                        const RasDataParser& i_rasData)
1531a4f0e70SCaleb Palmer {
15493b001c5SZane Shelley     // Check if the input signature has the CS_POSSIBLE or SUE_SOURCE flag set.
15593b001c5SZane Shelley     if (i_rasData.isFlagSet(i_signature,
1561a4f0e70SCaleb Palmer                             RasDataParser::RasDataFlags::CS_POSSIBLE) ||
1571a4f0e70SCaleb Palmer         i_rasData.isFlagSet(i_signature,
1581a4f0e70SCaleb Palmer                             RasDataParser::RasDataFlags::SUE_SOURCE))
1591a4f0e70SCaleb Palmer     {
1601a4f0e70SCaleb Palmer         return true;
1611a4f0e70SCaleb Palmer     }
162f4792d68SZane Shelley 
163f4792d68SZane Shelley     return false; // default, nothing found
164f4792d68SZane Shelley }
165f4792d68SZane Shelley 
166f4792d68SZane Shelley //------------------------------------------------------------------------------
167f4792d68SZane Shelley 
__findCsRootCause_RE(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)168f4792d68SZane Shelley bool __findCsRootCause_RE(const std::vector<libhei::Signature>& i_list,
1691a4f0e70SCaleb Palmer                           libhei::Signature& o_rootCause,
1701a4f0e70SCaleb Palmer                           const RasDataParser& i_rasData)
171f4792d68SZane Shelley {
172adda0540SZane Shelley     for (const auto& s : i_list)
173f4792d68SZane Shelley     {
174f4792d68SZane Shelley         // Only looking for recoverable attentions.
175f4792d68SZane Shelley         if (libhei::ATTN_TYPE_RECOVERABLE != s.getAttnType())
176f4792d68SZane Shelley         {
177f4792d68SZane Shelley             continue;
178f4792d68SZane Shelley         }
179f4792d68SZane Shelley 
1801a4f0e70SCaleb Palmer         if (__findCsRootCause(s, i_rasData))
181f4792d68SZane Shelley         {
182f4792d68SZane Shelley             o_rootCause = s;
183f4792d68SZane Shelley             return true;
184f4792d68SZane Shelley         }
185f4792d68SZane Shelley     }
186f4792d68SZane Shelley 
187f4792d68SZane Shelley     return false; // default, nothing found
188f4792d68SZane Shelley }
189f4792d68SZane Shelley 
190f4792d68SZane Shelley //------------------------------------------------------------------------------
191f4792d68SZane Shelley 
__findCsRootCause_UCS(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)192f4792d68SZane Shelley bool __findCsRootCause_UCS(const std::vector<libhei::Signature>& i_list,
1931a4f0e70SCaleb Palmer                            libhei::Signature& o_rootCause,
1941a4f0e70SCaleb Palmer                            const RasDataParser& i_rasData)
195f4792d68SZane Shelley {
196adda0540SZane Shelley     for (const auto& s : i_list)
197f4792d68SZane Shelley     {
198f4792d68SZane Shelley         // Only looking for unit checkstop attentions.
199f4792d68SZane Shelley         if (libhei::ATTN_TYPE_UNIT_CS != s.getAttnType())
200f4792d68SZane Shelley         {
201f4792d68SZane Shelley             continue;
202f4792d68SZane Shelley         }
203f4792d68SZane Shelley 
2041a4f0e70SCaleb Palmer         if (__findCsRootCause(s, i_rasData))
205f4792d68SZane Shelley         {
206f4792d68SZane Shelley             o_rootCause = s;
207f4792d68SZane Shelley             return true;
208f4792d68SZane Shelley         }
209f4792d68SZane Shelley     }
210f4792d68SZane Shelley 
211f4792d68SZane Shelley     return false; // default, nothing found
212f4792d68SZane Shelley }
213f4792d68SZane Shelley 
214f4792d68SZane Shelley //------------------------------------------------------------------------------
215f4792d68SZane Shelley 
__findOcmbAttnBits(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)21651f8202cSCaleb Palmer bool __findOcmbAttnBits(const std::vector<libhei::Signature>& i_list,
21751f8202cSCaleb Palmer                         libhei::Signature& o_rootCause,
21851f8202cSCaleb Palmer                         const RasDataParser& i_rasData)
21951f8202cSCaleb Palmer {
22051f8202cSCaleb Palmer     using namespace util::pdbg;
22151f8202cSCaleb Palmer 
22251f8202cSCaleb Palmer     // If we have any attentions from an OCMB, assume isolation to the OCMBs
22351f8202cSCaleb Palmer     // was successful and the ATTN_FROM_OCMB flag does not need to be checked.
224adda0540SZane Shelley     for (const auto& s : i_list)
22551f8202cSCaleb Palmer     {
22651f8202cSCaleb Palmer         if (TYPE_OCMB == getTrgtType(getTrgt(s.getChip())))
22751f8202cSCaleb Palmer         {
22851f8202cSCaleb Palmer             return false;
22951f8202cSCaleb Palmer         }
23051f8202cSCaleb Palmer     }
23151f8202cSCaleb Palmer 
232adda0540SZane Shelley     for (const auto& s : i_list)
23351f8202cSCaleb Palmer     {
23493b001c5SZane Shelley         if (i_rasData.isFlagSet(s, RasDataParser::RasDataFlags::ATTN_FROM_OCMB))
23551f8202cSCaleb Palmer         {
23651f8202cSCaleb Palmer             o_rootCause = s;
23751f8202cSCaleb Palmer             return true;
23851f8202cSCaleb Palmer         }
23951f8202cSCaleb Palmer     }
24051f8202cSCaleb Palmer 
24151f8202cSCaleb Palmer     return false; // default, nothing found
24251f8202cSCaleb Palmer }
24351f8202cSCaleb Palmer 
24451f8202cSCaleb Palmer //------------------------------------------------------------------------------
24551f8202cSCaleb Palmer 
__findNonExternalCs(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause)246f4792d68SZane Shelley bool __findNonExternalCs(const std::vector<libhei::Signature>& i_list,
247f4792d68SZane Shelley                          libhei::Signature& o_rootCause)
248f4792d68SZane Shelley {
249f4792d68SZane Shelley     using namespace util::pdbg;
250f4792d68SZane Shelley 
25119df3706SZane Shelley     static const auto pb_ext_fir = libhei::hash<libhei::NodeId_t>("PB_EXT_FIR");
252f4792d68SZane Shelley 
253adda0540SZane Shelley     for (const auto& s : i_list)
254f4792d68SZane Shelley     {
255f4792d68SZane Shelley         const auto targetType = getTrgtType(getTrgt(s.getChip()));
256f4792d68SZane Shelley         const auto id = s.getId();
257f4792d68SZane Shelley         const auto attnType = s.getAttnType();
258f4792d68SZane Shelley 
259adda0540SZane Shelley         // Find any processor with chip checkstop attention that did not
260f4792d68SZane Shelley         // originate from the PB_EXT_FIR.
261f4792d68SZane Shelley         if ((TYPE_PROC == targetType) &&
262adda0540SZane Shelley             (libhei::ATTN_TYPE_CHIP_CS == attnType) && (pb_ext_fir != id))
263f4792d68SZane Shelley         {
264f4792d68SZane Shelley             o_rootCause = s;
265f4792d68SZane Shelley             return true;
266f4792d68SZane Shelley         }
267f4792d68SZane Shelley     }
268f4792d68SZane Shelley 
269f4792d68SZane Shelley     return false; // default, nothing found
270f4792d68SZane Shelley }
271f4792d68SZane Shelley 
272f4792d68SZane Shelley //------------------------------------------------------------------------------
273f4792d68SZane Shelley 
__findTiRootCause(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)274baec7c01SZane Shelley bool __findTiRootCause(const std::vector<libhei::Signature>& i_list,
275622cd4beSCaleb Palmer                        libhei::Signature& o_rootCause,
276622cd4beSCaleb Palmer                        const RasDataParser& i_rasData)
277baec7c01SZane Shelley {
278baec7c01SZane Shelley     using namespace util::pdbg;
279622cd4beSCaleb Palmer     using rdf = RasDataParser::RasDataFlags;
280baec7c01SZane Shelley 
281baec7c01SZane Shelley     for (const auto& signature : i_list)
282baec7c01SZane Shelley     {
283baec7c01SZane Shelley         const auto attnType = signature.getAttnType();
284baec7c01SZane Shelley 
285baec7c01SZane Shelley         // Only looking for recoverable or unit checkstop attentions.
286baec7c01SZane Shelley         if (libhei::ATTN_TYPE_RECOVERABLE != attnType &&
287baec7c01SZane Shelley             libhei::ATTN_TYPE_UNIT_CS != attnType)
288baec7c01SZane Shelley         {
289baec7c01SZane Shelley             continue;
290baec7c01SZane Shelley         }
291baec7c01SZane Shelley 
292*34b0ce19SCaleb Palmer         // Skip any signature with the 'recovered_error', 'informational_only',
293*34b0ce19SCaleb Palmer         // or 'attn_from_ocmb' flags.
294622cd4beSCaleb Palmer         if (i_rasData.isFlagSet(signature, rdf::RECOVERED_ERROR) ||
295622cd4beSCaleb Palmer             i_rasData.isFlagSet(signature, rdf::INFORMATIONAL_ONLY) ||
296*34b0ce19SCaleb Palmer             i_rasData.isFlagSet(signature, rdf::MNFG_INFORMATIONAL_ONLY) ||
297*34b0ce19SCaleb Palmer             i_rasData.isFlagSet(signature, rdf::ATTN_FROM_OCMB))
298baec7c01SZane Shelley         {
299baec7c01SZane Shelley             continue;
300baec7c01SZane Shelley         }
301baec7c01SZane Shelley 
302baec7c01SZane Shelley         // At this point, the attention has not been explicitly ignored. So
303baec7c01SZane Shelley         // return this signature and exit.
304baec7c01SZane Shelley         o_rootCause = signature;
305baec7c01SZane Shelley         return true;
306baec7c01SZane Shelley     }
307baec7c01SZane Shelley 
308baec7c01SZane Shelley     return false; // default, nothing found
309baec7c01SZane Shelley }
310baec7c01SZane Shelley 
311baec7c01SZane Shelley //------------------------------------------------------------------------------
312baec7c01SZane Shelley 
findRootCause(AnalysisType i_type,const libhei::IsolationData & i_isoData,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)313c3038c03SCaleb Palmer bool findRootCause(AnalysisType i_type, const libhei::IsolationData& i_isoData,
3141a4f0e70SCaleb Palmer                    libhei::Signature& o_rootCause,
3151a4f0e70SCaleb Palmer                    const RasDataParser& i_rasData)
31665fefb2cSZane Shelley {
31765fefb2cSZane Shelley     // We'll need to make a copy of the list so that the original list is
318ec227c2cSZane Shelley     // maintained for the PEL.
31965fefb2cSZane Shelley     std::vector<libhei::Signature> list{i_isoData.getSignatureList()};
32065fefb2cSZane Shelley 
32165fefb2cSZane Shelley     // START WORKAROUND
32265fefb2cSZane Shelley     // TODO: Filtering should be data driven. Until that support is available,
32365fefb2cSZane Shelley     //       use the following isolation rules.
32465fefb2cSZane Shelley 
325ec227c2cSZane Shelley     // Ensure the list is not empty before continuing.
326f4792d68SZane Shelley     if (list.empty())
327f4792d68SZane Shelley     {
328ec227c2cSZane Shelley         return false; // nothing more to do
329f4792d68SZane Shelley     }
330f4792d68SZane Shelley 
331f4792d68SZane Shelley     // First, look for any RCS OSC errors. This must always be first because
332f4792d68SZane Shelley     // they can cause downstream PLL unlock attentions.
333f4792d68SZane Shelley     if (__findRcsOscError(list, o_rootCause))
334a7369f86SZane Shelley     {
335a7369f86SZane Shelley         return true;
336a7369f86SZane Shelley     }
337a7369f86SZane Shelley 
338f4792d68SZane Shelley     // Second, look for any PLL unlock attentions. This must always be second
339f4792d68SZane Shelley     // because PLL unlock attentions can cause any number of downstream
340f4792d68SZane Shelley     // attentions, including a system checkstop.
341f4792d68SZane Shelley     if (__findPllUnlock(list, o_rootCause))
342f4792d68SZane Shelley     {
343f4792d68SZane Shelley         return true;
344f4792d68SZane Shelley     }
345f4792d68SZane Shelley 
346ec227c2cSZane Shelley     // Regardless of the analysis type, always look for anything that could be
347ec227c2cSZane Shelley     // blamed as the root cause of a system checkstop.
348ec227c2cSZane Shelley 
349f4792d68SZane Shelley     // Memory channel failure attentions will produce SUEs and likely cause
350f4792d68SZane Shelley     // downstream attentions, including a system checkstop.
3511a4f0e70SCaleb Palmer     if (__findMemoryChannelFailure(list, o_rootCause, i_rasData))
352f4792d68SZane Shelley     {
353f4792d68SZane Shelley         return true;
354f4792d68SZane Shelley     }
355f4792d68SZane Shelley 
356f4792d68SZane Shelley     // Look for any recoverable attentions that have been identified as a
357f4792d68SZane Shelley     // potential root cause of a system checkstop attention. These would include
358f4792d68SZane Shelley     // any attention that would generate an SUE. Note that is it possible for
359f4792d68SZane Shelley     // recoverables to generate unit checkstop attentions so we must check them
360f4792d68SZane Shelley     // first.
3611a4f0e70SCaleb Palmer     if (__findCsRootCause_RE(list, o_rootCause, i_rasData))
362f4792d68SZane Shelley     {
363f4792d68SZane Shelley         return true;
364f4792d68SZane Shelley     }
365f4792d68SZane Shelley 
366f4792d68SZane Shelley     // Look for any unit checkstop attentions (other than memory channel
367f4792d68SZane Shelley     // failures) that have been identified as a potential root cause of a
368f4792d68SZane Shelley     // system checkstop attention. These would include any attention that would
369f4792d68SZane Shelley     // generate an SUE.
3701a4f0e70SCaleb Palmer     if (__findCsRootCause_UCS(list, o_rootCause, i_rasData))
371f4792d68SZane Shelley     {
372f4792d68SZane Shelley         return true;
373f4792d68SZane Shelley     }
374f4792d68SZane Shelley 
37593b001c5SZane Shelley     // If no other viable root cause has been found, check for any signatures
37693b001c5SZane Shelley     // with the ATTN_FROM_OCMB flag in case there was an attention from an
37793b001c5SZane Shelley     // inaccessible OCMB.
37851f8202cSCaleb Palmer     if (__findOcmbAttnBits(list, o_rootCause, i_rasData))
37951f8202cSCaleb Palmer     {
38051f8202cSCaleb Palmer         return true;
38151f8202cSCaleb Palmer     }
38251f8202cSCaleb Palmer 
383f4792d68SZane Shelley     // Look for any system checkstop attentions that originated from within the
384f4792d68SZane Shelley     // chip that reported the attention. In other words, no external checkstop
385f4792d68SZane Shelley     // attentions.
386f4792d68SZane Shelley     if (__findNonExternalCs(list, o_rootCause))
387f4792d68SZane Shelley     {
388f4792d68SZane Shelley         return true;
389f4792d68SZane Shelley     }
390f4792d68SZane Shelley 
391ec227c2cSZane Shelley     if (AnalysisType::SYSTEM_CHECKSTOP != i_type)
392f4792d68SZane Shelley     {
393ec227c2cSZane Shelley         // No system checkstop root cause attentions were found. Next, look for
394ec227c2cSZane Shelley         // any recoverable or unit checkstop attentions that could be associated
395baec7c01SZane Shelley         // with a TI.
396622cd4beSCaleb Palmer         if (__findTiRootCause(list, o_rootCause, i_rasData))
397ec227c2cSZane Shelley         {
398ec227c2cSZane Shelley             return true;
399ec227c2cSZane Shelley         }
400ec227c2cSZane Shelley 
401ec227c2cSZane Shelley         if (AnalysisType::TERMINATE_IMMEDIATE != i_type)
402ec227c2cSZane Shelley         {
403ec227c2cSZane Shelley             // No attentions associated with a system checkstop or TI were
404ec227c2cSZane Shelley             // found. Simply, return the first entry in the list.
40565fefb2cSZane Shelley             o_rootCause = list.front();
40665fefb2cSZane Shelley             return true;
40765fefb2cSZane Shelley         }
408ec227c2cSZane Shelley     }
40965fefb2cSZane Shelley 
41065fefb2cSZane Shelley     // END WORKAROUND
41165fefb2cSZane Shelley 
41265fefb2cSZane Shelley     return false; // default, no active attentions found.
41365fefb2cSZane Shelley }
41465fefb2cSZane Shelley 
41565fefb2cSZane Shelley //------------------------------------------------------------------------------
41665fefb2cSZane Shelley 
__findIueTh(const std::vector<libhei::Signature> & i_list,libhei::Signature & o_rootCause)417c3038c03SCaleb Palmer bool __findIueTh(const std::vector<libhei::Signature>& i_list,
418c3038c03SCaleb Palmer                  libhei::Signature& o_rootCause)
419c3038c03SCaleb Palmer {
420c3038c03SCaleb Palmer     auto itr = std::find_if(i_list.begin(), i_list.end(), [&](const auto& t) {
421c3038c03SCaleb Palmer         return (libhei::hash<libhei::NodeId_t>("RDFFIR") == t.getId() &&
422c3038c03SCaleb Palmer                 (17 == t.getBit() || 37 == t.getBit())) ||
423c3038c03SCaleb Palmer                (libhei::hash<libhei::NodeId_t>("RDF_FIR") == t.getId() &&
424c3038c03SCaleb Palmer                 (18 == t.getBit() || 38 == t.getBit()));
425c3038c03SCaleb Palmer     });
426c3038c03SCaleb Palmer 
427c3038c03SCaleb Palmer     if (i_list.end() != itr)
428c3038c03SCaleb Palmer     {
429c3038c03SCaleb Palmer         o_rootCause = *itr;
430c3038c03SCaleb Palmer         return true;
431c3038c03SCaleb Palmer     }
432c3038c03SCaleb Palmer 
433c3038c03SCaleb Palmer     return false;
434c3038c03SCaleb Palmer }
435c3038c03SCaleb Palmer 
436c3038c03SCaleb Palmer //------------------------------------------------------------------------------
437c3038c03SCaleb Palmer 
rootCauseSpecialCases(const libhei::IsolationData & i_isoData,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)438c3038c03SCaleb Palmer void rootCauseSpecialCases(const libhei::IsolationData& i_isoData,
439c3038c03SCaleb Palmer                            libhei::Signature& o_rootCause,
440c3038c03SCaleb Palmer                            const RasDataParser& i_rasData)
441c3038c03SCaleb Palmer {
442c3038c03SCaleb Palmer     using func = libhei::NodeId_t (*)(const std::string& i_str);
443c3038c03SCaleb Palmer     func __hash = libhei::hash<libhei::NodeId_t>;
444c3038c03SCaleb Palmer 
445c3038c03SCaleb Palmer     // Check for any special cases that exist for specific FIR bits.
446c3038c03SCaleb Palmer 
447c3038c03SCaleb Palmer     // If the channel fail was specifically a firmware initiated channel fail
448c3038c03SCaleb Palmer     // (SRQFIR[25] for Explorer OCMBs, SRQ_FIR[46] for Odyssey OCMBs) check for
449c3038c03SCaleb Palmer     // any IUE bits that are on that would have caused the channel fail
450c3038c03SCaleb Palmer     // (RDFFIR[17,37] for Explorer OCMBs, RDF_FIR_0[18,38] or RDF_FIR_1[18,38]
451c3038c03SCaleb Palmer     // for Odyssey OCMBs).
452c3038c03SCaleb Palmer 
453c3038c03SCaleb Palmer     // Explorer SRQFIR
454c3038c03SCaleb Palmer     static const auto srqfir = __hash("SRQFIR");
455c3038c03SCaleb Palmer     // Odyssey SRQ_FIR
456c3038c03SCaleb Palmer     static const auto srq_fir = __hash("SRQ_FIR");
457c3038c03SCaleb Palmer 
458c3038c03SCaleb Palmer     std::vector<libhei::Signature> list{i_isoData.getSignatureList()};
459c3038c03SCaleb Palmer 
460c3038c03SCaleb Palmer     if (((srqfir == o_rootCause.getId() && 25 == o_rootCause.getBit()) ||
461c3038c03SCaleb Palmer          (srq_fir == o_rootCause.getId() && 46 == o_rootCause.getBit())) &&
462c3038c03SCaleb Palmer         __findIueTh(list, o_rootCause))
463c3038c03SCaleb Palmer     {
464c3038c03SCaleb Palmer         // If __findIueTh returned true, o_rootCause was updated, return.
465c3038c03SCaleb Palmer         return;
466c3038c03SCaleb Palmer     }
467c3038c03SCaleb Palmer 
468c3038c03SCaleb Palmer     // Check if the root cause found was a potential side effect of an
469c3038c03SCaleb Palmer     // ODP data corruption error. If it was, check if any other signature
470c3038c03SCaleb Palmer     // in the signature list was a potential root cause.
471c3038c03SCaleb Palmer     auto OdpSide = RasDataParser::RasDataFlags::ODP_DATA_CORRUPT_SIDE_EFFECT;
472c3038c03SCaleb Palmer     auto OdpRoot = RasDataParser::RasDataFlags::ODP_DATA_CORRUPT_ROOT_CAUSE;
473c3038c03SCaleb Palmer     if (i_rasData.isFlagSet(o_rootCause, OdpSide))
474c3038c03SCaleb Palmer     {
475c3038c03SCaleb Palmer         for (const auto& s : list)
476c3038c03SCaleb Palmer         {
477c3038c03SCaleb Palmer             if (i_rasData.isFlagSet(s, OdpRoot))
478c3038c03SCaleb Palmer             {
479c3038c03SCaleb Palmer                 // ODP data corruption root cause found, return.
480c3038c03SCaleb Palmer                 o_rootCause = s;
481c3038c03SCaleb Palmer                 return;
482c3038c03SCaleb Palmer             }
483c3038c03SCaleb Palmer         }
484c3038c03SCaleb Palmer     }
485a4424050SCaleb Palmer 
486a4424050SCaleb Palmer     // Odyssey RDF_FIR
487a4424050SCaleb Palmer     static const auto rdf_fir = __hash("RDF_FIR");
488a4424050SCaleb Palmer 
489a4424050SCaleb Palmer     // RDF_FIR[41] can be the root cause of RDF_FIR[16], so if bit 16 is on,
490a4424050SCaleb Palmer     // check if bit 41 is also on.
491a4424050SCaleb Palmer     if (rdf_fir == o_rootCause.getId() && 16 == o_rootCause.getBit())
492a4424050SCaleb Palmer     {
493a4424050SCaleb Palmer         // Look for RDF_FIR[41]
494a4424050SCaleb Palmer         auto itr = std::find_if(list.begin(), list.end(), [&](const auto& t) {
495a4424050SCaleb Palmer             return (rdf_fir == t.getId() && 41 == t.getBit());
496a4424050SCaleb Palmer         });
497a4424050SCaleb Palmer         if (list.end() != itr)
498a4424050SCaleb Palmer         {
499a4424050SCaleb Palmer             o_rootCause = *itr;
500a4424050SCaleb Palmer         }
501a4424050SCaleb Palmer     }
502c3038c03SCaleb Palmer }
503c3038c03SCaleb Palmer 
504c3038c03SCaleb Palmer //------------------------------------------------------------------------------
505c3038c03SCaleb Palmer 
filterRootCause(AnalysisType i_type,const libhei::IsolationData & i_isoData,libhei::Signature & o_rootCause,const RasDataParser & i_rasData)506c3038c03SCaleb Palmer bool filterRootCause(AnalysisType i_type,
507c3038c03SCaleb Palmer                      const libhei::IsolationData& i_isoData,
508c3038c03SCaleb Palmer                      libhei::Signature& o_rootCause,
509c3038c03SCaleb Palmer                      const RasDataParser& i_rasData)
510c3038c03SCaleb Palmer {
511c3038c03SCaleb Palmer     // Find the initial root cause attention based on common rules for FIR
512c3038c03SCaleb Palmer     // isolation.
513c3038c03SCaleb Palmer     bool rc = findRootCause(i_type, i_isoData, o_rootCause, i_rasData);
514c3038c03SCaleb Palmer 
515c3038c03SCaleb Palmer     // If some root cause was found, handle any special cases for specific FIR
516c3038c03SCaleb Palmer     // bits that require additional logic to determine the root cause.
517c3038c03SCaleb Palmer     if (true == rc)
518c3038c03SCaleb Palmer     {
519c3038c03SCaleb Palmer         rootCauseSpecialCases(i_isoData, o_rootCause, i_rasData);
520c3038c03SCaleb Palmer     }
521c3038c03SCaleb Palmer 
522c3038c03SCaleb Palmer     return rc;
523c3038c03SCaleb Palmer }
524c3038c03SCaleb Palmer 
525c3038c03SCaleb Palmer //------------------------------------------------------------------------------
526c3038c03SCaleb Palmer 
52765fefb2cSZane Shelley } // namespace analyzer
528