1 #include <stdio.h>
2
3 #include <analyzer/analyzer_main.hpp>
4 #include <analyzer/plugins/plugin.hpp>
5 #include <analyzer/ras-data/ras-data-parser.hpp>
6 #include <hei_util.hpp>
7 #include <util/pdbg.hpp>
8 #include <util/trace.hpp>
9
10 #include "gtest/gtest.h"
11
12 namespace analyzer
13 {
14 // Forward reference of filterRootCause
15 bool filterRootCause(AnalysisType i_type,
16 const libhei::IsolationData& i_isoData,
17 libhei::Signature& o_rootCause,
18 const RasDataParser& i_rasData);
19 } // namespace analyzer
20
21 using namespace analyzer;
22
23 // Processor side FIRs
24 static const auto eqCoreFir = static_cast<libhei::NodeId_t>(
25 libhei::hash<libhei::NodeId_t>("EQ_CORE_FIR"));
26
27 static const auto mc_dstl_fir = static_cast<libhei::NodeId_t>(
28 libhei::hash<libhei::NodeId_t>("MC_DSTL_FIR"));
29
30 // Explorer OCMB FIRs
31 static const auto rdfFir =
32 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("RDFFIR"));
33
34 // Odyssey OCMB FIRs
35 static const auto srq_fir =
36 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("SRQ_FIR"));
37
38 static const auto rdf_fir =
39 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("RDF_FIR"));
40
41 static const auto odp_fir =
42 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("ODP_FIR"));
43
// Exercises filterRootCause() against seven hand-built sets of active
// signatures and checks, for each one, that the filter reports success and
// selects the expected signature as the root cause.
TEST(RootCauseFilter, Filter1)
{
    // The pdbg targets must be initialized before any target is looked up.
    pdbg_targets_init(nullptr);

    RasDataParser rasData{};

    // ---------------------------------------------------------------------
    // Scenario 1: checkstop with a UE root cause on an OCMB
    // ---------------------------------------------------------------------

    // Processor chip that reports the checkstop.
    auto procTrgt = util::pdbg::getTrgt("/proc0");
    auto procChip = libhei::Chip{procTrgt, P10_20};

    // EQ_CORE_FIR[14]: ME = 0 checkstop
    auto coreCheckstop = libhei::Signature{procChip, eqCoreFir, 0, 14,
                                           libhei::ATTN_TYPE_CHIP_CS};

    // MC_DSTL_FIR[1]: AFU initiated Recoverable Attn on Subchannel A
    auto dstlRecoverableAttn = libhei::Signature{
        procChip, mc_dstl_fir, 0, 1, libhei::ATTN_TYPE_RECOVERABLE};

    // OCMB (treated as an Explorer here) that holds the root cause.
    auto ocmbTrgt =
        util::pdbg::getTrgt("proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0");
    auto explorerChip = libhei::Chip{ocmbTrgt, EXPLORER_20};

    // RDFFIR[14]: Mainline read UE
    auto explorerUe = libhei::Signature{explorerChip, rdfFir, 0, 14,
                                        libhei::ATTN_TYPE_RECOVERABLE};

    // Build the list of active signatures for this scenario.
    libhei::IsolationData isoData{};
    isoData.addSignature(coreCheckstop);
    isoData.addSignature(dstlRecoverableAttn);
    isoData.addSignature(explorerUe);

    // Receives the filter's choice; reused by every scenario below.
    libhei::Signature selected;

    // Runs the filter over whatever isoData currently holds, writing the
    // chosen signature into `selected`.
    const auto applyFilter = [&](AnalysisType i_analysisType) {
        return filterRootCause(i_analysisType, isoData, selected, rasData);
    };

    bool found = applyFilter(AnalysisType::SYSTEM_CHECKSTOP);
    EXPECT_TRUE(found);
    EXPECT_EQ(explorerUe.toUint32(), selected.toUint32());

    // ---------------------------------------------------------------------
    // Scenario 2: checkstop with an unknown RE attn on an OCMB
    // ---------------------------------------------------------------------

    // Same as scenario 1, but without any signature from the OCMB itself.
    isoData.flush();
    isoData.addSignature(coreCheckstop);
    isoData.addSignature(dstlRecoverableAttn);

    found = applyFilter(AnalysisType::SYSTEM_CHECKSTOP);
    EXPECT_TRUE(found);
    EXPECT_EQ(dstlRecoverableAttn.toUint32(), selected.toUint32());

    // ---------------------------------------------------------------------
    // Scenario 3: checkstop with an unknown UCS attn on an OCMB
    // ---------------------------------------------------------------------

    // MC_DSTL_FIR[0]: AFU initiated Checkstop on Subchannel A
    auto dstlUnitCsAttn = libhei::Signature{procChip, mc_dstl_fir, 0, 0,
                                            libhei::ATTN_TYPE_UNIT_CS};

    isoData.flush();
    isoData.addSignature(coreCheckstop);
    isoData.addSignature(dstlUnitCsAttn);

    found = applyFilter(AnalysisType::SYSTEM_CHECKSTOP);
    EXPECT_TRUE(found);
    EXPECT_EQ(dstlUnitCsAttn.toUint32(), selected.toUint32());

    // ---------------------------------------------------------------------
    // Scenario 4: checkstop with a non-root cause recoverable from an OCMB
    // ---------------------------------------------------------------------

    // RDFFIR[42]: SCOM recoverable register parity error
    auto explorerScomParity = libhei::Signature{
        explorerChip, rdfFir, 0, 42, libhei::ATTN_TYPE_RECOVERABLE};

    isoData.flush();
    isoData.addSignature(coreCheckstop);
    isoData.addSignature(dstlRecoverableAttn);
    isoData.addSignature(explorerScomParity);

    // The checkstop itself is expected to be selected in this case.
    found = applyFilter(AnalysisType::SYSTEM_CHECKSTOP);
    EXPECT_TRUE(found);
    EXPECT_EQ(coreCheckstop.toUint32(), selected.toUint32());

    // ---------------------------------------------------------------------
    // Scenario 5: firmware initiated channel fail due to an IUE threshold on
    // an Odyssey OCMB
    // ---------------------------------------------------------------------

    // Same OCMB target as above, this time modeled as an Odyssey chip.
    auto odysseyChip = libhei::Chip{ocmbTrgt, ODYSSEY_10};

    auto channelFail = libhei::Signature{odysseyChip, srq_fir, 0, 46,
                                         libhei::ATTN_TYPE_CHIP_CS};
    auto odysseyIue = libhei::Signature{odysseyChip, rdf_fir, 0, 18,
                                        libhei::ATTN_TYPE_RECOVERABLE};

    isoData.flush();
    isoData.addSignature(channelFail);
    isoData.addSignature(odysseyIue);

    found = applyFilter(AnalysisType::SYSTEM_CHECKSTOP);
    EXPECT_TRUE(found);
    EXPECT_EQ(odysseyIue.toUint32(), selected.toUint32());

    // ---------------------------------------------------------------------
    // Scenario 6: a UE that is the side effect of an ODP data corruption
    // error on an Odyssey OCMB
    // ---------------------------------------------------------------------

    auto odysseyUe = libhei::Signature{odysseyChip, rdf_fir, 0, 15,
                                       libhei::ATTN_TYPE_RECOVERABLE};
    auto odpCorruption = libhei::Signature{odysseyChip, odp_fir, 0, 6,
                                           libhei::ATTN_TYPE_RECOVERABLE};

    isoData.flush();
    isoData.addSignature(odysseyUe);
    isoData.addSignature(odpCorruption);

    found = applyFilter(AnalysisType::SYSTEM_CHECKSTOP);
    EXPECT_TRUE(found);
    EXPECT_EQ(odpCorruption.toUint32(), selected.toUint32());

    // ---------------------------------------------------------------------
    // Scenario 7: a Terminate Immediate with recoverable attentions, one
    // which can be blamed as a root cause, and one that can't
    // ---------------------------------------------------------------------

    // MC_DSTL_FIR[14]: Subchannel A valid cmd timeout error
    auto cmdTimeoutRe = libhei::Signature{procChip, mc_dstl_fir, 0, 14,
                                          libhei::ATTN_TYPE_RECOVERABLE};

    // MC_DSTL_FIR[16]: Subchannel A buffer overuse error
    auto bufferOveruseRe = libhei::Signature{procChip, mc_dstl_fir, 0, 16,
                                             libhei::ATTN_TYPE_RECOVERABLE};

    isoData.flush();
    isoData.addSignature(cmdTimeoutRe);
    isoData.addSignature(bufferOveruseRe);

    // Unlike the scenarios above, this one runs as a Terminate Immediate
    // analysis; the buffer overuse error is the one expected to be blamed.
    found = applyFilter(AnalysisType::TERMINATE_IMMEDIATE);
    EXPECT_TRUE(found);
    EXPECT_EQ(bufferOveruseRe.toUint32(), selected.toUint32());
}
184