1 #include <stdio.h> 2 3 #include <analyzer/analyzer_main.hpp> 4 #include <analyzer/plugins/plugin.hpp> 5 #include <analyzer/ras-data/ras-data-parser.hpp> 6 #include <hei_util.hpp> 7 #include <util/pdbg.hpp> 8 #include <util/trace.hpp> 9 10 #include "gtest/gtest.h" 11 12 namespace analyzer 13 { 14 // Forward reference of filterRootCause 15 bool filterRootCause(AnalysisType i_type, 16 const libhei::IsolationData& i_isoData, 17 libhei::Signature& o_rootCause, 18 const RasDataParser& i_rasData); 19 } // namespace analyzer 20 21 using namespace analyzer; 22 23 // Processor side FIRs 24 static const auto eqCoreFir = static_cast<libhei::NodeId_t>( 25 libhei::hash<libhei::NodeId_t>("EQ_CORE_FIR")); 26 27 static const auto mc_dstl_fir = static_cast<libhei::NodeId_t>( 28 libhei::hash<libhei::NodeId_t>("MC_DSTL_FIR")); 29 30 // Explorer OCMB FIRs 31 static const auto rdfFir = 32 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("RDFFIR")); 33 34 // Odyssey OCMB FIRs 35 static const auto srq_fir = 36 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("SRQ_FIR")); 37 38 static const auto rdf_fir = 39 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("RDF_FIR")); 40 41 static const auto odp_fir = 42 static_cast<libhei::NodeId_t>(libhei::hash<libhei::NodeId_t>("ODP_FIR")); 43 44 TEST(RootCauseFilter, Filter1) 45 { 46 pdbg_targets_init(nullptr); 47 48 RasDataParser rasData{}; 49 50 // Test 1: Test a checkstop with a UE root cause on an OCMB 51 52 // Checkstop signature on the proc 53 auto proc0 = util::pdbg::getTrgt("/proc0"); 54 libhei::Chip procChip0{proc0, P10_20}; 55 56 // EQ_CORE_FIR[14]: ME = 0 checkstop 57 libhei::Signature checkstopSig{procChip0, eqCoreFir, 0, 14, 58 libhei::ATTN_TYPE_CHIP_CS}; 59 60 // MC_DSTL_FIR[1]: AFU initiated Recoverable Attn on Subchannel A 61 libhei::Signature reAttnSig{procChip0, mc_dstl_fir, 0, 1, 62 libhei::ATTN_TYPE_RECOVERABLE}; 63 64 // Root cause signature on the ocmb 65 auto ocmb0 = 66 util::pdbg::getTrgt("proc0/pib/perv12/mc0/mi0/mcc0/omi0/ocmb0"); 67 libhei::Chip ocmbChip0{ocmb0, EXPLORER_20}; 68 69 // RDFFIR[14]: Mainline read UE 70 libhei::Signature ueSig{ocmbChip0, rdfFir, 0, 14, 71 libhei::ATTN_TYPE_RECOVERABLE}; 72 73 // Add the signatures to the isolation data 74 libhei::IsolationData isoData{}; 75 isoData.addSignature(checkstopSig); 76 isoData.addSignature(reAttnSig); 77 isoData.addSignature(ueSig); 78 79 libhei::Signature rootCause; 80 bool attnFound = filterRootCause(AnalysisType::SYSTEM_CHECKSTOP, isoData, 81 rootCause, rasData); 82 EXPECT_TRUE(attnFound); 83 EXPECT_EQ(ueSig.toUint32(), rootCause.toUint32()); 84 85 // Test 2: Test a checkstop with an unknown RE attn on an OCMB 86 87 // Add the signatures to the isolation data 88 isoData.flush(); 89 isoData.addSignature(checkstopSig); 90 isoData.addSignature(reAttnSig); 91 92 attnFound = filterRootCause(AnalysisType::SYSTEM_CHECKSTOP, isoData, 93 rootCause, rasData); 94 EXPECT_TRUE(attnFound); 95 EXPECT_EQ(reAttnSig.toUint32(), rootCause.toUint32()); 96 97 // Test 3: Test a checkstop with an unknown UCS attn on an OCMB 98 99 // MC_DSTL_FIR[0]: AFU initiated Checkstop on Subchannel A 100 libhei::Signature ucsAttnSig{procChip0, mc_dstl_fir, 0, 0, 101 libhei::ATTN_TYPE_UNIT_CS}; 102 103 isoData.flush(); 104 isoData.addSignature(checkstopSig); 105 isoData.addSignature(ucsAttnSig); 106 107 attnFound = filterRootCause(AnalysisType::SYSTEM_CHECKSTOP, isoData, 108 rootCause, rasData); 109 EXPECT_TRUE(attnFound); 110 EXPECT_EQ(ucsAttnSig.toUint32(), rootCause.toUint32()); 111 112 // Test 4: Test a checkstop with a non-root cause recoverable from an OCMB 113 114 // RDFFIR[42]: SCOM recoverable register parity error 115 libhei::Signature reSig{ocmbChip0, rdfFir, 0, 42, 116 libhei::ATTN_TYPE_RECOVERABLE}; 117 118 isoData.flush(); 119 isoData.addSignature(checkstopSig); 120 isoData.addSignature(reAttnSig); 121 isoData.addSignature(reSig); 122 123 attnFound = filterRootCause(AnalysisType::SYSTEM_CHECKSTOP, isoData, 124 rootCause, rasData); 125 EXPECT_TRUE(attnFound); 126 EXPECT_EQ(checkstopSig.toUint32(), rootCause.toUint32()); 127 128 // Test 5: Test a firmware initiated channel fail due to an IUE threshold on 129 // a Odyssey OCMB 130 libhei::Chip odyChip0{ocmb0, ODYSSEY_10}; 131 132 libhei::Signature fwInitChnlFail{odyChip0, srq_fir, 0, 46, 133 libhei::ATTN_TYPE_CHIP_CS}; 134 libhei::Signature mainlineIue{odyChip0, rdf_fir, 0, 18, 135 libhei::ATTN_TYPE_RECOVERABLE}; 136 137 isoData.flush(); 138 isoData.addSignature(fwInitChnlFail); 139 isoData.addSignature(mainlineIue); 140 141 attnFound = filterRootCause(AnalysisType::SYSTEM_CHECKSTOP, isoData, 142 rootCause, rasData); 143 EXPECT_TRUE(attnFound); 144 EXPECT_EQ(mainlineIue.toUint32(), rootCause.toUint32()); 145 146 // Test 6: Test a UE that is the side effect of an ODP data corruption error 147 // on an Odyssey OCMB 148 libhei::Signature mainlineUe{odyChip0, rdf_fir, 0, 15, 149 libhei::ATTN_TYPE_RECOVERABLE}; 150 libhei::Signature odpRootCause{odyChip0, odp_fir, 0, 6, 151 libhei::ATTN_TYPE_RECOVERABLE}; 152 153 isoData.flush(); 154 isoData.addSignature(mainlineUe); 155 isoData.addSignature(odpRootCause); 156 157 attnFound = filterRootCause(AnalysisType::SYSTEM_CHECKSTOP, isoData, 158 rootCause, rasData); 159 160 EXPECT_TRUE(attnFound); 161 EXPECT_EQ(odpRootCause.toUint32(), rootCause.toUint32()); 162 } 163