1 #include <assert.h> 2 #include <unistd.h> 3 4 #include <analyzer/ras-data/ras-data-parser.hpp> 5 #include <analyzer/service_data.hpp> 6 #include <attn/attn_dump.hpp> 7 #include <hei_main.hpp> 8 #include <util/pdbg.hpp> 9 #include <util/trace.hpp> 10 11 namespace analyzer 12 { 13 14 //------------------------------------------------------------------------------ 15 16 // Forward references for externally defined functions. 17 18 /** 19 * @brief Will get the list of active chip and initialize the isolator. 20 * @param o_chips The returned list of active chips. 21 */ 22 void initializeIsolator(std::vector<libhei::Chip>& o_chips); 23 24 /** 25 * @brief Will get the list of active chip and initialize the isolator. 26 * @param i_isoData The data gathered during isolation (for FFDC). 27 * @param o_rootCause The returned root cause signature. 28 * @return True, if root cause has been found. False, otherwise. 29 */ 30 bool filterRootCause(const libhei::IsolationData& i_isoData, 31 libhei::Signature& o_rootCause); 32 33 /** 34 * @brief Will create and submit a PEL using the given data. 35 * @param i_isoData The data gathered during isolation (for FFDC). 36 * @param i_servData Data regarding service actions gathered during analysis. 37 * @return Tuple of BMC log id, platform log id 38 */ 39 std::tuple<uint32_t, uint32_t> createPel(const libhei::IsolationData& i_isoData, 40 const ServiceData& i_servData); 41 42 //------------------------------------------------------------------------------ 43 44 const char* __attn(libhei::AttentionType_t i_attnType) 45 { 46 const char* str = ""; 47 switch (i_attnType) 48 { 49 case libhei::ATTN_TYPE_CHECKSTOP: 50 str = "CHECKSTOP"; 51 break; 52 case libhei::ATTN_TYPE_UNIT_CS: 53 str = "UNIT_CS"; 54 break; 55 case libhei::ATTN_TYPE_RECOVERABLE: 56 str = "RECOVERABLE"; 57 break; 58 case libhei::ATTN_TYPE_SP_ATTN: 59 str = "SP_ATTN"; 60 break; 61 case libhei::ATTN_TYPE_HOST_ATTN: 62 str = "HOST_ATTN"; 63 break; 64 default: 65 trace::err("Unsupported attention type: %u", i_attnType); 66 assert(0); 67 } 68 return str; 69 } 70 71 //------------------------------------------------------------------------------ 72 73 bool analyzeHardware(attn::DumpParameters& o_dumpParameters) 74 { 75 bool attnFound = false; 76 77 if (!util::pdbg::queryHardwareAnalysisSupported()) 78 { 79 trace::err("Hardware error analysis is not supported on this system"); 80 return attnFound; 81 } 82 83 trace::inf(">>> enter analyzeHardware()"); 84 85 // Initialize the isolator and get all of the chips to be analyzed. 86 trace::inf("Initializing the isolator..."); 87 std::vector<libhei::Chip> chips; 88 initializeIsolator(chips); 89 90 // Isolate attentions. 91 trace::inf("Isolating errors: # of chips=%u", chips.size()); 92 libhei::IsolationData isoData{}; 93 libhei::isolate(chips, isoData); 94 95 // For debug, trace out the original list of signatures before filtering. 96 for (const auto& sig : isoData.getSignatureList()) 97 { 98 trace::inf("Signature: %s 0x%0" PRIx32 " %s", 99 util::pdbg::getPath(sig.getChip()), sig.toUint32(), 100 __attn(sig.getAttnType())); 101 } 102 103 // Filter for root cause attention. 104 libhei::Signature rootCause{}; 105 attnFound = filterRootCause(isoData, rootCause); 106 107 if (!attnFound) 108 { 109 // It is possible for TI handling, or manually initiated analysis via 110 // the command line, that there will not be an active attention. In 111 // which case, we will do nothing and let the caller of this function 112 // determine if this is the expected behavior. 113 trace::inf("No active attentions found"); 114 } 115 else 116 { 117 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s", 118 util::pdbg::getPath(rootCause.getChip()), 119 rootCause.toUint32(), __attn(rootCause.getAttnType())); 120 121 // Resolve any service actions required by the root cause. 122 RasDataParser rasData{}; 123 ServiceData servData{rootCause, isoData.queryCheckstop()}; 124 rasData.getResolution(rootCause)->resolve(servData); 125 126 // Create and commit a PEL. 127 uint32_t logId = std::get<1>(createPel(isoData, servData)); 128 129 trace::inf("PEL created: PLID=0x%0" PRIx32, logId); 130 131 // Gather/return information needed for dump. A hardware dump will 132 // always be used for system checkstop attenions. Software dumps will be 133 // reserved for MP-IPLs during TI analysis. 134 // TODO: Need ID from root cause. At the moment, HUID does not exist in 135 // devtree. Will need a better ID definition. 136 o_dumpParameters.logId = logId; 137 o_dumpParameters.unitId = 0; 138 o_dumpParameters.dumpType = attn::DumpType::Hardware; 139 } 140 141 // All done, clean up the isolator. 142 trace::inf("Uninitializing isolator..."); 143 libhei::uninitialize(); 144 145 trace::inf("<<< exit analyzeHardware()"); 146 147 return attnFound; 148 } 149 150 //------------------------------------------------------------------------------ 151 152 /** 153 * @brief Get error isolator build information 154 * 155 * @return Pointer to build information 156 */ 157 const char* getBuildInfo() 158 { 159 return libhei::getBuildInfo(); 160 } 161 162 } // namespace analyzer 163