1 #include <assert.h> 2 #include <libpdbg.h> 3 #include <unistd.h> 4 5 #include <analyzer/ras-data/ras-data-parser.hpp> 6 #include <analyzer/service_data.hpp> 7 #include <attn/attn_dump.hpp> 8 #include <hei_main.hpp> 9 #include <phosphor-logging/log.hpp> 10 #include <util/pdbg.hpp> 11 #include <util/trace.hpp> 12 13 #include <algorithm> 14 #include <fstream> 15 #include <iostream> 16 #include <map> 17 #include <string> 18 19 namespace analyzer 20 { 21 22 //------------------------------------------------------------------------------ 23 24 // Forward references for externally defined functions. 25 26 /** 27 * @brief Will get the list of active chip and initialize the isolator. 28 * @param o_chips The returned list of active chips. 29 */ 30 void initializeIsolator(std::vector<libhei::Chip>& o_chips); 31 32 /** 33 * @brief Will create and submit a PEL using the given data. 34 * @param i_isoData The data gathered during isolation (for FFDC). 35 * @param i_servData Data regarding service actions gathered during analysis. 36 * @return Tuple of BMC log id, platform log id 37 */ 38 std::tuple<uint32_t, uint32_t> createPel(const libhei::IsolationData& i_isoData, 39 const ServiceData& i_servData); 40 41 //------------------------------------------------------------------------------ 42 43 const char* __attn(libhei::AttentionType_t i_attnType) 44 { 45 const char* str = ""; 46 switch (i_attnType) 47 { 48 case libhei::ATTN_TYPE_CHECKSTOP: 49 str = "CHECKSTOP"; 50 break; 51 case libhei::ATTN_TYPE_UNIT_CS: 52 str = "UNIT_CS"; 53 break; 54 case libhei::ATTN_TYPE_RECOVERABLE: 55 str = "RECOVERABLE"; 56 break; 57 case libhei::ATTN_TYPE_SP_ATTN: 58 str = "SP_ATTN"; 59 break; 60 case libhei::ATTN_TYPE_HOST_ATTN: 61 str = "HOST_ATTN"; 62 break; 63 default: 64 trace::err("Unsupported attention type: %u", i_attnType); 65 assert(0); 66 } 67 return str; 68 } 69 70 //------------------------------------------------------------------------------ 71 72 bool __filterRootCause(const libhei::IsolationData& i_isoData, 73 libhei::Signature& o_signature) 74 { 75 // We'll need to make a copy of the list so that the original list is 76 // maintained for the log. 77 std::vector<libhei::Signature> sigList{i_isoData.getSignatureList()}; 78 79 // For debug, trace out the original list of signatures before filtering. 80 for (const auto& sig : sigList) 81 { 82 trace::inf("Signature: %s 0x%0" PRIx32 " %s", 83 util::pdbg::getPath(sig.getChip()), sig.toUint32(), 84 __attn(sig.getAttnType())); 85 } 86 87 // Special and host attentions are not supported by this user application. 88 auto newEndItr = 89 std::remove_if(sigList.begin(), sigList.end(), [&](const auto& t) { 90 return (libhei::ATTN_TYPE_SP_ATTN == t.getAttnType() || 91 libhei::ATTN_TYPE_HOST_ATTN == t.getAttnType()); 92 }); 93 94 // Shrink the vector, if needed. 95 sigList.resize(std::distance(sigList.begin(), newEndItr)); 96 97 // START WORKAROUND 98 // TODO: Filtering should be determined by the RAS Data Files provided by 99 // the host firmware via the PNOR (similar to the Chip Data Files). 100 // Until that support is available, use a rudimentary filter that 101 // first looks for any recoverable attention, then any unit checkstop, 102 // and then any system checkstop. This is built on the premise that 103 // recoverable errors could be the root cause of an system checkstop 104 // attentions. Fortunately, we just need to sort the list by the 105 // greater attention type value. 106 std::sort(sigList.begin(), sigList.end(), 107 [&](const auto& a, const auto& b) { 108 return a.getAttnType() > b.getAttnType(); 109 }); 110 // END WORKAROUND 111 112 // Check if a root cause attention was found. 113 if (!sigList.empty()) 114 { 115 // The entry at the front of the list will be the root cause. 116 o_signature = sigList.front(); 117 return true; 118 } 119 120 return false; // default, no active attentions found. 121 } 122 123 //------------------------------------------------------------------------------ 124 125 bool analyzeHardware(attn::DumpParameters& o_dumpParameters) 126 { 127 bool attnFound = false; 128 129 if (!util::pdbg::queryHardwareAnalysisSupported()) 130 { 131 trace::err("Hardware error analysis is not supported on this system"); 132 return attnFound; 133 } 134 135 trace::inf(">>> enter analyzeHardware()"); 136 137 // Initialize the isolator and get all of the chips to be analyzed. 138 trace::inf("Initializing the isolator..."); 139 std::vector<libhei::Chip> chips; 140 initializeIsolator(chips); 141 142 // Isolate attentions. 143 trace::inf("Isolating errors: # of chips=%u", chips.size()); 144 libhei::IsolationData isoData{}; 145 libhei::isolate(chips, isoData); 146 147 // Filter for root cause attention. 148 libhei::Signature rootCause{}; 149 attnFound = __filterRootCause(isoData, rootCause); 150 151 if (!attnFound) 152 { 153 // It is possible for TI handling, or manually initiated analysis via 154 // the command line, that there will not be an active attention. In 155 // which case, we will do nothing and let the caller of this function 156 // determine if this is the expected behavior. 157 trace::inf("No active attentions found"); 158 } 159 else 160 { 161 trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s", 162 util::pdbg::getPath(rootCause.getChip()), 163 rootCause.toUint32(), __attn(rootCause.getAttnType())); 164 165 // Perform service actions based on the root cause. 166 RasDataParser rasData{}; 167 ServiceData servData{rootCause, isoData.queryCheckstop()}; 168 rasData.getResolution(rootCause)->resolve(servData); 169 170 // Create and commit a PEL. 171 uint32_t logId = std::get<1>(createPel(isoData, servData)); 172 173 // Populate dump parameters 174 o_dumpParameters.logId = logId; 175 o_dumpParameters.unitId = 0; 176 o_dumpParameters.dumpType = attn::DumpType::Hardware; 177 } 178 179 // All done, clean up the isolator. 180 trace::inf("Uninitializing isolator..."); 181 libhei::uninitialize(); 182 183 trace::inf("<<< exit analyzeHardware()"); 184 185 return attnFound; 186 } 187 188 //------------------------------------------------------------------------------ 189 190 /** 191 * @brief Get error isolator build information 192 * 193 * @return Pointer to build information 194 */ 195 const char* getBuildInfo() 196 { 197 return libhei::getBuildInfo(); 198 } 199 200 } // namespace analyzer 201