1d84ed6e9SZane Shelley #include <assert.h> 29fb7393eSZane Shelley #include <unistd.h> 387eabc65SBen Tyner 4ebff0d37SZane Shelley #include <analyzer/analyzer_main.hpp> 5a9b44344SZane Shelley #include <analyzer/ras-data/ras-data-parser.hpp> 64ed4be56SZane Shelley #include <analyzer/service_data.hpp> 77029e525SBen Tyner #include <attn/attn_dump.hpp> 80205f3b3SBen Tyner #include <hei_main.hpp> 9f4bd5ff6SZane Shelley #include <util/pdbg.hpp> 10d84ed6e9SZane Shelley #include <util/trace.hpp> 110205f3b3SBen Tyner 120205f3b3SBen Tyner namespace analyzer 130205f3b3SBen Tyner { 14f4bd5ff6SZane Shelley //------------------------------------------------------------------------------ 15b1ebfcb1SBen Tyner 16f4bd5ff6SZane Shelley // Forward references for externally defined functions. 1787eabc65SBen Tyner 18d3b9bac9SZane Shelley /** 19d3b9bac9SZane Shelley * @brief Will get the list of active chip and initialize the isolator. 20d3b9bac9SZane Shelley * @param o_chips The returned list of active chips. 21d3b9bac9SZane Shelley */ 22171a2e04SZane Shelley void initializeIsolator(std::vector<libhei::Chip>& o_chips); 2387eabc65SBen Tyner 24d3b9bac9SZane Shelley /** 2565fefb2cSZane Shelley * @brief Will get the list of active chip and initialize the isolator. 26ec227c2cSZane Shelley * @param i_type The type of analysis to perform. See enum for details. 2765fefb2cSZane Shelley * @param i_isoData The data gathered during isolation (for FFDC). 2865fefb2cSZane Shelley * @param o_rootCause The returned root cause signature. 291a4f0e70SCaleb Palmer * @param i_rasData The RAS data parser. 3065fefb2cSZane Shelley * @return True, if root cause has been found. False, otherwise. 3165fefb2cSZane Shelley */ 32ec227c2cSZane Shelley bool filterRootCause(AnalysisType i_type, 33ec227c2cSZane Shelley const libhei::IsolationData& i_isoData, 341a4f0e70SCaleb Palmer libhei::Signature& o_rootCause, 351a4f0e70SCaleb Palmer const RasDataParser& i_rasData); 3665fefb2cSZane Shelley 3765fefb2cSZane Shelley /** 38d3b9bac9SZane Shelley * @brief Will create and submit a PEL using the given data. 394ed4be56SZane Shelley * @param i_servData Data regarding service actions gathered during analysis. 40611b3442SZane Shelley * @return The platform log ID. Will return zero if no PEL is generated. 41d3b9bac9SZane Shelley */ 42c1e1c000SBen Tyner uint32_t commitPel(const ServiceData& i_servData); 43d3b9bac9SZane Shelley 44d84ed6e9SZane Shelley //------------------------------------------------------------------------------ 45d84ed6e9SZane Shelley 46ebff0d37SZane Shelley const char* __attn(libhei::AttentionType_t i_type) 472f263181SZane Shelley { 482f263181SZane Shelley const char* str = ""; 49ebff0d37SZane Shelley switch (i_type) 502f263181SZane Shelley { 512f263181SZane Shelley case libhei::ATTN_TYPE_CHECKSTOP: 522f263181SZane Shelley str = "CHECKSTOP"; 532f263181SZane Shelley break; 542f263181SZane Shelley case libhei::ATTN_TYPE_UNIT_CS: 552f263181SZane Shelley str = "UNIT_CS"; 562f263181SZane Shelley break; 572f263181SZane Shelley case libhei::ATTN_TYPE_RECOVERABLE: 582f263181SZane Shelley str = "RECOVERABLE"; 592f263181SZane Shelley break; 602f263181SZane Shelley case libhei::ATTN_TYPE_SP_ATTN: 612f263181SZane Shelley str = "SP_ATTN"; 622f263181SZane Shelley break; 632f263181SZane Shelley case libhei::ATTN_TYPE_HOST_ATTN: 642f263181SZane Shelley str = "HOST_ATTN"; 652f263181SZane Shelley break; 662f263181SZane Shelley default: 67ebff0d37SZane Shelley trace::err("Unsupported attention type: %u", i_type); 682f263181SZane Shelley assert(0); 692f263181SZane Shelley } 702f263181SZane Shelley return str; 712f263181SZane Shelley } 722f263181SZane Shelley 732f263181SZane Shelley //------------------------------------------------------------------------------ 742f263181SZane Shelley 75ebff0d37SZane Shelley const char* __analysisType(AnalysisType i_type) 76ebff0d37SZane Shelley { 77ebff0d37SZane Shelley const char* str = ""; 78ebff0d37SZane Shelley switch (i_type) 79ebff0d37SZane Shelley { 80ebff0d37SZane Shelley case AnalysisType::SYSTEM_CHECKSTOP: 81ebff0d37SZane Shelley str = "SYSTEM_CHECKSTOP"; 82ebff0d37SZane Shelley break; 83ebff0d37SZane Shelley case AnalysisType::TERMINATE_IMMEDIATE: 84ebff0d37SZane Shelley str = "TERMINATE_IMMEDIATE"; 85ebff0d37SZane Shelley break; 86ebff0d37SZane Shelley case AnalysisType::MANUAL: 87ebff0d37SZane Shelley str = "MANUAL"; 88ebff0d37SZane Shelley break; 89ebff0d37SZane Shelley default: 90ebff0d37SZane Shelley trace::err("Unsupported analysis type: %u", i_type); 91ebff0d37SZane Shelley assert(0); 92ebff0d37SZane Shelley } 93ebff0d37SZane Shelley return str; 94ebff0d37SZane Shelley } 95ebff0d37SZane Shelley 96ebff0d37SZane Shelley //------------------------------------------------------------------------------ 97ebff0d37SZane Shelley 98ebff0d37SZane Shelley uint32_t analyzeHardware(AnalysisType i_type, attn::DumpParameters& o_dump) 9987eabc65SBen Tyner { 100611b3442SZane Shelley uint32_t o_plid = 0; // default, zero indicates PEL was not created 10187eabc65SBen Tyner 102e5411f0fSZane Shelley if (!util::pdbg::queryHardwareAnalysisSupported()) 103e5411f0fSZane Shelley { 104e5411f0fSZane Shelley trace::err("Hardware error analysis is not supported on this system"); 105611b3442SZane Shelley return o_plid; 106e5411f0fSZane Shelley } 107e5411f0fSZane Shelley 108ebff0d37SZane Shelley trace::inf(">>> enter analyzeHardware(%s)", __analysisType(i_type)); 1092f263181SZane Shelley 110171a2e04SZane Shelley // Initialize the isolator and get all of the chips to be analyzed. 111f4bd5ff6SZane Shelley trace::inf("Initializing the isolator..."); 112171a2e04SZane Shelley std::vector<libhei::Chip> chips; 113f4bd5ff6SZane Shelley initializeIsolator(chips); 1142e994bcdSZane Shelley 115097a71adSZane Shelley // Isolate attentions. 116f4bd5ff6SZane Shelley trace::inf("Isolating errors: # of chips=%u", chips.size()); 117097a71adSZane Shelley libhei::IsolationData isoData{}; 118f4bd5ff6SZane Shelley libhei::isolate(chips, isoData); 11987eabc65SBen Tyner 12065fefb2cSZane Shelley // For debug, trace out the original list of signatures before filtering. 12165fefb2cSZane Shelley for (const auto& sig : isoData.getSignatureList()) 12265fefb2cSZane Shelley { 12365fefb2cSZane Shelley trace::inf("Signature: %s 0x%0" PRIx32 " %s", 12465fefb2cSZane Shelley util::pdbg::getPath(sig.getChip()), sig.toUint32(), 12565fefb2cSZane Shelley __attn(sig.getAttnType())); 12665fefb2cSZane Shelley } 12765fefb2cSZane Shelley 128e5411f0fSZane Shelley // Filter for root cause attention. 129e5411f0fSZane Shelley libhei::Signature rootCause{}; 1301a4f0e70SCaleb Palmer RasDataParser rasData{}; 131*5836f4a6SZane Shelley bool attnFound = false; 132*5836f4a6SZane Shelley try 133*5836f4a6SZane Shelley { 134*5836f4a6SZane Shelley attnFound = filterRootCause(i_type, isoData, rootCause, rasData); 135*5836f4a6SZane Shelley } 136*5836f4a6SZane Shelley catch (const std::exception& e) 137*5836f4a6SZane Shelley { 138*5836f4a6SZane Shelley trace::err("Exception caught during root cause filtering"); 139*5836f4a6SZane Shelley trace::err(e.what()); 140*5836f4a6SZane Shelley attnFound = false; // just in case 141*5836f4a6SZane Shelley } 142e5411f0fSZane Shelley 143b7879d3dSZane Shelley // If a root cause attention was found, or if this was a system checkstop, 144b7879d3dSZane Shelley // generate a PEL. 145b7879d3dSZane Shelley if (attnFound || AnalysisType::SYSTEM_CHECKSTOP == i_type) 146e5411f0fSZane Shelley { 147b7879d3dSZane Shelley if (attnFound) 148e5411f0fSZane Shelley { 149e5411f0fSZane Shelley trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s", 150e5411f0fSZane Shelley util::pdbg::getPath(rootCause.getChip()), 151e5411f0fSZane Shelley rootCause.toUint32(), __attn(rootCause.getAttnType())); 152b7879d3dSZane Shelley } 153b7879d3dSZane Shelley else 154b7879d3dSZane Shelley { 155b7879d3dSZane Shelley // This is bad. Analysis should have found a root cause attention 156b7879d3dSZane Shelley // for a system checkstop. Issues could range from code bugs to SCOM 157b7879d3dSZane Shelley // errors. Regardless, generate a PEL with FFDC to assist with 158b7879d3dSZane Shelley // debug. 159b7879d3dSZane Shelley trace::err("System checkstop with no root cause attention"); 160b7879d3dSZane Shelley rootCause = libhei::Signature{}; // just in case 161b7879d3dSZane Shelley } 162e5411f0fSZane Shelley 163b7879d3dSZane Shelley // Start building the service data. 16462adf5c2SZane Shelley ServiceData servData{rootCause, i_type, isoData}; 165b7879d3dSZane Shelley 166b7879d3dSZane Shelley // Apply any service actions, if needed. Note that there are no 167b7879d3dSZane Shelley // resolutions for manual analysis. 168b7879d3dSZane Shelley if (AnalysisType::MANUAL != i_type) 169b7879d3dSZane Shelley { 170b7879d3dSZane Shelley if (attnFound) 171b7879d3dSZane Shelley { 1722fbd267eSZane Shelley try 1732fbd267eSZane Shelley { 174b7879d3dSZane Shelley // Resolve the root cause attention. 175a9b44344SZane Shelley rasData.getResolution(rootCause)->resolve(servData); 176b7879d3dSZane Shelley } 1772fbd267eSZane Shelley catch (const std::exception& e) 1782fbd267eSZane Shelley { 1792fbd267eSZane Shelley trace::err("Exception caught during root cause analysis"); 1802fbd267eSZane Shelley trace::err(e.what()); 1812fbd267eSZane Shelley 1822fbd267eSZane Shelley // We'll still want to create a PEL for the FFDC, but 1832fbd267eSZane Shelley // since the analysis failed, we need to callout Level 2 1842fbd267eSZane Shelley // Support. 1852fbd267eSZane Shelley servData.calloutProcedure(callout::Procedure::NEXTLVL, 1862fbd267eSZane Shelley callout::Priority::HIGH); 1872fbd267eSZane Shelley } 1882fbd267eSZane Shelley } 189b7879d3dSZane Shelley else 190b7879d3dSZane Shelley { 1912fbd267eSZane Shelley // Analysis failed so callout the Level 2 Support. 1928af56854SZane Shelley servData.calloutProcedure(callout::Procedure::NEXTLVL, 1938af56854SZane Shelley callout::Priority::HIGH); 194b7879d3dSZane Shelley } 195b7879d3dSZane Shelley } 196e5411f0fSZane Shelley 197e5411f0fSZane Shelley // Create and commit a PEL. 198c1e1c000SBen Tyner o_plid = commitPel(servData); 1997029e525SBen Tyner 200611b3442SZane Shelley if (0 == o_plid) 201611b3442SZane Shelley { 202611b3442SZane Shelley trace::err("Failed to create PEL"); 203611b3442SZane Shelley } 204611b3442SZane Shelley else 205611b3442SZane Shelley { 206611b3442SZane Shelley trace::inf("PEL created: PLID=0x%0" PRIx32, o_plid); 207bf3326fbSZane Shelley 20804f010abSZane Shelley // Gather/return information needed for dump. A hardware dump will 209611b3442SZane Shelley // always be used for system checkstop attenions. Software dumps 210611b3442SZane Shelley // will be reserved for MP-IPLs during TI analysis. 211611b3442SZane Shelley // TODO: Need ID from root cause. At the moment, HUID does not exist 212611b3442SZane Shelley // in devtree. Will need a better ID definition. 213ebff0d37SZane Shelley o_dump.unitId = 0; 214ebff0d37SZane Shelley o_dump.dumpType = attn::DumpType::Hardware; 215e5411f0fSZane Shelley } 216611b3442SZane Shelley } 217b7879d3dSZane Shelley else 218b7879d3dSZane Shelley { 219b7879d3dSZane Shelley // It is possible for TI handling, or manually initiated analysis via 220b7879d3dSZane Shelley // the command line, that there will not be an active attention. In 221b7879d3dSZane Shelley // which case, we will do nothing and let the caller of this function 222b7879d3dSZane Shelley // determine if this is the expected behavior. 223b7879d3dSZane Shelley trace::inf("No active attentions found"); 224b7879d3dSZane Shelley } 22587eabc65SBen Tyner 226097a71adSZane Shelley // All done, clean up the isolator. 227f4bd5ff6SZane Shelley trace::inf("Uninitializing isolator..."); 228097a71adSZane Shelley libhei::uninitialize(); 229b1ebfcb1SBen Tyner 2302f263181SZane Shelley trace::inf("<<< exit analyzeHardware()"); 2312f263181SZane Shelley 232611b3442SZane Shelley return o_plid; 2330205f3b3SBen Tyner } 2340205f3b3SBen Tyner 235eea45427SBen Tyner //------------------------------------------------------------------------------ 236eea45427SBen Tyner 2370205f3b3SBen Tyner } // namespace analyzer 238