1 #include <assert.h>
2 #include <unistd.h>
3 
4 #include <analyzer/ras-data/ras-data-parser.hpp>
5 #include <analyzer/service_data.hpp>
6 #include <attn/attn_dump.hpp>
7 #include <hei_main.hpp>
8 #include <util/pdbg.hpp>
9 #include <util/trace.hpp>
10 
11 namespace analyzer
12 {
13 
14 //------------------------------------------------------------------------------
15 
16 // Forward references for externally defined functions.
17 
/**
 * @brief Will get the list of active chips and initialize the isolator.
 * @param o_chips The returned list of active chips.
 */
void initializeIsolator(std::vector<libhei::Chip>& o_chips);
23 
/**
 * @brief  Will filter the data gathered during isolation to find the root
 *         cause attention.
 * @param  i_isoData   The data gathered during isolation (for FFDC).
 * @param  o_rootCause The returned root cause signature.
 * @return True, if root cause has been found. False, otherwise.
 */
bool filterRootCause(const libhei::IsolationData& i_isoData,
                     libhei::Signature& o_rootCause);
32 
/**
 * @brief Will create and submit a PEL using the given data.
 * @param i_isoData   The data gathered during isolation (for FFDC).
 * @param i_servData  Data regarding service actions gathered during analysis.
 * @return Tuple of (BMC log ID, platform log ID), in that order.
 */
std::tuple<uint32_t, uint32_t> createPel(const libhei::IsolationData& i_isoData,
                                         const ServiceData& i_servData);
41 
42 //------------------------------------------------------------------------------
43 
44 const char* __attn(libhei::AttentionType_t i_attnType)
45 {
46     const char* str = "";
47     switch (i_attnType)
48     {
49         case libhei::ATTN_TYPE_CHECKSTOP:
50             str = "CHECKSTOP";
51             break;
52         case libhei::ATTN_TYPE_UNIT_CS:
53             str = "UNIT_CS";
54             break;
55         case libhei::ATTN_TYPE_RECOVERABLE:
56             str = "RECOVERABLE";
57             break;
58         case libhei::ATTN_TYPE_SP_ATTN:
59             str = "SP_ATTN";
60             break;
61         case libhei::ATTN_TYPE_HOST_ATTN:
62             str = "HOST_ATTN";
63             break;
64         default:
65             trace::err("Unsupported attention type: %u", i_attnType);
66             assert(0);
67     }
68     return str;
69 }
70 
71 //------------------------------------------------------------------------------
72 
73 bool analyzeHardware(attn::DumpParameters& o_dumpParameters)
74 {
75     bool attnFound = false;
76 
77     if (!util::pdbg::queryHardwareAnalysisSupported())
78     {
79         trace::err("Hardware error analysis is not supported on this system");
80         return attnFound;
81     }
82 
83     trace::inf(">>> enter analyzeHardware()");
84 
85     // Initialize the isolator and get all of the chips to be analyzed.
86     trace::inf("Initializing the isolator...");
87     std::vector<libhei::Chip> chips;
88     initializeIsolator(chips);
89 
90     // Isolate attentions.
91     trace::inf("Isolating errors: # of chips=%u", chips.size());
92     libhei::IsolationData isoData{};
93     libhei::isolate(chips, isoData);
94 
95     // For debug, trace out the original list of signatures before filtering.
96     for (const auto& sig : isoData.getSignatureList())
97     {
98         trace::inf("Signature: %s 0x%0" PRIx32 " %s",
99                    util::pdbg::getPath(sig.getChip()), sig.toUint32(),
100                    __attn(sig.getAttnType()));
101     }
102 
103     // Filter for root cause attention.
104     libhei::Signature rootCause{};
105     attnFound = filterRootCause(isoData, rootCause);
106 
107     if (!attnFound)
108     {
109         // It is possible for TI handling, or manually initiated analysis via
110         // the command line, that there will not be an active attention. In
111         // which case, we will do nothing and let the caller of this function
112         // determine if this is the expected behavior.
113         trace::inf("No active attentions found");
114     }
115     else
116     {
117         trace::inf("Root cause attention: %s 0x%0" PRIx32 " %s",
118                    util::pdbg::getPath(rootCause.getChip()),
119                    rootCause.toUint32(), __attn(rootCause.getAttnType()));
120 
121         // Resolve any service actions required by the root cause.
122         RasDataParser rasData{};
123         ServiceData servData{rootCause, isoData.queryCheckstop()};
124         rasData.getResolution(rootCause)->resolve(servData);
125 
126         // Create and commit a PEL.
127         uint32_t logId = std::get<1>(createPel(isoData, servData));
128 
129         trace::inf("PEL created: PLID=0x%0" PRIx32, logId);
130 
131         // Gather/return information needed for dump.
132         // TODO: Need ID from root cause. At the moment, HUID does not exist in
133         //       devtree. Will need a better ID definition.
134         // TODO: HW dump is default, but some attentions may require something
135         //       different. Will need to investigate adding that information to
136         //       the RAS data files.
137         o_dumpParameters.logId    = logId;
138         o_dumpParameters.unitId   = 0;
139         o_dumpParameters.dumpType = attn::DumpType::Hardware;
140     }
141 
142     // All done, clean up the isolator.
143     trace::inf("Uninitializing isolator...");
144     libhei::uninitialize();
145 
146     trace::inf("<<< exit analyzeHardware()");
147 
148     return attnFound;
149 }
150 
151 //------------------------------------------------------------------------------
152 
153 /**
154  * @brief Get error isolator build information
155  *
156  * @return Pointer to build information
157  */
158 const char* getBuildInfo()
159 {
160     return libhei::getBuildInfo();
161 }
162 
163 } // namespace analyzer
164