1
2 #include <analyzer/plugins/plugin.hpp>
3 #include <hei_util.hpp>
4 #include <util/pdbg.hpp>
5 #include <util/trace.hpp>
6
7 namespace analyzer
8 {
9
10 namespace P10
11 {
12
13 /**
14 * @brief Adds all clocks/chips reporting PLL unlock attentions to the callout
15 * list.
16 *
17 * Processors are always called out at medium priority and never guarded. If
18 * more than one processor is reporting a PLL unlock attention on the same
19 * clock, the clock is called out with high priority. Otherwise, the clock
20 * callout priority is medium.
21 */
pll_unlock(unsigned int i_instance,const libhei::Chip &,ServiceData & io_servData)22 void pll_unlock(unsigned int i_instance, const libhei::Chip&,
23 ServiceData& io_servData)
24 {
25 auto nodeId = libhei::hash<libhei::NodeId_t>("PLL_UNLOCK");
26
27 auto sigList = io_servData.getIsolationData().getSignatureList();
28
29 // The PLL list is initially the same size of the signature list.
30 std::vector<libhei::Signature> pllList{sigList.size()};
31
32 // Copy all signatures that match the node ID and bit position. Note that
33 // in this case the bit position is the same as the plugin instance.
34 auto itr = std::copy_if(sigList.begin(), sigList.end(), pllList.begin(),
35 [&nodeId, &i_instance](const auto& s) {
36 return (nodeId == s.getId() &&
37 i_instance == s.getBit());
38 });
39
40 // Shrink the size of the PLL list if necessary.
41 pllList.resize(std::distance(pllList.begin(), itr));
42
43 // The clock callout priority is dependent on the number of chips with PLL
44 // unlock attentions.
45 auto clockPriority =
46 (1 < pllList.size()) ? callout::Priority::HIGH : callout::Priority::MED;
47
48 // Callout the clock.
49 auto clockCallout = (0 == i_instance) ? callout::ClockType::OSC_REF_CLOCK_0
50 : callout::ClockType::OSC_REF_CLOCK_1;
51 io_servData.calloutClock(clockCallout, clockPriority, true);
52
53 // Callout the processors connected to this clock that are reporting PLL
54 // unlock attentions. Always a medium callout and no guarding.
55 for (const auto& sig : pllList)
56 {
57 io_servData.calloutTarget(util::pdbg::getTrgt(sig.getChip()),
58 callout::Priority::MED, false);
59 }
60 }
61
lpc_timeout_callout(const libhei::Chip & i_chip,ServiceData & io_servData)62 void lpc_timeout_callout(const libhei::Chip& i_chip, ServiceData& io_servData)
63 {
64 auto target = util::pdbg::getTrgt(i_chip);
65 auto path = util::pdbg::getPath(target);
66
67 // Callout the PNOR.
68 io_servData.calloutPart(callout::PartType::PNOR, callout::Priority::MED);
69
70 // Callout the associated clock, no guard.
71 auto chipPos = util::pdbg::getChipPos(target);
72 if (0 == chipPos)
73 {
74 // Clock 0 is hardwired to proc 0.
75 io_servData.calloutClock(callout::ClockType::OSC_REF_CLOCK_0,
76 callout::Priority::MED, false);
77 }
78 else if (1 == chipPos)
79 {
80 // Clock 1 is hardwired to proc 1.
81 io_servData.calloutClock(callout::ClockType::OSC_REF_CLOCK_1,
82 callout::Priority::MED, false);
83 }
84 else
85 {
86 trace::err("LPC timeout on unexpected processor: %s", path);
87 }
88
89 // Callout the processor, no guard.
90 io_servData.calloutTarget(target, callout::Priority::MED, false);
91 }
92
93 /**
94 * @brief Queries for an LPC timeout. If present, will callout all appropriate
95 * hardware.
96 */
lpc_timeout(unsigned int,const libhei::Chip & i_chip,ServiceData & io_servData)97 void lpc_timeout(unsigned int, const libhei::Chip& i_chip,
98 ServiceData& io_servData)
99 {
100 auto target = util::pdbg::getTrgt(i_chip);
101 auto path = util::pdbg::getPath(target);
102
103 if (util::pdbg::queryLpcTimeout(target))
104 {
105 trace::inf("LPC timeout detected on %s", path);
106
107 lpc_timeout_callout(i_chip, io_servData);
108 }
109 else
110 {
111 trace::inf("No LPC timeout detected on %s", path);
112
113 io_servData.calloutProcedure(callout::Procedure::NEXTLVL,
114 callout::Priority::HIGH);
115 }
116 }
117
118 /**
119 * @brief If Hostboot detects an LPC timeout, it will manually trigger a
120 * checkstop attention. We will have to bypass checking for an LPC
121 * timeout via the HWP because it will not find the timeout. Instead,
122 * simply make the callout when Hostboot triggers the attention.
123 */
lpc_timeout_workaround(unsigned int,const libhei::Chip & i_chip,ServiceData & io_servData)124 void lpc_timeout_workaround(unsigned int, const libhei::Chip& i_chip,
125 ServiceData& io_servData)
126 {
127 trace::inf("Host detected LPC timeout %s", util::pdbg::getPath(i_chip));
128
129 lpc_timeout_callout(i_chip, io_servData);
130 }
131
132 /**
133 * @brief Calls out all DIMMs attached to an OCMB.
134 */
callout_attached_dimms(unsigned int i_instance,const libhei::Chip & i_chip,ServiceData & io_servData)135 void callout_attached_dimms(unsigned int i_instance, const libhei::Chip& i_chip,
136 ServiceData& io_servData)
137 {
138 // Get the OMI target for this instance
139 auto procTarget = util::pdbg::getTrgt(i_chip);
140 auto omiTarget =
141 util::pdbg::getChipUnit(procTarget, util::pdbg::TYPE_OMI, i_instance);
142
143 if (nullptr != omiTarget)
144 {
145 // Get the connected OCMB from the OMI
146 auto ocmbTarget = util::pdbg::getConnectedTarget(
147 omiTarget, callout::BusType::OMI_BUS);
148
149 // Loop through all DIMMs connected to the OCMB
150 pdbg_target* dimmTarget = nullptr;
151 pdbg_for_each_target("dimm", ocmbTarget, dimmTarget)
152 {
153 if (nullptr != dimmTarget)
154 {
155 // Call out the DIMM, medium priority and guard
156 io_servData.calloutTarget(dimmTarget, callout::Priority::MED,
157 true);
158 }
159 }
160 }
161 }
162
163 /**
164 * @brief Performs channel timeout callouts.
165 */
channel_timeout(unsigned int i_instance,const libhei::Chip & i_chip,ServiceData & io_servData)166 void channel_timeout(unsigned int i_instance, const libhei::Chip& i_chip,
167 ServiceData& io_servData)
168 {
169 // Get the OMI target for this instance
170 auto procTarget = util::pdbg::getTrgt(i_chip);
171 auto omiTarget =
172 util::pdbg::getChipUnit(procTarget, util::pdbg::TYPE_OMI, i_instance);
173
174 if (nullptr != omiTarget)
175 {
176 // Callout the bus and both endpoints, low priority
177 io_servData.calloutBus(omiTarget, callout::BusType::OMI_BUS,
178 callout::Priority::LOW, false);
179
180 auto sigs = io_servData.getIsolationData().getSignatureList();
181
182 // Check if multiple channel timeout bits (MC_DSTL_FIR[22,23]) are on.
183 const auto dstlfir = libhei::hash<libhei::NodeId_t>("MC_DSTL_FIR");
184
185 // Check for the first channel timeout
186 auto itr = std::find_if(sigs.begin(), sigs.end(), [&](const auto& t) {
187 return (i_chip == t.getChip() && dstlfir == t.getId() &&
188 (22 == t.getBit() || 23 == t.getBit()));
189 });
190 if (sigs.end() != itr)
191 {
192 // Check for a second channel timeout starting from after itr
193 itr = std::find_if(++itr, sigs.end(), [&](const auto& t) {
194 return (i_chip == t.getChip() && dstlfir == t.getId() &&
195 (22 == t.getBit() || 23 == t.getBit()));
196 });
197 }
198
199 // Multiple chnl timeouts found, callout the proc side high priority
200 if (sigs.end() != itr)
201 {
202 io_servData.calloutTarget(omiTarget, callout::Priority::HIGH, true);
203 }
204 // Only one chnl timeout, callout the OCMB side high priority
205 else
206 {
207 io_servData.calloutConnected(omiTarget, callout::BusType::OMI_BUS,
208 callout::Priority::HIGH, true);
209 }
210 }
211 else
212 {
213 trace::err("channel_timeout: Failed to get OMI target %d on %s",
214 i_instance, util::pdbg::getPath(procTarget));
215 }
216 }
217
218 } // namespace P10
219
// Plugin registrations: each function defined in namespace P10 above is
// listed once with P10_10 and once with P10_20.
// NOTE(review): PLUGIN_DEFINE_NS is not defined in this file (presumably it
// comes from analyzer/plugins/plugin.hpp). Its arguments appear to be a chip
// type, the namespace holding the function, and the function name -- confirm.
220 PLUGIN_DEFINE_NS(P10_10, P10, pll_unlock);
221 PLUGIN_DEFINE_NS(P10_20, P10, pll_unlock);
222
223 PLUGIN_DEFINE_NS(P10_10, P10, lpc_timeout);
224 PLUGIN_DEFINE_NS(P10_20, P10, lpc_timeout);
225
226 PLUGIN_DEFINE_NS(P10_10, P10, lpc_timeout_workaround);
227 PLUGIN_DEFINE_NS(P10_20, P10, lpc_timeout_workaround);
228
229 PLUGIN_DEFINE_NS(P10_10, P10, callout_attached_dimms);
230 PLUGIN_DEFINE_NS(P10_20, P10, callout_attached_dimms);
231
232 PLUGIN_DEFINE_NS(P10_10, P10, channel_timeout);
233 PLUGIN_DEFINE_NS(P10_20, P10, channel_timeout);
234
235 } // namespace analyzer
236