1 // SPDX-License-Identifier: Apache-2.0
2 // SPDX-FileCopyrightText: Copyright 2019 IBM Corporation
3
4 #include "host_notifier.hpp"
5
6 #include <phosphor-logging/lg2.hpp>
7
8 namespace openpower::pels
9 {
10
11 const auto subscriptionName = "PELHostNotifier";
12 const size_t maxRetryAttempts = 15;
13
HostNotifier(Repository & repo,DataInterfaceBase & dataIface,std::unique_ptr<HostInterface> hostIface)14 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
15 std::unique_ptr<HostInterface> hostIface) :
16 _repo(repo), _dataIface(dataIface), _hostIface(std::move(hostIface)),
17 _retryTimer(_hostIface->getEvent(),
18 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
19 _hostFullTimer(
20 _hostIface->getEvent(),
21 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)),
22 _hostUpTimer(_hostIface->getEvent(),
23 std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired),
24 this))
25 {
26 // Subscribe to be told about new PELs.
27 _repo.subscribeToAdds(subscriptionName,
28 std::bind(std::mem_fn(&HostNotifier::newLogCallback),
29 this, std::placeholders::_1));
30
31 // Subscribe to be told about deleted PELs.
32 _repo.subscribeToDeletes(
33 subscriptionName,
34 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
35 std::placeholders::_1));
36
37 // Add any existing PELs to the queue to send them if necessary.
38 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
39 std::placeholders::_1));
40
41 // Subscribe to be told about host state changes.
42 _dataIface.subscribeToHostStateChange(
43 subscriptionName, std::bind(std::mem_fn(&HostNotifier::hostStateChange),
44 this, std::placeholders::_1));
45
46 // Set the function to call when the async reponse is received.
47 _hostIface->setResponseFunction(
48 std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
49 std::placeholders::_1));
50
51 // Start sending logs if the host is running
52 if (!_pelQueue.empty() && _dataIface.isHostUp())
53 {
54 lg2::debug("Host is already up at startup");
55 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
56 }
57 }
58
~HostNotifier()59 HostNotifier::~HostNotifier()
60 {
61 _repo.unsubscribeFromAdds(subscriptionName);
62 _dataIface.unsubscribeFromHostStateChange(subscriptionName);
63 }
64
hostUpTimerExpired()65 void HostNotifier::hostUpTimerExpired()
66 {
67 lg2::debug("Host up timer expired");
68 doNewLogNotify();
69 }
70
addPELToQueue(const PEL & pel)71 bool HostNotifier::addPELToQueue(const PEL& pel)
72 {
73 if (enqueueRequired(pel.id()))
74 {
75 _pelQueue.push_back(pel.id());
76 }
77
78 // Return false so that Repo::for_each keeps going.
79 return false;
80 }
81
enqueueRequired(uint32_t id) const82 bool HostNotifier::enqueueRequired(uint32_t id) const
83 {
84 bool required = true;
85 Repository::LogID i{Repository::LogID::Pel{id}};
86
87 // Manufacturing testing may turn off sending up PELs
88 if (!_dataIface.getHostPELEnablement())
89 {
90 return false;
91 }
92
93 if (auto attributes = _repo.getPELAttributes(i); attributes)
94 {
95 auto a = attributes.value().get();
96
97 if ((a.hostState == TransmissionState::acked) ||
98 (a.hostState == TransmissionState::badPEL))
99 {
100 required = false;
101 }
102 else if (a.actionFlags.test(hiddenFlagBit) &&
103 (a.hmcState == TransmissionState::acked))
104 {
105 required = false;
106 }
107 else if (a.actionFlags.test(dontReportToHostFlagBit))
108 {
109 required = false;
110 }
111 }
112 else
113 {
114 using namespace phosphor::logging;
115 lg2::error("Host Enqueue: Unable to find PEL ID {ID} in repository",
116 "ID", lg2::hex, id);
117 required = false;
118 }
119
120 return required;
121 }
122
notifyRequired(uint32_t id) const123 bool HostNotifier::notifyRequired(uint32_t id) const
124 {
125 bool notify = true;
126 Repository::LogID i{Repository::LogID::Pel{id}};
127
128 if (auto attributes = _repo.getPELAttributes(i); attributes)
129 {
130 // If already acked by the host, don't send again.
131 // (A safety check as it shouldn't get to this point.)
132 auto a = attributes.value().get();
133 if (a.hostState == TransmissionState::acked)
134 {
135 notify = false;
136 }
137 else if (a.actionFlags.test(hiddenFlagBit))
138 {
139 // If hidden and acked (or will be) acked by the HMC,
140 // also don't send it. (HMC management can come and
141 // go at any time)
142 if ((a.hmcState == TransmissionState::acked) ||
143 _dataIface.isHMCManaged())
144 {
145 notify = false;
146 }
147 }
148 }
149 else
150 {
151 // Must have been deleted since put on the queue.
152 notify = false;
153 }
154
155 return notify;
156 }
157
newLogCallback(const PEL & pel)158 void HostNotifier::newLogCallback(const PEL& pel)
159 {
160 if (!enqueueRequired(pel.id()))
161 {
162 return;
163 }
164
165 lg2::debug("New PEL added to queue, PEL ID = {ID}", "ID", lg2::hex,
166 pel.id());
167
168 _pelQueue.push_back(pel.id());
169
170 // Notify shouldn't happen if host is down, not up long enough, or full
171 if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled())
172 {
173 return;
174 }
175
176 // Dispatch a command now if there isn't currently a command
177 // in progress and this is the first log in the queue or it
178 // previously gave up from a hard failure.
179 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
180 _retryTimer.isEnabled();
181
182 auto firstPEL = _pelQueue.size() == 1;
183 auto gaveUp = _retryCount >= maxRetryAttempts;
184
185 if (!inProgress && (firstPEL || gaveUp))
186 {
187 _retryCount = 0;
188
189 // Send a log, but from the event loop, not from here.
190 scheduleDispatch();
191 }
192 }
193
deleteLogCallback(uint32_t id)194 void HostNotifier::deleteLogCallback(uint32_t id)
195 {
196 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
197 if (queueIt != _pelQueue.end())
198 {
199 lg2::debug("Host notifier removing deleted log from queue");
200 _pelQueue.erase(queueIt);
201 }
202
203 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
204 if (sentIt != _sentPELs.end())
205 {
206 lg2::debug("Host notifier removing deleted log from sent list");
207 _sentPELs.erase(sentIt);
208 }
209
210 // Nothing we can do about this...
211 if (id == _inProgressPEL)
212 {
213 lg2::warning(
214 "A PEL was deleted while its host notification was in progress, PEL ID = {ID}",
215 "ID", lg2::hex, id);
216 }
217 }
218
scheduleDispatch()219 void HostNotifier::scheduleDispatch()
220 {
221 _dispatcher = std::make_unique<sdeventplus::source::Defer>(
222 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
223 this, std::placeholders::_1));
224 }
225
dispatch(sdeventplus::source::EventBase &)226 void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/)
227 {
228 _dispatcher.reset();
229
230 doNewLogNotify();
231 }
232
doNewLogNotify()233 void HostNotifier::doNewLogNotify()
234 {
235 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
236 _hostFullTimer.isEnabled())
237 {
238 return;
239 }
240
241 if (_retryCount >= maxRetryAttempts)
242 {
243 // Give up until a new log comes in.
244 if (_retryCount == maxRetryAttempts)
245 {
246 // If this were to really happen, the PLDM interface
247 // would be down and isolating that shouldn't left to
248 // a logging daemon, so just trace. Also, this will start
249 // trying again when the next new log comes in.
250 lg2::error(
251 "PEL Host notifier hit max retry attempts. Giving up for now. PEL ID = {ID}",
252 "ID", lg2::hex, _pelQueue.front());
253
254 // Tell the host interface object to clean itself up, especially to
255 // release the PLDM instance ID it's been using.
256 _hostIface->cancelCmd();
257 }
258 return;
259 }
260
261 bool doNotify = false;
262 uint32_t id = 0;
263
264 // Find the PEL to send
265 while (!doNotify && !_pelQueue.empty())
266 {
267 id = _pelQueue.front();
268 _pelQueue.pop_front();
269
270 if (notifyRequired(id))
271 {
272 doNotify = true;
273 }
274 }
275
276 if (doNotify)
277 {
278 // Get the size using the repo attributes
279 Repository::LogID i{Repository::LogID::Pel{id}};
280 if (auto attributes = _repo.getPELAttributes(i); attributes)
281 {
282 auto size = static_cast<size_t>(
283 std::filesystem::file_size((*attributes).get().path));
284
285 lg2::debug("sendNewLogCmd: ID {ID} size {SIZE}", "ID", lg2::hex, id,
286 "SIZE", size);
287
288 auto rc = _hostIface->sendNewLogCmd(id, size);
289
290 if (rc == CmdStatus::success)
291 {
292 _inProgressPEL = id;
293 }
294 else
295 {
296 // It failed. Retry
297 lg2::error("PLDM send failed, PEL ID = {ID}", "ID", lg2::hex,
298 id);
299 _pelQueue.push_front(id);
300 _inProgressPEL = 0;
301 _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
302 }
303 }
304 else
305 {
306 lg2::error(
307 "PEL ID is not in repository. Cannot notify host. PEL ID = {ID}",
308 "ID", lg2::hex, id);
309 }
310 }
311 }
312
hostStateChange(bool hostUp)313 void HostNotifier::hostStateChange(bool hostUp)
314 {
315 _retryCount = 0;
316 _hostFull = false;
317
318 if (hostUp && !_pelQueue.empty())
319 {
320 lg2::debug("Host state change to on");
321 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
322 }
323 else if (!hostUp)
324 {
325 lg2::debug("Host state change to off");
326
327 stopCommand();
328
329 // Reset the state on any PELs that were sent but not acked back
330 // to new so they'll get sent again.
331 for (auto id : _sentPELs)
332 {
333 _pelQueue.push_back(id);
334 _repo.setPELHostTransState(id, TransmissionState::newPEL);
335 }
336
337 _sentPELs.clear();
338
339 if (_hostFullTimer.isEnabled())
340 {
341 _hostFullTimer.setEnabled(false);
342 }
343
344 if (_hostUpTimer.isEnabled())
345 {
346 _hostUpTimer.setEnabled(false);
347 }
348 }
349 }
350
commandResponse(ResponseStatus status)351 void HostNotifier::commandResponse(ResponseStatus status)
352 {
353 auto id = _inProgressPEL;
354 _inProgressPEL = 0;
355
356 if (status == ResponseStatus::success)
357 {
358 lg2::debug("HostNotifier command response success, PEL ID = {ID}", "ID",
359 lg2::hex, id);
360 _retryCount = 0;
361
362 _sentPELs.push_back(id);
363
364 _repo.setPELHostTransState(id, TransmissionState::sent);
365
366 // If the host is full, don't send off the next PEL
367 if (!_hostFull && !_pelQueue.empty())
368 {
369 doNewLogNotify();
370 }
371 }
372 else
373 {
374 lg2::error("PLDM command response failure, PEL ID = {ID}", "ID",
375 lg2::hex, id);
376 // Retry
377 _pelQueue.push_front(id);
378 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
379 }
380 }
381
retryTimerExpired()382 void HostNotifier::retryTimerExpired()
383 {
384 if (_dataIface.isHostUp())
385 {
386 lg2::info("Attempting command retry, PEL ID = {ID}", "ID", lg2::hex,
387 _pelQueue.front());
388 _retryCount++;
389 doNewLogNotify();
390 }
391 }
392
hostFullTimerExpired()393 void HostNotifier::hostFullTimerExpired()
394 {
395 lg2::debug("Host full timer expired, trying send again");
396 doNewLogNotify();
397 }
398
stopCommand()399 void HostNotifier::stopCommand()
400 {
401 _retryCount = 0;
402
403 if (_inProgressPEL != 0)
404 {
405 _pelQueue.push_front(_inProgressPEL);
406 _inProgressPEL = 0;
407 }
408
409 if (_retryTimer.isEnabled())
410 {
411 _retryTimer.setEnabled(false);
412 }
413
414 // Ensure the PLDM instance ID is released
415 _hostIface->cancelCmd();
416 }
417
ackPEL(uint32_t id)418 void HostNotifier::ackPEL(uint32_t id)
419 {
420 _repo.setPELHostTransState(id, TransmissionState::acked);
421
422 // No longer just 'sent', so remove it from the sent list.
423 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
424 if (sent != _sentPELs.end())
425 {
426 _sentPELs.erase(sent);
427 }
428
429 // An ack means the host is no longer full
430 if (_hostFullTimer.isEnabled())
431 {
432 _hostFullTimer.setEnabled(false);
433 }
434
435 if (_hostFull)
436 {
437 _hostFull = false;
438
439 lg2::debug("Host previously full, not anymore after this ack");
440
441 // Start sending PELs again, from the event loop
442 if (!_pelQueue.empty())
443 {
444 scheduleDispatch();
445 }
446 }
447 }
448
setHostFull(uint32_t id)449 void HostNotifier::setHostFull(uint32_t id)
450 {
451 lg2::debug("Received Host full indication, PEL ID = {ID}", "ID", lg2::hex,
452 id);
453
454 _hostFull = true;
455
456 // This PEL needs to get re-sent
457 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
458 if (sent != _sentPELs.end())
459 {
460 _sentPELs.erase(sent);
461 _repo.setPELHostTransState(id, TransmissionState::newPEL);
462
463 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
464 _pelQueue.end())
465 {
466 _pelQueue.push_front(id);
467 }
468 }
469
470 // The only PELs that will be sent when the
471 // host is full is from this timer callback.
472 if (!_hostFullTimer.isEnabled())
473 {
474 lg2::debug("Starting host full timer");
475 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
476 }
477 }
478
setBadPEL(uint32_t id)479 void HostNotifier::setBadPEL(uint32_t id)
480 {
481 lg2::error("PEL rejected by the host, PEL ID = {ID}", "ID", lg2::hex, id);
482
483 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
484 if (sent != _sentPELs.end())
485 {
486 _sentPELs.erase(sent);
487 }
488
489 _repo.setPELHostTransState(id, TransmissionState::badPEL);
490 }
491
492 } // namespace openpower::pels
493