xref: /openbmc/phosphor-logging/extensions/openpower-pels/host_notifier.cpp (revision 075c79237505ea3b810a461f5f514e4d520a0c44)
1 /**
2  * Copyright © 2019 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "host_notifier.hpp"
17 
18 #include <phosphor-logging/lg2.hpp>
19 
20 namespace openpower::pels
21 {
22 
23 const auto subscriptionName = "PELHostNotifier";
24 const size_t maxRetryAttempts = 15;
25 
HostNotifier(Repository & repo,DataInterfaceBase & dataIface,std::unique_ptr<HostInterface> hostIface)26 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
27                            std::unique_ptr<HostInterface> hostIface) :
28     _repo(repo), _dataIface(dataIface), _hostIface(std::move(hostIface)),
29     _retryTimer(_hostIface->getEvent(),
30                 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
31     _hostFullTimer(
32         _hostIface->getEvent(),
33         std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)),
34     _hostUpTimer(_hostIface->getEvent(),
35                  std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired),
36                            this))
37 {
38     // Subscribe to be told about new PELs.
39     _repo.subscribeToAdds(subscriptionName,
40                           std::bind(std::mem_fn(&HostNotifier::newLogCallback),
41                                     this, std::placeholders::_1));
42 
43     // Subscribe to be told about deleted PELs.
44     _repo.subscribeToDeletes(
45         subscriptionName,
46         std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
47                   std::placeholders::_1));
48 
49     // Add any existing PELs to the queue to send them if necessary.
50     _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
51                              std::placeholders::_1));
52 
53     // Subscribe to be told about host state changes.
54     _dataIface.subscribeToHostStateChange(
55         subscriptionName, std::bind(std::mem_fn(&HostNotifier::hostStateChange),
56                                     this, std::placeholders::_1));
57 
58     // Set the function to call when the async reponse is received.
59     _hostIface->setResponseFunction(
60         std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
61                   std::placeholders::_1));
62 
63     // Start sending logs if the host is running
64     if (!_pelQueue.empty() && _dataIface.isHostUp())
65     {
66         lg2::debug("Host is already up at startup");
67         _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
68     }
69 }
70 
~HostNotifier()71 HostNotifier::~HostNotifier()
72 {
73     _repo.unsubscribeFromAdds(subscriptionName);
74     _dataIface.unsubscribeFromHostStateChange(subscriptionName);
75 }
76 
hostUpTimerExpired()77 void HostNotifier::hostUpTimerExpired()
78 {
79     lg2::debug("Host up timer expired");
80     doNewLogNotify();
81 }
82 
addPELToQueue(const PEL & pel)83 bool HostNotifier::addPELToQueue(const PEL& pel)
84 {
85     if (enqueueRequired(pel.id()))
86     {
87         _pelQueue.push_back(pel.id());
88     }
89 
90     // Return false so that Repo::for_each keeps going.
91     return false;
92 }
93 
enqueueRequired(uint32_t id) const94 bool HostNotifier::enqueueRequired(uint32_t id) const
95 {
96     bool required = true;
97     Repository::LogID i{Repository::LogID::Pel{id}};
98 
99     // Manufacturing testing may turn off sending up PELs
100     if (!_dataIface.getHostPELEnablement())
101     {
102         return false;
103     }
104 
105     if (auto attributes = _repo.getPELAttributes(i); attributes)
106     {
107         auto a = attributes.value().get();
108 
109         if ((a.hostState == TransmissionState::acked) ||
110             (a.hostState == TransmissionState::badPEL))
111         {
112             required = false;
113         }
114         else if (a.actionFlags.test(hiddenFlagBit) &&
115                  (a.hmcState == TransmissionState::acked))
116         {
117             required = false;
118         }
119         else if (a.actionFlags.test(dontReportToHostFlagBit))
120         {
121             required = false;
122         }
123     }
124     else
125     {
126         using namespace phosphor::logging;
127         lg2::error("Host Enqueue: Unable to find PEL ID {ID} in repository",
128                    "ID", lg2::hex, id);
129         required = false;
130     }
131 
132     return required;
133 }
134 
notifyRequired(uint32_t id) const135 bool HostNotifier::notifyRequired(uint32_t id) const
136 {
137     bool notify = true;
138     Repository::LogID i{Repository::LogID::Pel{id}};
139 
140     if (auto attributes = _repo.getPELAttributes(i); attributes)
141     {
142         // If already acked by the host, don't send again.
143         // (A safety check as it shouldn't get to this point.)
144         auto a = attributes.value().get();
145         if (a.hostState == TransmissionState::acked)
146         {
147             notify = false;
148         }
149         else if (a.actionFlags.test(hiddenFlagBit))
150         {
151             // If hidden and acked (or will be) acked by the HMC,
152             // also don't send it. (HMC management can come and
153             // go at any time)
154             if ((a.hmcState == TransmissionState::acked) ||
155                 _dataIface.isHMCManaged())
156             {
157                 notify = false;
158             }
159         }
160     }
161     else
162     {
163         // Must have been deleted since put on the queue.
164         notify = false;
165     }
166 
167     return notify;
168 }
169 
newLogCallback(const PEL & pel)170 void HostNotifier::newLogCallback(const PEL& pel)
171 {
172     if (!enqueueRequired(pel.id()))
173     {
174         return;
175     }
176 
177     lg2::debug("New PEL added to queue, PEL ID = {ID}", "ID", lg2::hex,
178                pel.id());
179 
180     _pelQueue.push_back(pel.id());
181 
182     // Notify shouldn't happen if host is down, not up long enough, or full
183     if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled())
184     {
185         return;
186     }
187 
188     // Dispatch a command now if there isn't currently a command
189     // in progress and this is the first log in the queue or it
190     // previously gave up from a hard failure.
191     auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
192                       _retryTimer.isEnabled();
193 
194     auto firstPEL = _pelQueue.size() == 1;
195     auto gaveUp = _retryCount >= maxRetryAttempts;
196 
197     if (!inProgress && (firstPEL || gaveUp))
198     {
199         _retryCount = 0;
200 
201         // Send a log, but from the event loop, not from here.
202         scheduleDispatch();
203     }
204 }
205 
deleteLogCallback(uint32_t id)206 void HostNotifier::deleteLogCallback(uint32_t id)
207 {
208     auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
209     if (queueIt != _pelQueue.end())
210     {
211         lg2::debug("Host notifier removing deleted log from queue");
212         _pelQueue.erase(queueIt);
213     }
214 
215     auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
216     if (sentIt != _sentPELs.end())
217     {
218         lg2::debug("Host notifier removing deleted log from sent list");
219         _sentPELs.erase(sentIt);
220     }
221 
222     // Nothing we can do about this...
223     if (id == _inProgressPEL)
224     {
225         lg2::warning(
226             "A PEL was deleted while its host notification was in progress, PEL ID = {ID}",
227             "ID", lg2::hex, id);
228     }
229 }
230 
scheduleDispatch()231 void HostNotifier::scheduleDispatch()
232 {
233     _dispatcher = std::make_unique<sdeventplus::source::Defer>(
234         _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
235                                           this, std::placeholders::_1));
236 }
237 
dispatch(sdeventplus::source::EventBase &)238 void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/)
239 {
240     _dispatcher.reset();
241 
242     doNewLogNotify();
243 }
244 
doNewLogNotify()245 void HostNotifier::doNewLogNotify()
246 {
247     if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
248         _hostFullTimer.isEnabled())
249     {
250         return;
251     }
252 
253     if (_retryCount >= maxRetryAttempts)
254     {
255         // Give up until a new log comes in.
256         if (_retryCount == maxRetryAttempts)
257         {
258             // If this were to really happen, the PLDM interface
259             // would be down and isolating that shouldn't left to
260             // a logging daemon, so just trace.  Also, this will start
261             // trying again when the next new log comes in.
262             lg2::error(
263                 "PEL Host notifier hit max retry attempts. Giving up for now. PEL ID = {ID}",
264                 "ID", lg2::hex, _pelQueue.front());
265 
266             // Tell the host interface object to clean itself up, especially to
267             // release the PLDM instance ID it's been using.
268             _hostIface->cancelCmd();
269         }
270         return;
271     }
272 
273     bool doNotify = false;
274     uint32_t id = 0;
275 
276     // Find the PEL to send
277     while (!doNotify && !_pelQueue.empty())
278     {
279         id = _pelQueue.front();
280         _pelQueue.pop_front();
281 
282         if (notifyRequired(id))
283         {
284             doNotify = true;
285         }
286     }
287 
288     if (doNotify)
289     {
290         // Get the size using the repo attributes
291         Repository::LogID i{Repository::LogID::Pel{id}};
292         if (auto attributes = _repo.getPELAttributes(i); attributes)
293         {
294             auto size = static_cast<size_t>(
295                 std::filesystem::file_size((*attributes).get().path));
296 
297             lg2::debug("sendNewLogCmd: ID {ID} size {SIZE}", "ID", lg2::hex, id,
298                        "SIZE", size);
299 
300             auto rc = _hostIface->sendNewLogCmd(id, size);
301 
302             if (rc == CmdStatus::success)
303             {
304                 _inProgressPEL = id;
305             }
306             else
307             {
308                 // It failed.  Retry
309                 lg2::error("PLDM send failed, PEL ID = {ID}", "ID", lg2::hex,
310                            id);
311                 _pelQueue.push_front(id);
312                 _inProgressPEL = 0;
313                 _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
314             }
315         }
316         else
317         {
318             lg2::error(
319                 "PEL ID is not in repository. Cannot notify host. PEL ID = {ID}",
320                 "ID", lg2::hex, id);
321         }
322     }
323 }
324 
hostStateChange(bool hostUp)325 void HostNotifier::hostStateChange(bool hostUp)
326 {
327     _retryCount = 0;
328     _hostFull = false;
329 
330     if (hostUp && !_pelQueue.empty())
331     {
332         lg2::debug("Host state change to on");
333         _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
334     }
335     else if (!hostUp)
336     {
337         lg2::debug("Host state change to off");
338 
339         stopCommand();
340 
341         // Reset the state on any PELs that were sent but not acked back
342         // to new so they'll get sent again.
343         for (auto id : _sentPELs)
344         {
345             _pelQueue.push_back(id);
346             _repo.setPELHostTransState(id, TransmissionState::newPEL);
347         }
348 
349         _sentPELs.clear();
350 
351         if (_hostFullTimer.isEnabled())
352         {
353             _hostFullTimer.setEnabled(false);
354         }
355 
356         if (_hostUpTimer.isEnabled())
357         {
358             _hostUpTimer.setEnabled(false);
359         }
360     }
361 }
362 
commandResponse(ResponseStatus status)363 void HostNotifier::commandResponse(ResponseStatus status)
364 {
365     auto id = _inProgressPEL;
366     _inProgressPEL = 0;
367 
368     if (status == ResponseStatus::success)
369     {
370         lg2::debug("HostNotifier command response success, PEL ID = {ID}", "ID",
371                    lg2::hex, id);
372         _retryCount = 0;
373 
374         _sentPELs.push_back(id);
375 
376         _repo.setPELHostTransState(id, TransmissionState::sent);
377 
378         // If the host is full, don't send off the next PEL
379         if (!_hostFull && !_pelQueue.empty())
380         {
381             doNewLogNotify();
382         }
383     }
384     else
385     {
386         lg2::error("PLDM command response failure, PEL ID = {ID}", "ID",
387                    lg2::hex, id);
388         // Retry
389         _pelQueue.push_front(id);
390         _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
391     }
392 }
393 
retryTimerExpired()394 void HostNotifier::retryTimerExpired()
395 {
396     if (_dataIface.isHostUp())
397     {
398         lg2::info("Attempting command retry, PEL ID = {ID}", "ID", lg2::hex,
399                   _pelQueue.front());
400         _retryCount++;
401         doNewLogNotify();
402     }
403 }
404 
hostFullTimerExpired()405 void HostNotifier::hostFullTimerExpired()
406 {
407     lg2::debug("Host full timer expired, trying send again");
408     doNewLogNotify();
409 }
410 
stopCommand()411 void HostNotifier::stopCommand()
412 {
413     _retryCount = 0;
414 
415     if (_inProgressPEL != 0)
416     {
417         _pelQueue.push_front(_inProgressPEL);
418         _inProgressPEL = 0;
419     }
420 
421     if (_retryTimer.isEnabled())
422     {
423         _retryTimer.setEnabled(false);
424     }
425 
426     // Ensure the PLDM instance ID is released
427     _hostIface->cancelCmd();
428 }
429 
ackPEL(uint32_t id)430 void HostNotifier::ackPEL(uint32_t id)
431 {
432     _repo.setPELHostTransState(id, TransmissionState::acked);
433 
434     // No longer just 'sent', so remove it from the sent list.
435     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
436     if (sent != _sentPELs.end())
437     {
438         _sentPELs.erase(sent);
439     }
440 
441     // An ack means the host is no longer full
442     if (_hostFullTimer.isEnabled())
443     {
444         _hostFullTimer.setEnabled(false);
445     }
446 
447     if (_hostFull)
448     {
449         _hostFull = false;
450 
451         lg2::debug("Host previously full, not anymore after this ack");
452 
453         // Start sending PELs again, from the event loop
454         if (!_pelQueue.empty())
455         {
456             scheduleDispatch();
457         }
458     }
459 }
460 
setHostFull(uint32_t id)461 void HostNotifier::setHostFull(uint32_t id)
462 {
463     lg2::debug("Received Host full indication, PEL ID = {ID}", "ID", lg2::hex,
464                id);
465 
466     _hostFull = true;
467 
468     // This PEL needs to get re-sent
469     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
470     if (sent != _sentPELs.end())
471     {
472         _sentPELs.erase(sent);
473         _repo.setPELHostTransState(id, TransmissionState::newPEL);
474 
475         if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
476             _pelQueue.end())
477         {
478             _pelQueue.push_front(id);
479         }
480     }
481 
482     // The only PELs that will be sent when the
483     // host is full is from this timer callback.
484     if (!_hostFullTimer.isEnabled())
485     {
486         lg2::debug("Starting host full timer");
487         _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
488     }
489 }
490 
setBadPEL(uint32_t id)491 void HostNotifier::setBadPEL(uint32_t id)
492 {
493     lg2::error("PEL rejected by the host, PEL ID = {ID}", "ID", lg2::hex, id);
494 
495     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
496     if (sent != _sentPELs.end())
497     {
498         _sentPELs.erase(sent);
499     }
500 
501     _repo.setPELHostTransState(id, TransmissionState::badPEL);
502 }
503 
504 } // namespace openpower::pels
505