xref: /openbmc/phosphor-logging/extensions/openpower-pels/host_notifier.cpp (revision 81a91e3ee4bf962111cf555ab9d3c3c51000fa3b)
1 /**
2  * Copyright © 2019 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "host_notifier.hpp"
17 
18 #include <phosphor-logging/log.hpp>
19 
20 namespace openpower::pels
21 {
22 
23 const auto subscriptionName = "PELHostNotifier";
24 const size_t maxRetryAttempts = 15;
25 
26 using namespace phosphor::logging;
27 
28 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
29                            std::unique_ptr<HostInterface> hostIface) :
30     _repo(repo),
31     _dataIface(dataIface), _hostIface(std::move(hostIface)),
32     _retryTimer(_hostIface->getEvent(),
33                 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
34     _hostFullTimer(
35         _hostIface->getEvent(),
36         std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this))
37 {
38     // Subscribe to be told about new PELs.
39     _repo.subscribeToAdds(subscriptionName,
40                           std::bind(std::mem_fn(&HostNotifier::newLogCallback),
41                                     this, std::placeholders::_1));
42 
43     // Subscribe to be told about deleted PELs.
44     _repo.subscribeToDeletes(
45         subscriptionName,
46         std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
47                   std::placeholders::_1));
48 
49     // Add any existing PELs to the queue to send them if necessary.
50     _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
51                              std::placeholders::_1));
52 
53     // Subscribe to be told about host state changes.
54     _dataIface.subscribeToHostStateChange(
55         subscriptionName,
56         std::bind(std::mem_fun(&HostNotifier::hostStateChange), this,
57                   std::placeholders::_1));
58 
59     // Set the function to call when the async reponse is received.
60     _hostIface->setResponseFunction(
61         std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
62                   std::placeholders::_1));
63 
64     // Start sending logs if the host is running
65     if (!_pelQueue.empty() && _dataIface.isHostUp())
66     {
67         doNewLogNotify();
68     }
69 }
70 
71 HostNotifier::~HostNotifier()
72 {
73     _repo.unsubscribeFromAdds(subscriptionName);
74     _dataIface.unsubscribeFromHostStateChange(subscriptionName);
75 }
76 
77 bool HostNotifier::addPELToQueue(const PEL& pel)
78 {
79     if (enqueueRequired(pel.id()))
80     {
81         _pelQueue.push_back(pel.id());
82     }
83 
84     // Return false so that Repo::for_each keeps going.
85     return false;
86 }
87 
88 bool HostNotifier::enqueueRequired(uint32_t id) const
89 {
90     bool required = true;
91     Repository::LogID i{Repository::LogID::Pel{id}};
92 
93     // Manufacturing testing may turn off sending up PELs
94     if (!_dataIface.getHostPELEnablement())
95     {
96         return false;
97     }
98 
99     if (auto attributes = _repo.getPELAttributes(i); attributes)
100     {
101         auto a = attributes.value().get();
102 
103         if ((a.hostState == TransmissionState::acked) ||
104             (a.hostState == TransmissionState::badPEL))
105         {
106             required = false;
107         }
108         else if (a.actionFlags.test(hiddenFlagBit) &&
109                  (a.hmcState == TransmissionState::acked))
110         {
111             required = false;
112         }
113         else if (a.actionFlags.test(dontReportToHostFlagBit))
114         {
115             required = false;
116         }
117     }
118     else
119     {
120         using namespace phosphor::logging;
121         log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository",
122                         entry("PEL_ID=0x%X", id));
123         required = false;
124     }
125 
126     return required;
127 }
128 
129 bool HostNotifier::notifyRequired(uint32_t id) const
130 {
131     bool notify = true;
132     Repository::LogID i{Repository::LogID::Pel{id}};
133 
134     if (auto attributes = _repo.getPELAttributes(i); attributes)
135     {
136         // If already acked by the host, don't send again.
137         // (A safety check as it shouldn't get to this point.)
138         auto a = attributes.value().get();
139         if (a.hostState == TransmissionState::acked)
140         {
141             notify = false;
142         }
143         else if (a.actionFlags.test(hiddenFlagBit))
144         {
145             // If hidden and acked (or will be) acked by the HMC,
146             // also don't send it. (HMC management can come and
147             // go at any time)
148             if ((a.hmcState == TransmissionState::acked) ||
149                 _dataIface.isHMCManaged())
150             {
151                 notify = false;
152             }
153         }
154     }
155     else
156     {
157         // Must have been deleted since put on the queue.
158         notify = false;
159     }
160 
161     return notify;
162 }
163 
164 void HostNotifier::newLogCallback(const PEL& pel)
165 {
166     if (!enqueueRequired(pel.id()))
167     {
168         return;
169     }
170 
171     log<level::DEBUG>("new PEL added to queue", entry("PEL_ID=0x%X", pel.id()));
172 
173     _pelQueue.push_back(pel.id());
174 
175     // Notify shouldn't happen if host is down or full
176     if (!_dataIface.isHostUp() || _hostFull)
177     {
178         return;
179     }
180 
181     // Dispatch a command now if there isn't currently a command
182     // in progress and this is the first log in the queue or it
183     // previously gave up from a hard failure.
184     auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
185                       _retryTimer.isEnabled();
186 
187     auto firstPEL = _pelQueue.size() == 1;
188     auto gaveUp = _retryCount >= maxRetryAttempts;
189 
190     if (!inProgress && (firstPEL || gaveUp))
191     {
192         _retryCount = 0;
193 
194         // Send a log, but from the event loop, not from here.
195         scheduleDispatch();
196     }
197 }
198 
199 void HostNotifier::deleteLogCallback(uint32_t id)
200 {
201     auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
202     if (queueIt != _pelQueue.end())
203     {
204         log<level::DEBUG>("Host notifier removing deleted log from queue");
205         _pelQueue.erase(queueIt);
206     }
207 
208     auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
209     if (sentIt != _sentPELs.end())
210     {
211         log<level::DEBUG>("Host notifier removing deleted log from sent list");
212         _sentPELs.erase(sentIt);
213     }
214 
215     // Nothing we can do about this...
216     if (id == _inProgressPEL)
217     {
218         log<level::WARNING>(
219             "A PEL was deleted while its host notification was in progress",
220             entry("PEL_ID=0x%X", id));
221     }
222 }
223 
224 void HostNotifier::scheduleDispatch()
225 {
226     _dispatcher = std::make_unique<sdeventplus::source::Defer>(
227         _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
228                                           this, std::placeholders::_1));
229 }
230 
231 void HostNotifier::dispatch(sdeventplus::source::EventBase& source)
232 {
233     _dispatcher.reset();
234 
235     doNewLogNotify();
236 }
237 
238 void HostNotifier::doNewLogNotify()
239 {
240     if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
241         _hostFullTimer.isEnabled())
242     {
243         return;
244     }
245 
246     if (_retryCount >= maxRetryAttempts)
247     {
248         // Give up until a new log comes in.
249         if (_retryCount == maxRetryAttempts)
250         {
251             // If this were to really happen, the PLDM interface
252             // would be down and isolating that shouldn't left to
253             // a logging daemon, so just trace.  Also, this will start
254             // trying again when the next new log comes in.
255             log<level::ERR>(
256                 "PEL Host notifier hit max retry attempts. Giving up for now.",
257                 entry("PEL_ID=0x%X", _pelQueue.front()));
258 
259             // Tell the host interface object to clean itself up, especially to
260             // release the PLDM instance ID it's been using.
261             _hostIface->cancelCmd();
262         }
263         return;
264     }
265 
266     bool doNotify = false;
267     uint32_t id = 0;
268 
269     // Find the PEL to send
270     while (!doNotify && !_pelQueue.empty())
271     {
272         id = _pelQueue.front();
273         _pelQueue.pop_front();
274 
275         if (notifyRequired(id))
276         {
277             doNotify = true;
278         }
279     }
280 
281     if (doNotify)
282     {
283         // Get the size using the repo attributes
284         Repository::LogID i{Repository::LogID::Pel{id}};
285         if (auto attributes = _repo.getPELAttributes(i); attributes)
286         {
287             auto size = static_cast<size_t>(
288                 std::filesystem::file_size((*attributes).get().path));
289 
290             log<level::DEBUG>("sendNewLogCmd", entry("PEL_ID=0x%X", id),
291                               entry("PEL_SIZE=%d", size));
292 
293             auto rc = _hostIface->sendNewLogCmd(id, size);
294 
295             if (rc == CmdStatus::success)
296             {
297                 _inProgressPEL = id;
298             }
299             else
300             {
301                 // It failed.  Retry
302                 log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id));
303                 _pelQueue.push_front(id);
304                 _inProgressPEL = 0;
305                 _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
306             }
307         }
308         else
309         {
310             log<level::ERR>("PEL ID not in repository.  Cannot notify host",
311                             entry("PEL_ID=0x%X", id));
312         }
313     }
314 }
315 
316 void HostNotifier::hostStateChange(bool hostUp)
317 {
318     _retryCount = 0;
319     _hostFull = false;
320 
321     if (hostUp && !_pelQueue.empty())
322     {
323         log<level::DEBUG>("Host state change to on");
324 
325         doNewLogNotify();
326     }
327     else if (!hostUp)
328     {
329         log<level::DEBUG>("Host state change to off");
330 
331         stopCommand();
332 
333         // Reset the state on any PELs that were sent but not acked back
334         // to new so they'll get sent again.
335         for (auto id : _sentPELs)
336         {
337             _pelQueue.push_back(id);
338             _repo.setPELHostTransState(id, TransmissionState::newPEL);
339         }
340 
341         _sentPELs.clear();
342 
343         if (_hostFullTimer.isEnabled())
344         {
345             _hostFullTimer.setEnabled(false);
346         }
347     }
348 }
349 
350 void HostNotifier::commandResponse(ResponseStatus status)
351 {
352     auto id = _inProgressPEL;
353     _inProgressPEL = 0;
354 
355     if (status == ResponseStatus::success)
356     {
357         log<level::DEBUG>("HostNotifier command response success",
358                           entry("PEL_ID=0x%X", id));
359         _retryCount = 0;
360 
361         _sentPELs.push_back(id);
362 
363         _repo.setPELHostTransState(id, TransmissionState::sent);
364 
365         // If the host is full, don't send off the next PEL
366         if (!_hostFull && !_pelQueue.empty())
367         {
368             doNewLogNotify();
369         }
370     }
371     else
372     {
373         log<level::ERR>("PLDM command response failure",
374                         entry("PEL_ID=0x%X", id));
375         // Retry
376         _pelQueue.push_front(id);
377         _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
378     }
379 }
380 
381 void HostNotifier::retryTimerExpired()
382 {
383     if (_dataIface.isHostUp())
384     {
385         log<level::INFO>("Attempting command retry",
386                          entry("PEL_ID=0x%X", _pelQueue.front()));
387         _retryCount++;
388         doNewLogNotify();
389     }
390 }
391 
392 void HostNotifier::hostFullTimerExpired()
393 {
394     log<level::DEBUG>("Host full timer expired, trying send again");
395     doNewLogNotify();
396 }
397 
398 void HostNotifier::stopCommand()
399 {
400     _retryCount = 0;
401 
402     if (_inProgressPEL != 0)
403     {
404         _pelQueue.push_front(_inProgressPEL);
405         _inProgressPEL = 0;
406     }
407 
408     if (_retryTimer.isEnabled())
409     {
410         _retryTimer.setEnabled(false);
411     }
412 
413     // Ensure the PLDM instance ID is released
414     _hostIface->cancelCmd();
415 }
416 
417 void HostNotifier::ackPEL(uint32_t id)
418 {
419     _repo.setPELHostTransState(id, TransmissionState::acked);
420 
421     // No longer just 'sent', so remove it from the sent list.
422     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
423     if (sent != _sentPELs.end())
424     {
425         _sentPELs.erase(sent);
426     }
427 
428     // An ack means the host is no longer full
429     if (_hostFullTimer.isEnabled())
430     {
431         _hostFullTimer.setEnabled(false);
432     }
433 
434     if (_hostFull)
435     {
436         _hostFull = false;
437 
438         log<level::DEBUG>("Host previously full, not anymore after this ack");
439 
440         // Start sending PELs again, from the event loop
441         if (!_pelQueue.empty())
442         {
443             scheduleDispatch();
444         }
445     }
446 }
447 
448 void HostNotifier::setHostFull(uint32_t id)
449 {
450     log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));
451 
452     _hostFull = true;
453 
454     // This PEL needs to get re-sent
455     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
456     if (sent != _sentPELs.end())
457     {
458         _sentPELs.erase(sent);
459         _repo.setPELHostTransState(id, TransmissionState::newPEL);
460 
461         if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
462             _pelQueue.end())
463         {
464             _pelQueue.push_front(id);
465         }
466     }
467 
468     // The only PELs that will be sent when the
469     // host is full is from this timer callback.
470     if (!_hostFullTimer.isEnabled())
471     {
472         log<level::DEBUG>("Starting host full timer");
473         _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
474     }
475 }
476 
477 void HostNotifier::setBadPEL(uint32_t id)
478 {
479     log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id));
480 
481     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
482     if (sent != _sentPELs.end())
483     {
484         _sentPELs.erase(sent);
485     }
486 
487     _repo.setPELHostTransState(id, TransmissionState::badPEL);
488 }
489 
490 } // namespace openpower::pels
491