/**
 * Copyright © 2019 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "host_notifier.hpp"

#include <phosphor-logging/log.hpp>

#include <algorithm>
#include <filesystem>
#include <functional>
#include <memory>

namespace openpower::pels
{

const auto subscriptionName = "PELHostNotifier";
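// Number of times to retry sending a PEL before giving up until a new one arrives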
const size_t maxRetryAttempts = 15;

using namespace phosphor::logging;

HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
                           std::unique_ptr<HostInterface> hostIface) :
    _repo(repo),
    _dataIface(dataIface), _hostIface(std::move(hostIface)),
    _retryTimer(_hostIface->getEvent(),
                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
    _hostFullTimer(
        _hostIface->getEvent(),
        std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)),
    _hostUpTimer(
        _hostIface->getEvent(),
        std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired), this))
{
    // Subscribe to be told about new PELs.
    _repo.subscribeToAdds(subscriptionName,
                          std::bind(std::mem_fn(&HostNotifier::newLogCallback),
                                    this, std::placeholders::_1));

    // Subscribe to be told about deleted PELs.
    _repo.subscribeToDeletes(
        subscriptionName,
        std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
                  std::placeholders::_1));

    // Add any existing PELs to the queue to send them if necessary.
    _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
                             std::placeholders::_1));

    // Subscribe to be told about host state changes.
    _dataIface.subscribeToHostStateChange(
        subscriptionName,
        std::bind(std::mem_fn(&HostNotifier::hostStateChange), this,
                  std::placeholders::_1));

    // Set the function to call when the async response is received.
    _hostIface->setResponseFunction(
        std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
                  std::placeholders::_1));

    // Start sending logs if the host is running
    if (!_pelQueue.empty() && _dataIface.isHostUp())
    {
        log<level::DEBUG>("Host is already up at startup");
        _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
    }
}

HostNotifier::~HostNotifier()
{
    _repo.unsubscribeFromAdds(subscriptionName);
    _dataIface.unsubscribeFromHostStateChange(subscriptionName);
}

void HostNotifier::hostUpTimerExpired()
{
    log<level::DEBUG>("Host up timer expired");
    doNewLogNotify();
}

bool HostNotifier::addPELToQueue(const PEL& pel)
{
    if (enqueueRequired(pel.id()))
    {
        _pelQueue.push_back(pel.id());
    }

    // Return false so that Repo::for_each keeps going.
    return false;
}

bool HostNotifier::enqueueRequired(uint32_t id) const
{
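    // A PEL only needs to be sent to the host if the host hasn't already
    // acked or rejected it, it isn't hidden and already acked by the HMC,
    // and its action flags don't say to skip reporting it to the host.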
    bool required = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    // Manufacturing testing may turn off sending PELs to the host
    if (!_dataIface.getHostPELEnablement())
    {
        return false;
    }

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        auto a = attributes.value().get();

        if ((a.hostState == TransmissionState::acked) ||
            (a.hostState == TransmissionState::badPEL))
        {
            required = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit) &&
                 (a.hmcState == TransmissionState::acked))
        {
            required = false;
        }
        else if (a.actionFlags.test(dontReportToHostFlagBit))
        {
            required = false;
        }
    }
    else
    {
        using namespace phosphor::logging;
        log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository",
                        entry("PEL_ID=0x%X", id));
        required = false;
    }

    return required;
}

bool HostNotifier::notifyRequired(uint32_t id) const
{
    bool notify = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        // If already acked by the host, don't send again.
        // (A safety check as it shouldn't get to this point.)
        auto a = attributes.value().get();
        if (a.hostState == TransmissionState::acked)
        {
            notify = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit))
        {
            // If hidden and acked (or will be acked) by the HMC,
            // also don't send it. (HMC management can come and
            // go at any time.)
            if ((a.hmcState == TransmissionState::acked) ||
                _dataIface.isHMCManaged())
            {
                notify = false;
            }
        }
    }
    else
    {
        // Must have been deleted since it was put on the queue.
        notify = false;
    }

    return notify;
}

void HostNotifier::newLogCallback(const PEL& pel)
{
    if (!enqueueRequired(pel.id()))
    {
        return;
    }

    log<level::DEBUG>("new PEL added to queue", entry("PEL_ID=0x%X", pel.id()));

    _pelQueue.push_back(pel.id());

    // Don't notify if the host is down, hasn't been up long enough, or is full
    if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled())
    {
        return;
    }

    // Dispatch a command now if there isn't currently a command in
    // progress, and either this is the first log in the queue or a
    // previous attempt gave up after a hard failure.
    auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
                      _retryTimer.isEnabled();

    auto firstPEL = _pelQueue.size() == 1;
    auto gaveUp = _retryCount >= maxRetryAttempts;

    if (!inProgress && (firstPEL || gaveUp))
    {
        _retryCount = 0;

        // Send a log, but from the event loop, not from here.
        scheduleDispatch();
    }
}

void HostNotifier::deleteLogCallback(uint32_t id)
{
    auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
    if (queueIt != _pelQueue.end())
    {
        log<level::DEBUG>("Host notifier removing deleted log from queue");
        _pelQueue.erase(queueIt);
    }

    auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sentIt != _sentPELs.end())
    {
        log<level::DEBUG>("Host notifier removing deleted log from sent list");
        _sentPELs.erase(sentIt);
    }

    // Nothing we can do about this...
    if (id == _inProgressPEL)
    {
        log<level::WARNING>(
            "A PEL was deleted while its host notification was in progress",
            entry("PEL_ID=0x%X", id));
    }
}

void HostNotifier::scheduleDispatch()
{
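    // Do the send from the event loop (via a Defer source) rather than
    // directly from the callback that triggered it.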
    _dispatcher = std::make_unique<sdeventplus::source::Defer>(
        _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
                                          this, std::placeholders::_1));
}

void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/)
{
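    // The Defer source has served its purpose, so delete it before
    // doing the actual send.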
    _dispatcher.reset();

    doNewLogNotify();
}

void HostNotifier::doNewLogNotify()
{
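    // Only attempt a send when the host is up and there isn't already a
    // retry or host-full back-off timer running.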
    if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
        _hostFullTimer.isEnabled())
    {
        return;
    }

    if (_retryCount >= maxRetryAttempts)
    {
        // Give up until a new log comes in.
        if (_retryCount == maxRetryAttempts)
        {
            // If this were to really happen, the PLDM interface
            // would be down and isolating that shouldn't be left to
            // a logging daemon, so just trace.  Also, this will start
            // trying again when the next new log comes in.
            log<level::ERR>(
                "PEL Host notifier hit max retry attempts. Giving up for now.",
                entry("PEL_ID=0x%X", _pelQueue.front()));

            // Tell the host interface object to clean itself up, especially to
            // release the PLDM instance ID it's been using.
            _hostIface->cancelCmd();
        }
        return;
    }

    bool doNotify = false;
    uint32_t id = 0;

    // Find the PEL to send
    while (!doNotify && !_pelQueue.empty())
    {
        id = _pelQueue.front();
        _pelQueue.pop_front();

        if (notifyRequired(id))
        {
            doNotify = true;
        }
    }

    if (doNotify)
    {
        // Get the size using the repo attributes
        Repository::LogID i{Repository::LogID::Pel{id}};
        if (auto attributes = _repo.getPELAttributes(i); attributes)
        {
            auto size = static_cast<size_t>(
                std::filesystem::file_size((*attributes).get().path));

            log<level::DEBUG>("sendNewLogCmd", entry("PEL_ID=0x%X", id),
                              entry("PEL_SIZE=%d", size));

            auto rc = _hostIface->sendNewLogCmd(id, size);

            if (rc == CmdStatus::success)
            {
                _inProgressPEL = id;
            }
            else
            {
                // It failed.  Retry
                log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id));
                _pelQueue.push_front(id);
                _inProgressPEL = 0;
                _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
            }
        }
        else
        {
            log<level::ERR>("PEL ID not in repository.  Cannot notify host",
                            entry("PEL_ID=0x%X", id));
        }
    }
}

void HostNotifier::hostStateChange(bool hostUp)
{
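    // A host state change in either direction clears the retry count
    // and the host full indication.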
    _retryCount = 0;
    _hostFull = false;

    if (hostUp && !_pelQueue.empty())
    {
        log<level::DEBUG>("Host state change to on");
        _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
    }
    else if (!hostUp)
    {
        log<level::DEBUG>("Host state change to off");

        stopCommand();

        // Requeue any PELs that were sent but not acked and set their
        // state back to 'new' so they'll be sent again.
        for (auto id : _sentPELs)
        {
            _pelQueue.push_back(id);
            _repo.setPELHostTransState(id, TransmissionState::newPEL);
        }

        _sentPELs.clear();

        if (_hostFullTimer.isEnabled())
        {
            _hostFullTimer.setEnabled(false);
        }

        if (_hostUpTimer.isEnabled())
        {
            _hostUpTimer.setEnabled(false);
        }
    }
}

void HostNotifier::commandResponse(ResponseStatus status)
{
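    // Handle the host's asynchronous response to the new log command
    // for the PEL that is currently in progress.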
    auto id = _inProgressPEL;
    _inProgressPEL = 0;

    if (status == ResponseStatus::success)
    {
        log<level::DEBUG>("HostNotifier command response success",
                          entry("PEL_ID=0x%X", id));
        _retryCount = 0;

        _sentPELs.push_back(id);

        _repo.setPELHostTransState(id, TransmissionState::sent);

        // If the host is full, don't send off the next PEL
        if (!_hostFull && !_pelQueue.empty())
        {
            doNewLogNotify();
        }
    }
    else
    {
        log<level::ERR>("PLDM command response failure",
                        entry("PEL_ID=0x%X", id));
        // Retry
        _pelQueue.push_front(id);
        _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
    }
}

void HostNotifier::retryTimerExpired()
{
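    // Only retry while the host is still up; a host state change will
    // reset the retry handling otherwise.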
    if (_dataIface.isHostUp())
    {
        log<level::INFO>("Attempting command retry",
                         entry("PEL_ID=0x%X", _pelQueue.front()));
        _retryCount++;
        doNewLogNotify();
    }
}

void HostNotifier::hostFullTimerExpired()
{
    log<level::DEBUG>("Host full timer expired, trying send again");
    doNewLogNotify();
}

void HostNotifier::stopCommand()
{
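    // Abandon any in-progress command:  put the in-flight PEL back on
    // the queue, stop the retry timer, and let the host interface
    // release any resources it is holding (e.g. the PLDM instance ID).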
    _retryCount = 0;

    if (_inProgressPEL != 0)
    {
        _pelQueue.push_front(_inProgressPEL);
        _inProgressPEL = 0;
    }

    if (_retryTimer.isEnabled())
    {
        _retryTimer.setEnabled(false);
    }

    // Ensure the PLDM instance ID is released
    _hostIface->cancelCmd();
}

void HostNotifier::ackPEL(uint32_t id)
{
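    // The host has acknowledged this PEL, so record that and resume
    // sending if the host had previously reported it was full.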
    _repo.setPELHostTransState(id, TransmissionState::acked);

    // No longer just 'sent', so remove it from the sent list.
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    // An ack means the host is no longer full
    if (_hostFullTimer.isEnabled())
    {
        _hostFullTimer.setEnabled(false);
    }

    if (_hostFull)
    {
        _hostFull = false;

        log<level::DEBUG>("Host previously full, not anymore after this ack");

        // Start sending PELs again, from the event loop
        if (!_pelQueue.empty())
        {
            scheduleDispatch();
        }
    }
}

void HostNotifier::setHostFull(uint32_t id)
{
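    // The host reported it has no more room for PELs.  The PEL that hit
    // the full condition will be re-sent once an ack arrives or the host
    // full timer expires.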
    log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));

    _hostFull = true;

    // This PEL needs to be re-sent
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
        _repo.setPELHostTransState(id, TransmissionState::newPEL);

        if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
            _pelQueue.end())
        {
            _pelQueue.push_front(id);
        }
    }

    // The only PELs that will be sent while the host is full
    // are the ones sent from this timer callback.
    if (!_hostFullTimer.isEnabled())
    {
        log<level::DEBUG>("Starting host full timer");
        _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
    }
}

void HostNotifier::setBadPEL(uint32_t id)
{
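    // The host rejected this PEL.  Mark it as bad so it will never be
    // enqueued to be sent again (see enqueueRequired()).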
    log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id));

    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    _repo.setPELHostTransState(id, TransmissionState::badPEL);
}

} // namespace openpower::pels