/**
 * Copyright © 2019 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "host_notifier.hpp"

#include <phosphor-logging/log.hpp>

#include <algorithm>
#include <filesystem>
#include <functional>

namespace openpower::pels
{

const auto subscriptionName = "PELHostNotifier";
const size_t maxRetryAttempts = 15;

using namespace phosphor::logging;

HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
                           std::unique_ptr<HostInterface> hostIface) :
    _repo(repo),
    _dataIface(dataIface), _hostIface(std::move(hostIface)),
    _retryTimer(_hostIface->getEvent(),
                std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
    _hostFullTimer(
        _hostIface->getEvent(),
        std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this))
{
    // Subscribe to be told about new PELs.
    _repo.subscribeToAdds(subscriptionName,
                          std::bind(std::mem_fn(&HostNotifier::newLogCallback),
                                    this, std::placeholders::_1));

    // Subscribe to be told about deleted PELs.
    _repo.subscribeToDeletes(
        subscriptionName,
        std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
                  std::placeholders::_1));

    // Add any existing PELs to the queue to send them if necessary.
    _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
                             std::placeholders::_1));

    // Subscribe to be told about host state changes.
    _dataIface.subscribeToHostStateChange(
        subscriptionName,
        std::bind(std::mem_fn(&HostNotifier::hostStateChange), this,
                  std::placeholders::_1));

    // Set the function to call when the async response is received.
    _hostIface->setResponseFunction(
        std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
                  std::placeholders::_1));

    // Start sending logs if the host is running
    if (!_pelQueue.empty() && _dataIface.isHostUp())
    {
        doNewLogNotify();
    }
}

HostNotifier::~HostNotifier()
{
    _repo.unsubscribeFromAdds(subscriptionName);
    _dataIface.unsubscribeFromHostStateChange(subscriptionName);
}

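// The callback passed to Repository::for_each in the constructor to queue
// up any existing PELs that still need to be sent to the host.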
bool HostNotifier::addPELToQueue(const PEL& pel)
{
    if (enqueueRequired(pel.id()))
    {
        _pelQueue.push_back(pel.id());
    }

    // Return false so that Repository::for_each keeps going.
    return false;
}

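// Returns true if the PEL with this ID still needs to be sent to the host,
// based on its host/HMC transmission states and action flags.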
bool HostNotifier::enqueueRequired(uint32_t id) const
{
    bool required = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    // Manufacturing testing may turn off sending PELs to the host.
    if (!_dataIface.getHostPELEnablement())
    {
        return false;
    }

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        auto a = attributes.value().get();

        if ((a.hostState == TransmissionState::acked) ||
            (a.hostState == TransmissionState::badPEL))
        {
            required = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit) &&
                 (a.hmcState == TransmissionState::acked))
        {
            required = false;
        }
        else if (a.actionFlags.test(dontReportToHostFlagBit))
        {
            required = false;
        }
    }
    else
    {
        log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository",
                        entry("PEL_ID=0x%X", id));
        required = false;
    }

    return required;
}

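// A final check made right before sending a PEL, in case it was acked or
// deleted while it was waiting in the queue.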
bool HostNotifier::notifyRequired(uint32_t id) const
{
    bool notify = true;
    Repository::LogID i{Repository::LogID::Pel{id}};

    if (auto attributes = _repo.getPELAttributes(i); attributes)
    {
        // If already acked by the host, don't send again.
        // (A safety check as it shouldn't get to this point.)
        auto a = attributes.value().get();
        if (a.hostState == TransmissionState::acked)
        {
            notify = false;
        }
        else if (a.actionFlags.test(hiddenFlagBit))
        {
            // If hidden and acked (or will be acked) by the HMC,
            // also don't send it. (HMC management can come and
            // go at any time.)
            if ((a.hmcState == TransmissionState::acked) ||
                _dataIface.isHMCManaged())
            {
                notify = false;
            }
        }
    }
    else
    {
        // Must have been deleted since it was put on the queue.
        notify = false;
    }

    return notify;
}

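// Called by the Repository when a new PEL is added, to queue it up and
// possibly kick off a send.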
void HostNotifier::newLogCallback(const PEL& pel)
{
    if (!enqueueRequired(pel.id()))
    {
        return;
    }

    _pelQueue.push_back(pel.id());

    // Don't notify if the host is down or full.
    if (!_dataIface.isHostUp() || _hostFull)
    {
        return;
    }

    // Dispatch a command now if there isn't currently a command in
    // progress, and either this is the first log in the queue or a
    // previous send gave up after a hard failure.
    auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
                      _retryTimer.isEnabled();

    auto firstPEL = _pelQueue.size() == 1;
    auto gaveUp = _retryCount >= maxRetryAttempts;

    if (!inProgress && (firstPEL || gaveUp))
    {
        _retryCount = 0;

        // Send a log, but from the event loop, not from here.
        scheduleDispatch();
    }
}

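// Called by the Repository when a PEL is deleted, so it can be removed
// from the queue and the sent list.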
void HostNotifier::deleteLogCallback(uint32_t id)
{
    auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
    if (queueIt != _pelQueue.end())
    {
        log<level::DEBUG>("Host notifier removing deleted log from queue");
        _pelQueue.erase(queueIt);
    }

    auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sentIt != _sentPELs.end())
    {
        log<level::DEBUG>("Host notifier removing deleted log from sent list");
        _sentPELs.erase(sentIt);
    }

    // Nothing we can do about this...
    if (id == _inProgressPEL)
    {
        log<level::WARNING>(
            "A PEL was deleted while its host notification was in progress",
            entry("PEL_ID=0x%X", id));
    }
}

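// Schedules doNewLogNotify() to run from the event loop by using a Defer
// event source, so the send doesn't happen in the caller's stack.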
void HostNotifier::scheduleDispatch()
{
    _dispatcher = std::make_unique<sdeventplus::source::Defer>(
        _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
                                          this, std::placeholders::_1));
}

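// The callback for the dispatcher Defer source that kicks off the send.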
void HostNotifier::dispatch(sdeventplus::source::EventBase& source)
{
    _dispatcher.reset();

    doNewLogNotify();
}

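// Sends the new log command for the next PEL in the queue that still needs
// to go to the host, unless retries have been exhausted.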
void HostNotifier::doNewLogNotify()
{
    if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
        _hostFullTimer.isEnabled())
    {
        return;
    }

    if (_retryCount >= maxRetryAttempts)
    {
        // Give up until a new log comes in.
        if (_retryCount == maxRetryAttempts)
        {
            // If this were to really happen, the PLDM interface would be
            // down, and isolating that shouldn't be left to a logging
            // daemon, so just trace.  Also, sending will start again when
            // the next new log comes in.
            log<level::ERR>(
                "PEL Host notifier hit max retry attempts. Giving up for now.",
                entry("PEL_ID=0x%X", _pelQueue.front()));

            // Tell the host interface object to clean itself up, especially to
            // release the PLDM instance ID it's been using.
            _hostIface->cancelCmd();
        }
        return;
    }

    bool doNotify = false;
    uint32_t id = 0;

    // Find the PEL to send
    while (!doNotify && !_pelQueue.empty())
    {
        id = _pelQueue.front();
        _pelQueue.pop_front();

        if (notifyRequired(id))
        {
            doNotify = true;
        }
    }

    if (doNotify)
    {
        // Get the size using the repo attributes
        Repository::LogID i{Repository::LogID::Pel{id}};
        if (auto attributes = _repo.getPELAttributes(i); attributes)
        {
            auto size = static_cast<size_t>(
                std::filesystem::file_size((*attributes).get().path));
            auto rc = _hostIface->sendNewLogCmd(id, size);

            if (rc == CmdStatus::success)
            {
                _inProgressPEL = id;
            }
            else
            {
                // It failed.  Retry
                log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id));
                _pelQueue.push_front(id);
                _inProgressPEL = 0;
                _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
            }
        }
        else
        {
            log<level::ERR>("PEL ID not in repository.  Cannot notify host",
                            entry("PEL_ID=0x%X", id));
        }
    }
}

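// Called when the host changes state.  On a power on, start sending any
// queued PELs.  On a power off, requeue any PELs that were sent but not
// acked so they get sent again on the next boot.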
void HostNotifier::hostStateChange(bool hostUp)
{
    _retryCount = 0;
    _hostFull = false;

    if (hostUp && !_pelQueue.empty())
    {
        doNewLogNotify();
    }
    else if (!hostUp)
    {
        stopCommand();

        // Requeue any PELs that were sent but not acked, and set their
        // state back to new so they'll get sent again.
        for (auto id : _sentPELs)
        {
            _pelQueue.push_back(id);
            _repo.setPELHostTransState(id, TransmissionState::newPEL);
        }

        _sentPELs.clear();

        if (_hostFullTimer.isEnabled())
        {
            _hostFullTimer.setEnabled(false);
        }
    }
}

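// Called when the async response to the new log command is received from
// the host interface.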
void HostNotifier::commandResponse(ResponseStatus status)
{
    auto id = _inProgressPEL;
    _inProgressPEL = 0;

    if (status == ResponseStatus::success)
    {
        _retryCount = 0;

        _sentPELs.push_back(id);

        _repo.setPELHostTransState(id, TransmissionState::sent);

        // If the host is full, don't send off the next PEL
        if (!_hostFull && !_pelQueue.empty())
        {
            doNewLogNotify();
        }
    }
    else
    {
        log<level::ERR>("PLDM command response failure",
                        entry("PEL_ID=0x%X", id));
        // Retry
        _pelQueue.push_front(id);
        _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
    }
}

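// Retries the send after a failure, as long as the host is still up.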
void HostNotifier::retryTimerExpired()
{
    if (_dataIface.isHostUp())
    {
        log<level::INFO>("Attempting command retry",
                         entry("PEL_ID=0x%X", _pelQueue.front()));
        _retryCount++;
        doNewLogNotify();
    }
}

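// Called when the host full timer expires.  This is the only path that
// attempts a send while the host is still considered full.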
void HostNotifier::hostFullTimerExpired()
{
    doNewLogNotify();
}

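// Stops an in progress transfer: resets the retry count, puts any in
// progress PEL back on the front of the queue, stops the retry timer, and
// cancels the command on the host interface.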
void HostNotifier::stopCommand()
{
    _retryCount = 0;

    if (_inProgressPEL != 0)
    {
        _pelQueue.push_front(_inProgressPEL);
        _inProgressPEL = 0;
    }

    if (_retryTimer.isEnabled())
    {
        _retryTimer.setEnabled(false);
    }

    // Ensure the PLDM instance ID is released
    _hostIface->cancelCmd();
}

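// Called when the host acknowledges a PEL.  An ack also means the host is
// no longer full, so sending can resume.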
void HostNotifier::ackPEL(uint32_t id)
{
    _repo.setPELHostTransState(id, TransmissionState::acked);

    // No longer just 'sent', so remove it from the sent list.
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    // An ack means the host is no longer full
    if (_hostFullTimer.isEnabled())
    {
        _hostFullTimer.setEnabled(false);
    }

    if (_hostFull)
    {
        _hostFull = false;

        // Start sending PELs again, from the event loop
        if (!_pelQueue.empty())
        {
            scheduleDispatch();
        }
    }
}

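// Called when the host reports it is full and can't store the PEL.  The
// PEL is put back on the queue and a timer is started to retry later.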
void HostNotifier::setHostFull(uint32_t id)
{
    log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));

    _hostFull = true;

    // This PEL needs to get re-sent
    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
        _repo.setPELHostTransState(id, TransmissionState::newPEL);

        if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
            _pelQueue.end())
        {
            _pelQueue.push_front(id);
        }
    }

    // When the host is full, the only PELs that get sent are from this
    // timer's callback.
    if (!_hostFullTimer.isEnabled())
    {
        _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
    }
}

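// Called when the host reports that the PEL was malformed, so it won't be
// sent to the host again.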
void HostNotifier::setBadPEL(uint32_t id)
{
    log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id));

    auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
    if (sent != _sentPELs.end())
    {
        _sentPELs.erase(sent);
    }

    _repo.setPELHostTransState(id, TransmissionState::badPEL);
}

} // namespace openpower::pels