1 /**
2  * Copyright © 2019 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "host_notifier.hpp"
17 
18 #include <phosphor-logging/log.hpp>
19 
20 namespace openpower::pels
21 {
22 
23 const auto subscriptionName = "PELHostNotifier";
24 const size_t maxRetryAttempts = 15;
25 
26 using namespace phosphor::logging;
27 
28 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
29                            std::unique_ptr<HostInterface> hostIface) :
30     _repo(repo),
31     _dataIface(dataIface), _hostIface(std::move(hostIface)),
32     _retryTimer(_hostIface->getEvent(),
33                 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
34     _hostFullTimer(
35         _hostIface->getEvent(),
36         std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this))
37 {
38     // Subscribe to be told about new PELs.
39     _repo.subscribeToAdds(subscriptionName,
40                           std::bind(std::mem_fn(&HostNotifier::newLogCallback),
41                                     this, std::placeholders::_1));
42 
43     // Add any existing PELs to the queue to send them if necessary.
44     _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
45                              std::placeholders::_1));
46 
47     // Subscribe to be told about host state changes.
48     _dataIface.subscribeToHostStateChange(
49         subscriptionName,
50         std::bind(std::mem_fun(&HostNotifier::hostStateChange), this,
51                   std::placeholders::_1));
52 
53     // Set the function to call when the async reponse is received.
54     _hostIface->setResponseFunction(
55         std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
56                   std::placeholders::_1));
57 
58     // Start sending logs if the host is running
59     if (!_pelQueue.empty() && _dataIface.isHostUp())
60     {
61         doNewLogNotify();
62     }
63 }
64 
// Destructor: remove the subscriptions registered in the constructor
// so the repository and data interface won't call back into a
// destroyed object.
HostNotifier::~HostNotifier()
{
    _repo.unsubscribeFromAdds(subscriptionName);
    _dataIface.unsubscribeFromHostStateChange(subscriptionName);
}
70 
71 bool HostNotifier::addPELToQueue(const PEL& pel)
72 {
73     if (enqueueRequired(pel.id()))
74     {
75         _pelQueue.push_back(pel.id());
76     }
77 
78     // Return false so that Repo::for_each keeps going.
79     return false;
80 }
81 
82 bool HostNotifier::enqueueRequired(uint32_t id) const
83 {
84     bool required = true;
85     Repository::LogID i{Repository::LogID::Pel{id}};
86 
87     if (auto attributes = _repo.getPELAttributes(i); attributes)
88     {
89         auto a = attributes.value().get();
90 
91         if ((a.hostState == TransmissionState::acked) ||
92             (a.hostState == TransmissionState::badPEL))
93         {
94             required = false;
95         }
96         else if (a.actionFlags.test(hiddenFlagBit) &&
97                  (a.hmcState == TransmissionState::acked))
98         {
99             required = false;
100         }
101         else if (a.actionFlags.test(dontReportToHostFlagBit))
102         {
103             required = false;
104         }
105     }
106     else
107     {
108         using namespace phosphor::logging;
109         log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository",
110                         entry("PEL_ID=0x%X", id));
111         required = false;
112     }
113 
114     return required;
115 }
116 
117 bool HostNotifier::notifyRequired(uint32_t id) const
118 {
119     bool notify = true;
120     Repository::LogID i{Repository::LogID::Pel{id}};
121 
122     if (auto attributes = _repo.getPELAttributes(i); attributes)
123     {
124         // If already acked by the host, don't send again.
125         // (A safety check as it shouldn't get to this point.)
126         auto a = attributes.value().get();
127         if (a.hostState == TransmissionState::acked)
128         {
129             notify = false;
130         }
131         else if (a.actionFlags.test(hiddenFlagBit))
132         {
133             // If hidden and acked (or will be) acked by the HMC,
134             // also don't send it. (HMC management can come and
135             // go at any time)
136             if ((a.hmcState == TransmissionState::acked) ||
137                 _dataIface.isHMCManaged())
138             {
139                 notify = false;
140             }
141         }
142     }
143     else
144     {
145         // Must have been deleted since put on the queue.
146         notify = false;
147     }
148 
149     return notify;
150 }
151 
152 void HostNotifier::newLogCallback(const PEL& pel)
153 {
154     if (!enqueueRequired(pel.id()))
155     {
156         return;
157     }
158 
159     _pelQueue.push_back(pel.id());
160 
161     // Notify shouldn't happen if host is down or full
162     if (!_dataIface.isHostUp() || _hostFull)
163     {
164         return;
165     }
166 
167     // Dispatch a command now if there isn't currently a command
168     // in progress and this is the first log in the queue or it
169     // previously gave up from a hard failure.
170     auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
171                       _retryTimer.isEnabled();
172 
173     auto firstPEL = _pelQueue.size() == 1;
174     auto gaveUp = _retryCount >= maxRetryAttempts;
175 
176     if (!inProgress && (firstPEL || gaveUp))
177     {
178         _retryCount = 0;
179 
180         // Send a log, but from the event loop, not from here.
181         scheduleDispatch();
182     }
183 }
184 
185 void HostNotifier::scheduleDispatch()
186 {
187     _dispatcher = std::make_unique<sdeventplus::source::Defer>(
188         _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
189                                           this, std::placeholders::_1));
190 }
191 
/**
 * Event-loop callback armed by scheduleDispatch().
 *
 * @param source - the Defer event source that fired (unused)
 */
void HostNotifier::dispatch(sdeventplus::source::EventBase& source)
{
    // The Defer source is one-shot; release it before sending.
    _dispatcher.reset();

    doNewLogNotify();
}
198 
/**
 * Sends the next eligible PEL in the queue to the host.
 *
 * Does nothing if the host is down, a retry is already scheduled, the
 * host-full timer is running (its expiration re-enters here), or the
 * retry limit has been hit.  On a send failure the PEL is pushed back
 * on the front of the queue and the retry timer is started.
 */
void HostNotifier::doNewLogNotify()
{
    if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
        _hostFullTimer.isEnabled())
    {
        return;
    }

    if (_retryCount >= maxRetryAttempts)
    {
        // Give up until a new log comes in.
        // Only trace at the exact threshold to avoid repeated logging.
        if (_retryCount == maxRetryAttempts)
        {
            // If this were to really happen, the PLDM interface
            // would be down and isolating that shouldn't be left to
            // a logging daemon, so just trace.  Also, this will start
            // trying again when the next new log comes in.
            log<level::ERR>(
                "PEL Host notifier hit max retry attempts. Giving up for now.",
                entry("PEL_ID=0x%X", _pelQueue.front()));
        }
        return;
    }

    bool doNotify = false;
    uint32_t id = 0;

    // Find the PEL to send.  Pop entries until one still requires
    // notification - entries may have been acked or deleted since
    // they were queued (see notifyRequired()).
    while (!doNotify && !_pelQueue.empty())
    {
        id = _pelQueue.front();
        _pelQueue.pop_front();

        if (notifyRequired(id))
        {
            doNotify = true;
        }
    }

    if (doNotify)
    {
        // Get the size using the repo attributes
        Repository::LogID i{Repository::LogID::Pel{id}};
        if (auto attributes = _repo.getPELAttributes(i); attributes)
        {
            auto size = static_cast<size_t>(
                std::filesystem::file_size((*attributes).get().path));
            auto rc = _hostIface->sendNewLogCmd(id, size);

            if (rc == CmdStatus::success)
            {
                // Track the in-flight PEL; commandResponse() clears it.
                _inProgressPEL = id;
            }
            else
            {
                // It failed.  Retry
                log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id));
                _pelQueue.push_front(id);
                _inProgressPEL = 0;
                _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
            }
        }
        else
        {
            log<level::ERR>("PEL ID not in repository.  Cannot notify host",
                            entry("PEL_ID=0x%X", id));
        }
    }
}
268 
269 void HostNotifier::hostStateChange(bool hostUp)
270 {
271     _retryCount = 0;
272     _hostFull = false;
273 
274     if (hostUp && !_pelQueue.empty())
275     {
276         doNewLogNotify();
277     }
278     else if (!hostUp)
279     {
280         stopCommand();
281 
282         // Reset the state on any PELs that were sent but not acked back
283         // to new so they'll get sent again.
284         for (auto id : _sentPELs)
285         {
286             _pelQueue.push_back(id);
287             _repo.setPELHostTransState(id, TransmissionState::newPEL);
288         }
289 
290         _sentPELs.clear();
291 
292         if (_hostFullTimer.isEnabled())
293         {
294             _hostFullTimer.setEnabled(false);
295         }
296     }
297 }
298 
299 void HostNotifier::commandResponse(ResponseStatus status)
300 {
301     auto id = _inProgressPEL;
302     _inProgressPEL = 0;
303 
304     if (status == ResponseStatus::success)
305     {
306         _retryCount = 0;
307 
308         _sentPELs.push_back(id);
309 
310         _repo.setPELHostTransState(id, TransmissionState::sent);
311 
312         // If the host is full, don't send off the next PEL
313         if (!_hostFull && !_pelQueue.empty())
314         {
315             doNewLogNotify();
316         }
317     }
318     else
319     {
320         log<level::ERR>("PLDM command response failure",
321                         entry("PEL_ID=0x%X", id));
322         // Retry
323         _pelQueue.push_front(id);
324         _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
325     }
326 }
327 
328 void HostNotifier::retryTimerExpired()
329 {
330     if (_dataIface.isHostUp())
331     {
332         log<level::INFO>("Attempting command retry",
333                          entry("PEL_ID=0x%X", _pelQueue.front()));
334         _retryCount++;
335         doNewLogNotify();
336     }
337 }
338 
/**
 * Host-full timer callback: attempt another send now that the retry
 * delay set in setHostFull() has elapsed.
 */
void HostNotifier::hostFullTimerExpired()
{
    doNewLogNotify();
}
343 
344 void HostNotifier::stopCommand()
345 {
346     _retryCount = 0;
347 
348     if (_inProgressPEL != 0)
349     {
350         _pelQueue.push_front(_inProgressPEL);
351         _inProgressPEL = 0;
352     }
353 
354     if (_retryTimer.isEnabled())
355     {
356         _retryTimer.setEnabled(false);
357     }
358 
359     if (_hostIface->cmdInProgress())
360     {
361         _hostIface->cancelCmd();
362     }
363 }
364 
365 void HostNotifier::ackPEL(uint32_t id)
366 {
367     _repo.setPELHostTransState(id, TransmissionState::acked);
368 
369     // No longer just 'sent', so remove it from the sent list.
370     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
371     if (sent != _sentPELs.end())
372     {
373         _sentPELs.erase(sent);
374     }
375 
376     // An ack means the host is no longer full
377     if (_hostFullTimer.isEnabled())
378     {
379         _hostFullTimer.setEnabled(false);
380     }
381 
382     if (_hostFull)
383     {
384         _hostFull = false;
385 
386         // Start sending PELs again, from the event loop
387         if (!_pelQueue.empty())
388         {
389             scheduleDispatch();
390         }
391     }
392 }
393 
394 void HostNotifier::setHostFull(uint32_t id)
395 {
396     log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id));
397 
398     _hostFull = true;
399 
400     // This PEL needs to get re-sent
401     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
402     if (sent != _sentPELs.end())
403     {
404         _sentPELs.erase(sent);
405         _repo.setPELHostTransState(id, TransmissionState::newPEL);
406 
407         if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
408             _pelQueue.end())
409         {
410             _pelQueue.push_front(id);
411         }
412     }
413 
414     // The only PELs that will be sent when the
415     // host is full is from this timer callback.
416     if (!_hostFullTimer.isEnabled())
417     {
418         _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
419     }
420 }
421 
422 void HostNotifier::setBadPEL(uint32_t id)
423 {
424     log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id));
425 
426     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
427     if (sent != _sentPELs.end())
428     {
429         _sentPELs.erase(sent);
430     }
431 
432     _repo.setPELHostTransState(id, TransmissionState::badPEL);
433 }
434 
435 } // namespace openpower::pels
436