1 /**
2  * Copyright © 2019 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "host_notifier.hpp"
17 
18 #include <phosphor-logging/lg2.hpp>
19 
20 namespace openpower::pels
21 {
22 
23 const auto subscriptionName = "PELHostNotifier";
24 const size_t maxRetryAttempts = 15;
25 
26 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface,
27                            std::unique_ptr<HostInterface> hostIface) :
28     _repo(repo),
29     _dataIface(dataIface), _hostIface(std::move(hostIface)),
30     _retryTimer(_hostIface->getEvent(),
31                 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)),
32     _hostFullTimer(
33         _hostIface->getEvent(),
34         std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)),
35     _hostUpTimer(
36         _hostIface->getEvent(),
37         std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired), this))
38 {
39     // Subscribe to be told about new PELs.
40     _repo.subscribeToAdds(subscriptionName,
41                           std::bind(std::mem_fn(&HostNotifier::newLogCallback),
42                                     this, std::placeholders::_1));
43 
44     // Subscribe to be told about deleted PELs.
45     _repo.subscribeToDeletes(
46         subscriptionName,
47         std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this,
48                   std::placeholders::_1));
49 
50     // Add any existing PELs to the queue to send them if necessary.
51     _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this,
52                              std::placeholders::_1));
53 
54     // Subscribe to be told about host state changes.
55     _dataIface.subscribeToHostStateChange(
56         subscriptionName, std::bind(std::mem_fn(&HostNotifier::hostStateChange),
57                                     this, std::placeholders::_1));
58 
59     // Set the function to call when the async reponse is received.
60     _hostIface->setResponseFunction(
61         std::bind(std::mem_fn(&HostNotifier::commandResponse), this,
62                   std::placeholders::_1));
63 
64     // Start sending logs if the host is running
65     if (!_pelQueue.empty() && _dataIface.isHostUp())
66     {
67         lg2::debug("Host is already up at startup");
68         _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
69     }
70 }
71 
72 HostNotifier::~HostNotifier()
73 {
74     _repo.unsubscribeFromAdds(subscriptionName);
75     _dataIface.unsubscribeFromHostStateChange(subscriptionName);
76 }
77 
78 void HostNotifier::hostUpTimerExpired()
79 {
80     lg2::debug("Host up timer expired");
81     doNewLogNotify();
82 }
83 
84 bool HostNotifier::addPELToQueue(const PEL& pel)
85 {
86     if (enqueueRequired(pel.id()))
87     {
88         _pelQueue.push_back(pel.id());
89     }
90 
91     // Return false so that Repo::for_each keeps going.
92     return false;
93 }
94 
95 bool HostNotifier::enqueueRequired(uint32_t id) const
96 {
97     bool required = true;
98     Repository::LogID i{Repository::LogID::Pel{id}};
99 
100     // Manufacturing testing may turn off sending up PELs
101     if (!_dataIface.getHostPELEnablement())
102     {
103         return false;
104     }
105 
106     if (auto attributes = _repo.getPELAttributes(i); attributes)
107     {
108         auto a = attributes.value().get();
109 
110         if ((a.hostState == TransmissionState::acked) ||
111             (a.hostState == TransmissionState::badPEL))
112         {
113             required = false;
114         }
115         else if (a.actionFlags.test(hiddenFlagBit) &&
116                  (a.hmcState == TransmissionState::acked))
117         {
118             required = false;
119         }
120         else if (a.actionFlags.test(dontReportToHostFlagBit))
121         {
122             required = false;
123         }
124     }
125     else
126     {
127         using namespace phosphor::logging;
128         lg2::error("Host Enqueue: Unable to find PEL ID {ID} in repository",
129                    "ID", lg2::hex, id);
130         required = false;
131     }
132 
133     return required;
134 }
135 
136 bool HostNotifier::notifyRequired(uint32_t id) const
137 {
138     bool notify = true;
139     Repository::LogID i{Repository::LogID::Pel{id}};
140 
141     if (auto attributes = _repo.getPELAttributes(i); attributes)
142     {
143         // If already acked by the host, don't send again.
144         // (A safety check as it shouldn't get to this point.)
145         auto a = attributes.value().get();
146         if (a.hostState == TransmissionState::acked)
147         {
148             notify = false;
149         }
150         else if (a.actionFlags.test(hiddenFlagBit))
151         {
152             // If hidden and acked (or will be) acked by the HMC,
153             // also don't send it. (HMC management can come and
154             // go at any time)
155             if ((a.hmcState == TransmissionState::acked) ||
156                 _dataIface.isHMCManaged())
157             {
158                 notify = false;
159             }
160         }
161     }
162     else
163     {
164         // Must have been deleted since put on the queue.
165         notify = false;
166     }
167 
168     return notify;
169 }
170 
171 void HostNotifier::newLogCallback(const PEL& pel)
172 {
173     if (!enqueueRequired(pel.id()))
174     {
175         return;
176     }
177 
178     lg2::debug("New PEL added to queue, PEL ID = {ID}", "ID", lg2::hex,
179                pel.id());
180 
181     _pelQueue.push_back(pel.id());
182 
183     // Notify shouldn't happen if host is down, not up long enough, or full
184     if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled())
185     {
186         return;
187     }
188 
189     // Dispatch a command now if there isn't currently a command
190     // in progress and this is the first log in the queue or it
191     // previously gave up from a hard failure.
192     auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() ||
193                       _retryTimer.isEnabled();
194 
195     auto firstPEL = _pelQueue.size() == 1;
196     auto gaveUp = _retryCount >= maxRetryAttempts;
197 
198     if (!inProgress && (firstPEL || gaveUp))
199     {
200         _retryCount = 0;
201 
202         // Send a log, but from the event loop, not from here.
203         scheduleDispatch();
204     }
205 }
206 
207 void HostNotifier::deleteLogCallback(uint32_t id)
208 {
209     auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id);
210     if (queueIt != _pelQueue.end())
211     {
212         lg2::debug("Host notifier removing deleted log from queue");
213         _pelQueue.erase(queueIt);
214     }
215 
216     auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id);
217     if (sentIt != _sentPELs.end())
218     {
219         lg2::debug("Host notifier removing deleted log from sent list");
220         _sentPELs.erase(sentIt);
221     }
222 
223     // Nothing we can do about this...
224     if (id == _inProgressPEL)
225     {
226         lg2::warning(
227             "A PEL was deleted while its host notification was in progress, PEL ID = {ID}",
228             "ID", lg2::hex, id);
229     }
230 }
231 
232 void HostNotifier::scheduleDispatch()
233 {
234     _dispatcher = std::make_unique<sdeventplus::source::Defer>(
235         _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch),
236                                           this, std::placeholders::_1));
237 }
238 
239 void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/)
240 {
241     _dispatcher.reset();
242 
243     doNewLogNotify();
244 }
245 
246 void HostNotifier::doNewLogNotify()
247 {
248     if (!_dataIface.isHostUp() || _retryTimer.isEnabled() ||
249         _hostFullTimer.isEnabled())
250     {
251         return;
252     }
253 
254     if (_retryCount >= maxRetryAttempts)
255     {
256         // Give up until a new log comes in.
257         if (_retryCount == maxRetryAttempts)
258         {
259             // If this were to really happen, the PLDM interface
260             // would be down and isolating that shouldn't left to
261             // a logging daemon, so just trace.  Also, this will start
262             // trying again when the next new log comes in.
263             lg2::error(
264                 "PEL Host notifier hit max retry attempts. Giving up for now. PEL ID = {ID}",
265                 "ID", lg2::hex, _pelQueue.front());
266 
267             // Tell the host interface object to clean itself up, especially to
268             // release the PLDM instance ID it's been using.
269             _hostIface->cancelCmd();
270         }
271         return;
272     }
273 
274     bool doNotify = false;
275     uint32_t id = 0;
276 
277     // Find the PEL to send
278     while (!doNotify && !_pelQueue.empty())
279     {
280         id = _pelQueue.front();
281         _pelQueue.pop_front();
282 
283         if (notifyRequired(id))
284         {
285             doNotify = true;
286         }
287     }
288 
289     if (doNotify)
290     {
291         // Get the size using the repo attributes
292         Repository::LogID i{Repository::LogID::Pel{id}};
293         if (auto attributes = _repo.getPELAttributes(i); attributes)
294         {
295             auto size = static_cast<size_t>(
296                 std::filesystem::file_size((*attributes).get().path));
297 
298             lg2::debug("sendNewLogCmd: ID {ID} size {SIZE}", "ID", lg2::hex, id,
299                        "SIZE", size);
300 
301             auto rc = _hostIface->sendNewLogCmd(id, size);
302 
303             if (rc == CmdStatus::success)
304             {
305                 _inProgressPEL = id;
306             }
307             else
308             {
309                 // It failed.  Retry
310                 lg2::error("PLDM send failed, PEL ID = {ID}", "ID", lg2::hex,
311                            id);
312                 _pelQueue.push_front(id);
313                 _inProgressPEL = 0;
314                 _retryTimer.restartOnce(_hostIface->getSendRetryDelay());
315             }
316         }
317         else
318         {
319             lg2::error(
320                 "PEL ID is not in repository. Cannot notify host. PEL ID = {ID}",
321                 "ID", lg2::hex, id);
322         }
323     }
324 }
325 
326 void HostNotifier::hostStateChange(bool hostUp)
327 {
328     _retryCount = 0;
329     _hostFull = false;
330 
331     if (hostUp && !_pelQueue.empty())
332     {
333         lg2::debug("Host state change to on");
334         _hostUpTimer.restartOnce(_hostIface->getHostUpDelay());
335     }
336     else if (!hostUp)
337     {
338         lg2::debug("Host state change to off");
339 
340         stopCommand();
341 
342         // Reset the state on any PELs that were sent but not acked back
343         // to new so they'll get sent again.
344         for (auto id : _sentPELs)
345         {
346             _pelQueue.push_back(id);
347             _repo.setPELHostTransState(id, TransmissionState::newPEL);
348         }
349 
350         _sentPELs.clear();
351 
352         if (_hostFullTimer.isEnabled())
353         {
354             _hostFullTimer.setEnabled(false);
355         }
356 
357         if (_hostUpTimer.isEnabled())
358         {
359             _hostUpTimer.setEnabled(false);
360         }
361     }
362 }
363 
364 void HostNotifier::commandResponse(ResponseStatus status)
365 {
366     auto id = _inProgressPEL;
367     _inProgressPEL = 0;
368 
369     if (status == ResponseStatus::success)
370     {
371         lg2::debug("HostNotifier command response success, PEL ID = {ID}", "ID",
372                    lg2::hex, id);
373         _retryCount = 0;
374 
375         _sentPELs.push_back(id);
376 
377         _repo.setPELHostTransState(id, TransmissionState::sent);
378 
379         // If the host is full, don't send off the next PEL
380         if (!_hostFull && !_pelQueue.empty())
381         {
382             doNewLogNotify();
383         }
384     }
385     else
386     {
387         lg2::error("PLDM command response failure, PEL ID = {ID}", "ID",
388                    lg2::hex, id);
389         // Retry
390         _pelQueue.push_front(id);
391         _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay());
392     }
393 }
394 
395 void HostNotifier::retryTimerExpired()
396 {
397     if (_dataIface.isHostUp())
398     {
399         lg2::info("Attempting command retry, PEL ID = {ID}", "ID", lg2::hex,
400                   _pelQueue.front());
401         _retryCount++;
402         doNewLogNotify();
403     }
404 }
405 
406 void HostNotifier::hostFullTimerExpired()
407 {
408     lg2::debug("Host full timer expired, trying send again");
409     doNewLogNotify();
410 }
411 
412 void HostNotifier::stopCommand()
413 {
414     _retryCount = 0;
415 
416     if (_inProgressPEL != 0)
417     {
418         _pelQueue.push_front(_inProgressPEL);
419         _inProgressPEL = 0;
420     }
421 
422     if (_retryTimer.isEnabled())
423     {
424         _retryTimer.setEnabled(false);
425     }
426 
427     // Ensure the PLDM instance ID is released
428     _hostIface->cancelCmd();
429 }
430 
431 void HostNotifier::ackPEL(uint32_t id)
432 {
433     _repo.setPELHostTransState(id, TransmissionState::acked);
434 
435     // No longer just 'sent', so remove it from the sent list.
436     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
437     if (sent != _sentPELs.end())
438     {
439         _sentPELs.erase(sent);
440     }
441 
442     // An ack means the host is no longer full
443     if (_hostFullTimer.isEnabled())
444     {
445         _hostFullTimer.setEnabled(false);
446     }
447 
448     if (_hostFull)
449     {
450         _hostFull = false;
451 
452         lg2::debug("Host previously full, not anymore after this ack");
453 
454         // Start sending PELs again, from the event loop
455         if (!_pelQueue.empty())
456         {
457             scheduleDispatch();
458         }
459     }
460 }
461 
462 void HostNotifier::setHostFull(uint32_t id)
463 {
464     lg2::debug("Received Host full indication, PEL ID = {ID}", "ID", lg2::hex,
465                id);
466 
467     _hostFull = true;
468 
469     // This PEL needs to get re-sent
470     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
471     if (sent != _sentPELs.end())
472     {
473         _sentPELs.erase(sent);
474         _repo.setPELHostTransState(id, TransmissionState::newPEL);
475 
476         if (std::find(_pelQueue.begin(), _pelQueue.end(), id) ==
477             _pelQueue.end())
478         {
479             _pelQueue.push_front(id);
480         }
481     }
482 
483     // The only PELs that will be sent when the
484     // host is full is from this timer callback.
485     if (!_hostFullTimer.isEnabled())
486     {
487         lg2::debug("Starting host full timer");
488         _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay());
489     }
490 }
491 
492 void HostNotifier::setBadPEL(uint32_t id)
493 {
494     lg2::error("PEL rejected by the host, PEL ID = {ID}", "ID", lg2::hex, id);
495 
496     auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id);
497     if (sent != _sentPELs.end())
498     {
499         _sentPELs.erase(sent);
500     }
501 
502     _repo.setPELHostTransState(id, TransmissionState::badPEL);
503 }
504 
505 } // namespace openpower::pels
506