1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "host_notifier.hpp" 17 18 #include <phosphor-logging/lg2.hpp> 19 20 namespace openpower::pels 21 { 22 23 const auto subscriptionName = "PELHostNotifier"; 24 const size_t maxRetryAttempts = 15; 25 26 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface, 27 std::unique_ptr<HostInterface> hostIface) : 28 _repo(repo), _dataIface(dataIface), _hostIface(std::move(hostIface)), 29 _retryTimer(_hostIface->getEvent(), 30 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)), 31 _hostFullTimer( 32 _hostIface->getEvent(), 33 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)), 34 _hostUpTimer(_hostIface->getEvent(), 35 std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired), 36 this)) 37 { 38 // Subscribe to be told about new PELs. 39 _repo.subscribeToAdds(subscriptionName, 40 std::bind(std::mem_fn(&HostNotifier::newLogCallback), 41 this, std::placeholders::_1)); 42 43 // Subscribe to be told about deleted PELs. 44 _repo.subscribeToDeletes( 45 subscriptionName, 46 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this, 47 std::placeholders::_1)); 48 49 // Add any existing PELs to the queue to send them if necessary. 50 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this, 51 std::placeholders::_1)); 52 53 // Subscribe to be told about host state changes. 54 _dataIface.subscribeToHostStateChange( 55 subscriptionName, std::bind(std::mem_fn(&HostNotifier::hostStateChange), 56 this, std::placeholders::_1)); 57 58 // Set the function to call when the async reponse is received. 59 _hostIface->setResponseFunction( 60 std::bind(std::mem_fn(&HostNotifier::commandResponse), this, 61 std::placeholders::_1)); 62 63 // Start sending logs if the host is running 64 if (!_pelQueue.empty() && _dataIface.isHostUp()) 65 { 66 lg2::debug("Host is already up at startup"); 67 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 68 } 69 } 70 71 HostNotifier::~HostNotifier() 72 { 73 _repo.unsubscribeFromAdds(subscriptionName); 74 _dataIface.unsubscribeFromHostStateChange(subscriptionName); 75 } 76 77 void HostNotifier::hostUpTimerExpired() 78 { 79 lg2::debug("Host up timer expired"); 80 doNewLogNotify(); 81 } 82 83 bool HostNotifier::addPELToQueue(const PEL& pel) 84 { 85 if (enqueueRequired(pel.id())) 86 { 87 _pelQueue.push_back(pel.id()); 88 } 89 90 // Return false so that Repo::for_each keeps going. 91 return false; 92 } 93 94 bool HostNotifier::enqueueRequired(uint32_t id) const 95 { 96 bool required = true; 97 Repository::LogID i{Repository::LogID::Pel{id}}; 98 99 // Manufacturing testing may turn off sending up PELs 100 if (!_dataIface.getHostPELEnablement()) 101 { 102 return false; 103 } 104 105 if (auto attributes = _repo.getPELAttributes(i); attributes) 106 { 107 auto a = attributes.value().get(); 108 109 if ((a.hostState == TransmissionState::acked) || 110 (a.hostState == TransmissionState::badPEL)) 111 { 112 required = false; 113 } 114 else if (a.actionFlags.test(hiddenFlagBit) && 115 (a.hmcState == TransmissionState::acked)) 116 { 117 required = false; 118 } 119 else if (a.actionFlags.test(dontReportToHostFlagBit)) 120 { 121 required = false; 122 } 123 } 124 else 125 { 126 using namespace phosphor::logging; 127 lg2::error("Host Enqueue: Unable to find PEL ID {ID} in repository", 128 "ID", lg2::hex, id); 129 required = false; 130 } 131 132 return required; 133 } 134 135 bool HostNotifier::notifyRequired(uint32_t id) const 136 { 137 bool notify = true; 138 Repository::LogID i{Repository::LogID::Pel{id}}; 139 140 if (auto attributes = _repo.getPELAttributes(i); attributes) 141 { 142 // If already acked by the host, don't send again. 143 // (A safety check as it shouldn't get to this point.) 144 auto a = attributes.value().get(); 145 if (a.hostState == TransmissionState::acked) 146 { 147 notify = false; 148 } 149 else if (a.actionFlags.test(hiddenFlagBit)) 150 { 151 // If hidden and acked (or will be) acked by the HMC, 152 // also don't send it. (HMC management can come and 153 // go at any time) 154 if ((a.hmcState == TransmissionState::acked) || 155 _dataIface.isHMCManaged()) 156 { 157 notify = false; 158 } 159 } 160 } 161 else 162 { 163 // Must have been deleted since put on the queue. 164 notify = false; 165 } 166 167 return notify; 168 } 169 170 void HostNotifier::newLogCallback(const PEL& pel) 171 { 172 if (!enqueueRequired(pel.id())) 173 { 174 return; 175 } 176 177 lg2::debug("New PEL added to queue, PEL ID = {ID}", "ID", lg2::hex, 178 pel.id()); 179 180 _pelQueue.push_back(pel.id()); 181 182 // Notify shouldn't happen if host is down, not up long enough, or full 183 if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled()) 184 { 185 return; 186 } 187 188 // Dispatch a command now if there isn't currently a command 189 // in progress and this is the first log in the queue or it 190 // previously gave up from a hard failure. 191 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() || 192 _retryTimer.isEnabled(); 193 194 auto firstPEL = _pelQueue.size() == 1; 195 auto gaveUp = _retryCount >= maxRetryAttempts; 196 197 if (!inProgress && (firstPEL || gaveUp)) 198 { 199 _retryCount = 0; 200 201 // Send a log, but from the event loop, not from here. 202 scheduleDispatch(); 203 } 204 } 205 206 void HostNotifier::deleteLogCallback(uint32_t id) 207 { 208 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id); 209 if (queueIt != _pelQueue.end()) 210 { 211 lg2::debug("Host notifier removing deleted log from queue"); 212 _pelQueue.erase(queueIt); 213 } 214 215 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id); 216 if (sentIt != _sentPELs.end()) 217 { 218 lg2::debug("Host notifier removing deleted log from sent list"); 219 _sentPELs.erase(sentIt); 220 } 221 222 // Nothing we can do about this... 223 if (id == _inProgressPEL) 224 { 225 lg2::warning( 226 "A PEL was deleted while its host notification was in progress, PEL ID = {ID}", 227 "ID", lg2::hex, id); 228 } 229 } 230 231 void HostNotifier::scheduleDispatch() 232 { 233 _dispatcher = std::make_unique<sdeventplus::source::Defer>( 234 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch), 235 this, std::placeholders::_1)); 236 } 237 238 void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/) 239 { 240 _dispatcher.reset(); 241 242 doNewLogNotify(); 243 } 244 245 void HostNotifier::doNewLogNotify() 246 { 247 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() || 248 _hostFullTimer.isEnabled()) 249 { 250 return; 251 } 252 253 if (_retryCount >= maxRetryAttempts) 254 { 255 // Give up until a new log comes in. 256 if (_retryCount == maxRetryAttempts) 257 { 258 // If this were to really happen, the PLDM interface 259 // would be down and isolating that shouldn't left to 260 // a logging daemon, so just trace. Also, this will start 261 // trying again when the next new log comes in. 262 lg2::error( 263 "PEL Host notifier hit max retry attempts. Giving up for now. PEL ID = {ID}", 264 "ID", lg2::hex, _pelQueue.front()); 265 266 // Tell the host interface object to clean itself up, especially to 267 // release the PLDM instance ID it's been using. 268 _hostIface->cancelCmd(); 269 } 270 return; 271 } 272 273 bool doNotify = false; 274 uint32_t id = 0; 275 276 // Find the PEL to send 277 while (!doNotify && !_pelQueue.empty()) 278 { 279 id = _pelQueue.front(); 280 _pelQueue.pop_front(); 281 282 if (notifyRequired(id)) 283 { 284 doNotify = true; 285 } 286 } 287 288 if (doNotify) 289 { 290 // Get the size using the repo attributes 291 Repository::LogID i{Repository::LogID::Pel{id}}; 292 if (auto attributes = _repo.getPELAttributes(i); attributes) 293 { 294 auto size = static_cast<size_t>( 295 std::filesystem::file_size((*attributes).get().path)); 296 297 lg2::debug("sendNewLogCmd: ID {ID} size {SIZE}", "ID", lg2::hex, id, 298 "SIZE", size); 299 300 auto rc = _hostIface->sendNewLogCmd(id, size); 301 302 if (rc == CmdStatus::success) 303 { 304 _inProgressPEL = id; 305 } 306 else 307 { 308 // It failed. Retry 309 lg2::error("PLDM send failed, PEL ID = {ID}", "ID", lg2::hex, 310 id); 311 _pelQueue.push_front(id); 312 _inProgressPEL = 0; 313 _retryTimer.restartOnce(_hostIface->getSendRetryDelay()); 314 } 315 } 316 else 317 { 318 lg2::error( 319 "PEL ID is not in repository. Cannot notify host. PEL ID = {ID}", 320 "ID", lg2::hex, id); 321 } 322 } 323 } 324 325 void HostNotifier::hostStateChange(bool hostUp) 326 { 327 _retryCount = 0; 328 _hostFull = false; 329 330 if (hostUp && !_pelQueue.empty()) 331 { 332 lg2::debug("Host state change to on"); 333 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 334 } 335 else if (!hostUp) 336 { 337 lg2::debug("Host state change to off"); 338 339 stopCommand(); 340 341 // Reset the state on any PELs that were sent but not acked back 342 // to new so they'll get sent again. 343 for (auto id : _sentPELs) 344 { 345 _pelQueue.push_back(id); 346 _repo.setPELHostTransState(id, TransmissionState::newPEL); 347 } 348 349 _sentPELs.clear(); 350 351 if (_hostFullTimer.isEnabled()) 352 { 353 _hostFullTimer.setEnabled(false); 354 } 355 356 if (_hostUpTimer.isEnabled()) 357 { 358 _hostUpTimer.setEnabled(false); 359 } 360 } 361 } 362 363 void HostNotifier::commandResponse(ResponseStatus status) 364 { 365 auto id = _inProgressPEL; 366 _inProgressPEL = 0; 367 368 if (status == ResponseStatus::success) 369 { 370 lg2::debug("HostNotifier command response success, PEL ID = {ID}", "ID", 371 lg2::hex, id); 372 _retryCount = 0; 373 374 _sentPELs.push_back(id); 375 376 _repo.setPELHostTransState(id, TransmissionState::sent); 377 378 // If the host is full, don't send off the next PEL 379 if (!_hostFull && !_pelQueue.empty()) 380 { 381 doNewLogNotify(); 382 } 383 } 384 else 385 { 386 lg2::error("PLDM command response failure, PEL ID = {ID}", "ID", 387 lg2::hex, id); 388 // Retry 389 _pelQueue.push_front(id); 390 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay()); 391 } 392 } 393 394 void HostNotifier::retryTimerExpired() 395 { 396 if (_dataIface.isHostUp()) 397 { 398 lg2::info("Attempting command retry, PEL ID = {ID}", "ID", lg2::hex, 399 _pelQueue.front()); 400 _retryCount++; 401 doNewLogNotify(); 402 } 403 } 404 405 void HostNotifier::hostFullTimerExpired() 406 { 407 lg2::debug("Host full timer expired, trying send again"); 408 doNewLogNotify(); 409 } 410 411 void HostNotifier::stopCommand() 412 { 413 _retryCount = 0; 414 415 if (_inProgressPEL != 0) 416 { 417 _pelQueue.push_front(_inProgressPEL); 418 _inProgressPEL = 0; 419 } 420 421 if (_retryTimer.isEnabled()) 422 { 423 _retryTimer.setEnabled(false); 424 } 425 426 // Ensure the PLDM instance ID is released 427 _hostIface->cancelCmd(); 428 } 429 430 void HostNotifier::ackPEL(uint32_t id) 431 { 432 _repo.setPELHostTransState(id, TransmissionState::acked); 433 434 // No longer just 'sent', so remove it from the sent list. 435 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 436 if (sent != _sentPELs.end()) 437 { 438 _sentPELs.erase(sent); 439 } 440 441 // An ack means the host is no longer full 442 if (_hostFullTimer.isEnabled()) 443 { 444 _hostFullTimer.setEnabled(false); 445 } 446 447 if (_hostFull) 448 { 449 _hostFull = false; 450 451 lg2::debug("Host previously full, not anymore after this ack"); 452 453 // Start sending PELs again, from the event loop 454 if (!_pelQueue.empty()) 455 { 456 scheduleDispatch(); 457 } 458 } 459 } 460 461 void HostNotifier::setHostFull(uint32_t id) 462 { 463 lg2::debug("Received Host full indication, PEL ID = {ID}", "ID", lg2::hex, 464 id); 465 466 _hostFull = true; 467 468 // This PEL needs to get re-sent 469 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 470 if (sent != _sentPELs.end()) 471 { 472 _sentPELs.erase(sent); 473 _repo.setPELHostTransState(id, TransmissionState::newPEL); 474 475 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) == 476 _pelQueue.end()) 477 { 478 _pelQueue.push_front(id); 479 } 480 } 481 482 // The only PELs that will be sent when the 483 // host is full is from this timer callback. 484 if (!_hostFullTimer.isEnabled()) 485 { 486 lg2::debug("Starting host full timer"); 487 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay()); 488 } 489 } 490 491 void HostNotifier::setBadPEL(uint32_t id) 492 { 493 lg2::error("PEL rejected by the host, PEL ID = {ID}", "ID", lg2::hex, id); 494 495 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 496 if (sent != _sentPELs.end()) 497 { 498 _sentPELs.erase(sent); 499 } 500 501 _repo.setPELHostTransState(id, TransmissionState::badPEL); 502 } 503 504 } // namespace openpower::pels 505