1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "host_notifier.hpp" 17 18 #include <phosphor-logging/lg2.hpp> 19 20 namespace openpower::pels 21 { 22 23 const auto subscriptionName = "PELHostNotifier"; 24 const size_t maxRetryAttempts = 15; 25 26 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface, 27 std::unique_ptr<HostInterface> hostIface) : 28 _repo(repo), 29 _dataIface(dataIface), _hostIface(std::move(hostIface)), 30 _retryTimer(_hostIface->getEvent(), 31 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)), 32 _hostFullTimer( 33 _hostIface->getEvent(), 34 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)), 35 _hostUpTimer( 36 _hostIface->getEvent(), 37 std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired), this)) 38 { 39 // Subscribe to be told about new PELs. 40 _repo.subscribeToAdds(subscriptionName, 41 std::bind(std::mem_fn(&HostNotifier::newLogCallback), 42 this, std::placeholders::_1)); 43 44 // Subscribe to be told about deleted PELs. 45 _repo.subscribeToDeletes( 46 subscriptionName, 47 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this, 48 std::placeholders::_1)); 49 50 // Add any existing PELs to the queue to send them if necessary. 51 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this, 52 std::placeholders::_1)); 53 54 // Subscribe to be told about host state changes. 55 _dataIface.subscribeToHostStateChange( 56 subscriptionName, std::bind(std::mem_fn(&HostNotifier::hostStateChange), 57 this, std::placeholders::_1)); 58 59 // Set the function to call when the async reponse is received. 60 _hostIface->setResponseFunction( 61 std::bind(std::mem_fn(&HostNotifier::commandResponse), this, 62 std::placeholders::_1)); 63 64 // Start sending logs if the host is running 65 if (!_pelQueue.empty() && _dataIface.isHostUp()) 66 { 67 lg2::debug("Host is already up at startup"); 68 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 69 } 70 } 71 72 HostNotifier::~HostNotifier() 73 { 74 _repo.unsubscribeFromAdds(subscriptionName); 75 _dataIface.unsubscribeFromHostStateChange(subscriptionName); 76 } 77 78 void HostNotifier::hostUpTimerExpired() 79 { 80 lg2::debug("Host up timer expired"); 81 doNewLogNotify(); 82 } 83 84 bool HostNotifier::addPELToQueue(const PEL& pel) 85 { 86 if (enqueueRequired(pel.id())) 87 { 88 _pelQueue.push_back(pel.id()); 89 } 90 91 // Return false so that Repo::for_each keeps going. 92 return false; 93 } 94 95 bool HostNotifier::enqueueRequired(uint32_t id) const 96 { 97 bool required = true; 98 Repository::LogID i{Repository::LogID::Pel{id}}; 99 100 // Manufacturing testing may turn off sending up PELs 101 if (!_dataIface.getHostPELEnablement()) 102 { 103 return false; 104 } 105 106 if (auto attributes = _repo.getPELAttributes(i); attributes) 107 { 108 auto a = attributes.value().get(); 109 110 if ((a.hostState == TransmissionState::acked) || 111 (a.hostState == TransmissionState::badPEL)) 112 { 113 required = false; 114 } 115 else if (a.actionFlags.test(hiddenFlagBit) && 116 (a.hmcState == TransmissionState::acked)) 117 { 118 required = false; 119 } 120 else if (a.actionFlags.test(dontReportToHostFlagBit)) 121 { 122 required = false; 123 } 124 } 125 else 126 { 127 using namespace phosphor::logging; 128 lg2::error("Host Enqueue: Unable to find PEL ID {ID} in repository", 129 "ID", lg2::hex, id); 130 required = false; 131 } 132 133 return required; 134 } 135 136 bool HostNotifier::notifyRequired(uint32_t id) const 137 { 138 bool notify = true; 139 Repository::LogID i{Repository::LogID::Pel{id}}; 140 141 if (auto attributes = _repo.getPELAttributes(i); attributes) 142 { 143 // If already acked by the host, don't send again. 144 // (A safety check as it shouldn't get to this point.) 145 auto a = attributes.value().get(); 146 if (a.hostState == TransmissionState::acked) 147 { 148 notify = false; 149 } 150 else if (a.actionFlags.test(hiddenFlagBit)) 151 { 152 // If hidden and acked (or will be) acked by the HMC, 153 // also don't send it. (HMC management can come and 154 // go at any time) 155 if ((a.hmcState == TransmissionState::acked) || 156 _dataIface.isHMCManaged()) 157 { 158 notify = false; 159 } 160 } 161 } 162 else 163 { 164 // Must have been deleted since put on the queue. 165 notify = false; 166 } 167 168 return notify; 169 } 170 171 void HostNotifier::newLogCallback(const PEL& pel) 172 { 173 if (!enqueueRequired(pel.id())) 174 { 175 return; 176 } 177 178 lg2::debug("New PEL added to queue, PEL ID = {ID}", "ID", lg2::hex, 179 pel.id()); 180 181 _pelQueue.push_back(pel.id()); 182 183 // Notify shouldn't happen if host is down, not up long enough, or full 184 if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled()) 185 { 186 return; 187 } 188 189 // Dispatch a command now if there isn't currently a command 190 // in progress and this is the first log in the queue or it 191 // previously gave up from a hard failure. 192 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() || 193 _retryTimer.isEnabled(); 194 195 auto firstPEL = _pelQueue.size() == 1; 196 auto gaveUp = _retryCount >= maxRetryAttempts; 197 198 if (!inProgress && (firstPEL || gaveUp)) 199 { 200 _retryCount = 0; 201 202 // Send a log, but from the event loop, not from here. 203 scheduleDispatch(); 204 } 205 } 206 207 void HostNotifier::deleteLogCallback(uint32_t id) 208 { 209 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id); 210 if (queueIt != _pelQueue.end()) 211 { 212 lg2::debug("Host notifier removing deleted log from queue"); 213 _pelQueue.erase(queueIt); 214 } 215 216 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id); 217 if (sentIt != _sentPELs.end()) 218 { 219 lg2::debug("Host notifier removing deleted log from sent list"); 220 _sentPELs.erase(sentIt); 221 } 222 223 // Nothing we can do about this... 224 if (id == _inProgressPEL) 225 { 226 lg2::warning( 227 "A PEL was deleted while its host notification was in progress, PEL ID = {ID}", 228 "ID", lg2::hex, id); 229 } 230 } 231 232 void HostNotifier::scheduleDispatch() 233 { 234 _dispatcher = std::make_unique<sdeventplus::source::Defer>( 235 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch), 236 this, std::placeholders::_1)); 237 } 238 239 void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/) 240 { 241 _dispatcher.reset(); 242 243 doNewLogNotify(); 244 } 245 246 void HostNotifier::doNewLogNotify() 247 { 248 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() || 249 _hostFullTimer.isEnabled()) 250 { 251 return; 252 } 253 254 if (_retryCount >= maxRetryAttempts) 255 { 256 // Give up until a new log comes in. 257 if (_retryCount == maxRetryAttempts) 258 { 259 // If this were to really happen, the PLDM interface 260 // would be down and isolating that shouldn't left to 261 // a logging daemon, so just trace. Also, this will start 262 // trying again when the next new log comes in. 263 lg2::error( 264 "PEL Host notifier hit max retry attempts. Giving up for now. PEL ID = {ID}", 265 "ID", lg2::hex, _pelQueue.front()); 266 267 // Tell the host interface object to clean itself up, especially to 268 // release the PLDM instance ID it's been using. 269 _hostIface->cancelCmd(); 270 } 271 return; 272 } 273 274 bool doNotify = false; 275 uint32_t id = 0; 276 277 // Find the PEL to send 278 while (!doNotify && !_pelQueue.empty()) 279 { 280 id = _pelQueue.front(); 281 _pelQueue.pop_front(); 282 283 if (notifyRequired(id)) 284 { 285 doNotify = true; 286 } 287 } 288 289 if (doNotify) 290 { 291 // Get the size using the repo attributes 292 Repository::LogID i{Repository::LogID::Pel{id}}; 293 if (auto attributes = _repo.getPELAttributes(i); attributes) 294 { 295 auto size = static_cast<size_t>( 296 std::filesystem::file_size((*attributes).get().path)); 297 298 lg2::debug("sendNewLogCmd: ID {ID} size {SIZE}", "ID", lg2::hex, id, 299 "SIZE", size); 300 301 auto rc = _hostIface->sendNewLogCmd(id, size); 302 303 if (rc == CmdStatus::success) 304 { 305 _inProgressPEL = id; 306 } 307 else 308 { 309 // It failed. Retry 310 lg2::error("PLDM send failed, PEL ID = {ID}", "ID", lg2::hex, 311 id); 312 _pelQueue.push_front(id); 313 _inProgressPEL = 0; 314 _retryTimer.restartOnce(_hostIface->getSendRetryDelay()); 315 } 316 } 317 else 318 { 319 lg2::error( 320 "PEL ID is not in repository. Cannot notify host. PEL ID = {ID}", 321 "ID", lg2::hex, id); 322 } 323 } 324 } 325 326 void HostNotifier::hostStateChange(bool hostUp) 327 { 328 _retryCount = 0; 329 _hostFull = false; 330 331 if (hostUp && !_pelQueue.empty()) 332 { 333 lg2::debug("Host state change to on"); 334 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 335 } 336 else if (!hostUp) 337 { 338 lg2::debug("Host state change to off"); 339 340 stopCommand(); 341 342 // Reset the state on any PELs that were sent but not acked back 343 // to new so they'll get sent again. 344 for (auto id : _sentPELs) 345 { 346 _pelQueue.push_back(id); 347 _repo.setPELHostTransState(id, TransmissionState::newPEL); 348 } 349 350 _sentPELs.clear(); 351 352 if (_hostFullTimer.isEnabled()) 353 { 354 _hostFullTimer.setEnabled(false); 355 } 356 357 if (_hostUpTimer.isEnabled()) 358 { 359 _hostUpTimer.setEnabled(false); 360 } 361 } 362 } 363 364 void HostNotifier::commandResponse(ResponseStatus status) 365 { 366 auto id = _inProgressPEL; 367 _inProgressPEL = 0; 368 369 if (status == ResponseStatus::success) 370 { 371 lg2::debug("HostNotifier command response success, PEL ID = {ID}", "ID", 372 lg2::hex, id); 373 _retryCount = 0; 374 375 _sentPELs.push_back(id); 376 377 _repo.setPELHostTransState(id, TransmissionState::sent); 378 379 // If the host is full, don't send off the next PEL 380 if (!_hostFull && !_pelQueue.empty()) 381 { 382 doNewLogNotify(); 383 } 384 } 385 else 386 { 387 lg2::error("PLDM command response failure, PEL ID = {ID}", "ID", 388 lg2::hex, id); 389 // Retry 390 _pelQueue.push_front(id); 391 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay()); 392 } 393 } 394 395 void HostNotifier::retryTimerExpired() 396 { 397 if (_dataIface.isHostUp()) 398 { 399 lg2::info("Attempting command retry, PEL ID = {ID}", "ID", lg2::hex, 400 _pelQueue.front()); 401 _retryCount++; 402 doNewLogNotify(); 403 } 404 } 405 406 void HostNotifier::hostFullTimerExpired() 407 { 408 lg2::debug("Host full timer expired, trying send again"); 409 doNewLogNotify(); 410 } 411 412 void HostNotifier::stopCommand() 413 { 414 _retryCount = 0; 415 416 if (_inProgressPEL != 0) 417 { 418 _pelQueue.push_front(_inProgressPEL); 419 _inProgressPEL = 0; 420 } 421 422 if (_retryTimer.isEnabled()) 423 { 424 _retryTimer.setEnabled(false); 425 } 426 427 // Ensure the PLDM instance ID is released 428 _hostIface->cancelCmd(); 429 } 430 431 void HostNotifier::ackPEL(uint32_t id) 432 { 433 _repo.setPELHostTransState(id, TransmissionState::acked); 434 435 // No longer just 'sent', so remove it from the sent list. 436 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 437 if (sent != _sentPELs.end()) 438 { 439 _sentPELs.erase(sent); 440 } 441 442 // An ack means the host is no longer full 443 if (_hostFullTimer.isEnabled()) 444 { 445 _hostFullTimer.setEnabled(false); 446 } 447 448 if (_hostFull) 449 { 450 _hostFull = false; 451 452 lg2::debug("Host previously full, not anymore after this ack"); 453 454 // Start sending PELs again, from the event loop 455 if (!_pelQueue.empty()) 456 { 457 scheduleDispatch(); 458 } 459 } 460 } 461 462 void HostNotifier::setHostFull(uint32_t id) 463 { 464 lg2::debug("Received Host full indication, PEL ID = {ID}", "ID", lg2::hex, 465 id); 466 467 _hostFull = true; 468 469 // This PEL needs to get re-sent 470 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 471 if (sent != _sentPELs.end()) 472 { 473 _sentPELs.erase(sent); 474 _repo.setPELHostTransState(id, TransmissionState::newPEL); 475 476 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) == 477 _pelQueue.end()) 478 { 479 _pelQueue.push_front(id); 480 } 481 } 482 483 // The only PELs that will be sent when the 484 // host is full is from this timer callback. 485 if (!_hostFullTimer.isEnabled()) 486 { 487 lg2::debug("Starting host full timer"); 488 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay()); 489 } 490 } 491 492 void HostNotifier::setBadPEL(uint32_t id) 493 { 494 lg2::error("PEL rejected by the host, PEL ID = {ID}", "ID", lg2::hex, id); 495 496 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 497 if (sent != _sentPELs.end()) 498 { 499 _sentPELs.erase(sent); 500 } 501 502 _repo.setPELHostTransState(id, TransmissionState::badPEL); 503 } 504 505 } // namespace openpower::pels 506