1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "host_notifier.hpp" 17 18 #include <phosphor-logging/log.hpp> 19 20 namespace openpower::pels 21 { 22 23 const auto subscriptionName = "PELHostNotifier"; 24 const size_t maxRetryAttempts = 15; 25 26 using namespace phosphor::logging; 27 28 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface, 29 std::unique_ptr<HostInterface> hostIface) : 30 _repo(repo), 31 _dataIface(dataIface), _hostIface(std::move(hostIface)), 32 _retryTimer(_hostIface->getEvent(), 33 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)), 34 _hostFullTimer( 35 _hostIface->getEvent(), 36 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)), 37 _hostUpTimer( 38 _hostIface->getEvent(), 39 std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired), this)) 40 { 41 // Subscribe to be told about new PELs. 42 _repo.subscribeToAdds(subscriptionName, 43 std::bind(std::mem_fn(&HostNotifier::newLogCallback), 44 this, std::placeholders::_1)); 45 46 // Subscribe to be told about deleted PELs. 47 _repo.subscribeToDeletes( 48 subscriptionName, 49 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this, 50 std::placeholders::_1)); 51 52 // Add any existing PELs to the queue to send them if necessary. 53 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this, 54 std::placeholders::_1)); 55 56 // Subscribe to be told about host state changes. 57 _dataIface.subscribeToHostStateChange( 58 subscriptionName, 59 std::bind(std::mem_fun(&HostNotifier::hostStateChange), this, 60 std::placeholders::_1)); 61 62 // Set the function to call when the async reponse is received. 63 _hostIface->setResponseFunction( 64 std::bind(std::mem_fn(&HostNotifier::commandResponse), this, 65 std::placeholders::_1)); 66 67 // Start sending logs if the host is running 68 if (!_pelQueue.empty() && _dataIface.isHostUp()) 69 { 70 log<level::DEBUG>("Host is already up at startup"); 71 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 72 } 73 } 74 75 HostNotifier::~HostNotifier() 76 { 77 _repo.unsubscribeFromAdds(subscriptionName); 78 _dataIface.unsubscribeFromHostStateChange(subscriptionName); 79 } 80 81 void HostNotifier::hostUpTimerExpired() 82 { 83 log<level::DEBUG>("Host up timer expired"); 84 doNewLogNotify(); 85 } 86 87 bool HostNotifier::addPELToQueue(const PEL& pel) 88 { 89 if (enqueueRequired(pel.id())) 90 { 91 _pelQueue.push_back(pel.id()); 92 } 93 94 // Return false so that Repo::for_each keeps going. 95 return false; 96 } 97 98 bool HostNotifier::enqueueRequired(uint32_t id) const 99 { 100 bool required = true; 101 Repository::LogID i{Repository::LogID::Pel{id}}; 102 103 // Manufacturing testing may turn off sending up PELs 104 if (!_dataIface.getHostPELEnablement()) 105 { 106 return false; 107 } 108 109 if (auto attributes = _repo.getPELAttributes(i); attributes) 110 { 111 auto a = attributes.value().get(); 112 113 if ((a.hostState == TransmissionState::acked) || 114 (a.hostState == TransmissionState::badPEL)) 115 { 116 required = false; 117 } 118 else if (a.actionFlags.test(hiddenFlagBit) && 119 (a.hmcState == TransmissionState::acked)) 120 { 121 required = false; 122 } 123 else if (a.actionFlags.test(dontReportToHostFlagBit)) 124 { 125 required = false; 126 } 127 } 128 else 129 { 130 using namespace phosphor::logging; 131 log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository", 132 entry("PEL_ID=0x%X", id)); 133 required = false; 134 } 135 136 return required; 137 } 138 139 bool HostNotifier::notifyRequired(uint32_t id) const 140 { 141 bool notify = true; 142 Repository::LogID i{Repository::LogID::Pel{id}}; 143 144 if (auto attributes = _repo.getPELAttributes(i); attributes) 145 { 146 // If already acked by the host, don't send again. 147 // (A safety check as it shouldn't get to this point.) 148 auto a = attributes.value().get(); 149 if (a.hostState == TransmissionState::acked) 150 { 151 notify = false; 152 } 153 else if (a.actionFlags.test(hiddenFlagBit)) 154 { 155 // If hidden and acked (or will be) acked by the HMC, 156 // also don't send it. (HMC management can come and 157 // go at any time) 158 if ((a.hmcState == TransmissionState::acked) || 159 _dataIface.isHMCManaged()) 160 { 161 notify = false; 162 } 163 } 164 } 165 else 166 { 167 // Must have been deleted since put on the queue. 168 notify = false; 169 } 170 171 return notify; 172 } 173 174 void HostNotifier::newLogCallback(const PEL& pel) 175 { 176 if (!enqueueRequired(pel.id())) 177 { 178 return; 179 } 180 181 log<level::DEBUG>("new PEL added to queue", entry("PEL_ID=0x%X", pel.id())); 182 183 _pelQueue.push_back(pel.id()); 184 185 // Notify shouldn't happen if host is down, not up long enough, or full 186 if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled()) 187 { 188 return; 189 } 190 191 // Dispatch a command now if there isn't currently a command 192 // in progress and this is the first log in the queue or it 193 // previously gave up from a hard failure. 194 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() || 195 _retryTimer.isEnabled(); 196 197 auto firstPEL = _pelQueue.size() == 1; 198 auto gaveUp = _retryCount >= maxRetryAttempts; 199 200 if (!inProgress && (firstPEL || gaveUp)) 201 { 202 _retryCount = 0; 203 204 // Send a log, but from the event loop, not from here. 205 scheduleDispatch(); 206 } 207 } 208 209 void HostNotifier::deleteLogCallback(uint32_t id) 210 { 211 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id); 212 if (queueIt != _pelQueue.end()) 213 { 214 log<level::DEBUG>("Host notifier removing deleted log from queue"); 215 _pelQueue.erase(queueIt); 216 } 217 218 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id); 219 if (sentIt != _sentPELs.end()) 220 { 221 log<level::DEBUG>("Host notifier removing deleted log from sent list"); 222 _sentPELs.erase(sentIt); 223 } 224 225 // Nothing we can do about this... 226 if (id == _inProgressPEL) 227 { 228 log<level::WARNING>( 229 "A PEL was deleted while its host notification was in progress", 230 entry("PEL_ID=0x%X", id)); 231 } 232 } 233 234 void HostNotifier::scheduleDispatch() 235 { 236 _dispatcher = std::make_unique<sdeventplus::source::Defer>( 237 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch), 238 this, std::placeholders::_1)); 239 } 240 241 void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/) 242 { 243 _dispatcher.reset(); 244 245 doNewLogNotify(); 246 } 247 248 void HostNotifier::doNewLogNotify() 249 { 250 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() || 251 _hostFullTimer.isEnabled()) 252 { 253 return; 254 } 255 256 if (_retryCount >= maxRetryAttempts) 257 { 258 // Give up until a new log comes in. 259 if (_retryCount == maxRetryAttempts) 260 { 261 // If this were to really happen, the PLDM interface 262 // would be down and isolating that shouldn't left to 263 // a logging daemon, so just trace. Also, this will start 264 // trying again when the next new log comes in. 265 log<level::ERR>( 266 "PEL Host notifier hit max retry attempts. Giving up for now.", 267 entry("PEL_ID=0x%X", _pelQueue.front())); 268 269 // Tell the host interface object to clean itself up, especially to 270 // release the PLDM instance ID it's been using. 271 _hostIface->cancelCmd(); 272 } 273 return; 274 } 275 276 bool doNotify = false; 277 uint32_t id = 0; 278 279 // Find the PEL to send 280 while (!doNotify && !_pelQueue.empty()) 281 { 282 id = _pelQueue.front(); 283 _pelQueue.pop_front(); 284 285 if (notifyRequired(id)) 286 { 287 doNotify = true; 288 } 289 } 290 291 if (doNotify) 292 { 293 // Get the size using the repo attributes 294 Repository::LogID i{Repository::LogID::Pel{id}}; 295 if (auto attributes = _repo.getPELAttributes(i); attributes) 296 { 297 auto size = static_cast<size_t>( 298 std::filesystem::file_size((*attributes).get().path)); 299 300 log<level::DEBUG>("sendNewLogCmd", entry("PEL_ID=0x%X", id), 301 entry("PEL_SIZE=%d", size)); 302 303 auto rc = _hostIface->sendNewLogCmd(id, size); 304 305 if (rc == CmdStatus::success) 306 { 307 _inProgressPEL = id; 308 } 309 else 310 { 311 // It failed. Retry 312 log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id)); 313 _pelQueue.push_front(id); 314 _inProgressPEL = 0; 315 _retryTimer.restartOnce(_hostIface->getSendRetryDelay()); 316 } 317 } 318 else 319 { 320 log<level::ERR>("PEL ID not in repository. Cannot notify host", 321 entry("PEL_ID=0x%X", id)); 322 } 323 } 324 } 325 326 void HostNotifier::hostStateChange(bool hostUp) 327 { 328 _retryCount = 0; 329 _hostFull = false; 330 331 if (hostUp && !_pelQueue.empty()) 332 { 333 log<level::DEBUG>("Host state change to on"); 334 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 335 } 336 else if (!hostUp) 337 { 338 log<level::DEBUG>("Host state change to off"); 339 340 stopCommand(); 341 342 // Reset the state on any PELs that were sent but not acked back 343 // to new so they'll get sent again. 344 for (auto id : _sentPELs) 345 { 346 _pelQueue.push_back(id); 347 _repo.setPELHostTransState(id, TransmissionState::newPEL); 348 } 349 350 _sentPELs.clear(); 351 352 if (_hostFullTimer.isEnabled()) 353 { 354 _hostFullTimer.setEnabled(false); 355 } 356 357 if (_hostUpTimer.isEnabled()) 358 { 359 _hostUpTimer.setEnabled(false); 360 } 361 } 362 } 363 364 void HostNotifier::commandResponse(ResponseStatus status) 365 { 366 auto id = _inProgressPEL; 367 _inProgressPEL = 0; 368 369 if (status == ResponseStatus::success) 370 { 371 log<level::DEBUG>("HostNotifier command response success", 372 entry("PEL_ID=0x%X", id)); 373 _retryCount = 0; 374 375 _sentPELs.push_back(id); 376 377 _repo.setPELHostTransState(id, TransmissionState::sent); 378 379 // If the host is full, don't send off the next PEL 380 if (!_hostFull && !_pelQueue.empty()) 381 { 382 doNewLogNotify(); 383 } 384 } 385 else 386 { 387 log<level::ERR>("PLDM command response failure", 388 entry("PEL_ID=0x%X", id)); 389 // Retry 390 _pelQueue.push_front(id); 391 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay()); 392 } 393 } 394 395 void HostNotifier::retryTimerExpired() 396 { 397 if (_dataIface.isHostUp()) 398 { 399 log<level::INFO>("Attempting command retry", 400 entry("PEL_ID=0x%X", _pelQueue.front())); 401 _retryCount++; 402 doNewLogNotify(); 403 } 404 } 405 406 void HostNotifier::hostFullTimerExpired() 407 { 408 log<level::DEBUG>("Host full timer expired, trying send again"); 409 doNewLogNotify(); 410 } 411 412 void HostNotifier::stopCommand() 413 { 414 _retryCount = 0; 415 416 if (_inProgressPEL != 0) 417 { 418 _pelQueue.push_front(_inProgressPEL); 419 _inProgressPEL = 0; 420 } 421 422 if (_retryTimer.isEnabled()) 423 { 424 _retryTimer.setEnabled(false); 425 } 426 427 // Ensure the PLDM instance ID is released 428 _hostIface->cancelCmd(); 429 } 430 431 void HostNotifier::ackPEL(uint32_t id) 432 { 433 _repo.setPELHostTransState(id, TransmissionState::acked); 434 435 // No longer just 'sent', so remove it from the sent list. 436 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 437 if (sent != _sentPELs.end()) 438 { 439 _sentPELs.erase(sent); 440 } 441 442 // An ack means the host is no longer full 443 if (_hostFullTimer.isEnabled()) 444 { 445 _hostFullTimer.setEnabled(false); 446 } 447 448 if (_hostFull) 449 { 450 _hostFull = false; 451 452 log<level::DEBUG>("Host previously full, not anymore after this ack"); 453 454 // Start sending PELs again, from the event loop 455 if (!_pelQueue.empty()) 456 { 457 scheduleDispatch(); 458 } 459 } 460 } 461 462 void HostNotifier::setHostFull(uint32_t id) 463 { 464 log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id)); 465 466 _hostFull = true; 467 468 // This PEL needs to get re-sent 469 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 470 if (sent != _sentPELs.end()) 471 { 472 _sentPELs.erase(sent); 473 _repo.setPELHostTransState(id, TransmissionState::newPEL); 474 475 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) == 476 _pelQueue.end()) 477 { 478 _pelQueue.push_front(id); 479 } 480 } 481 482 // The only PELs that will be sent when the 483 // host is full is from this timer callback. 484 if (!_hostFullTimer.isEnabled()) 485 { 486 log<level::DEBUG>("Starting host full timer"); 487 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay()); 488 } 489 } 490 491 void HostNotifier::setBadPEL(uint32_t id) 492 { 493 log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id)); 494 495 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 496 if (sent != _sentPELs.end()) 497 { 498 _sentPELs.erase(sent); 499 } 500 501 _repo.setPELHostTransState(id, TransmissionState::badPEL); 502 } 503 504 } // namespace openpower::pels 505