1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "host_notifier.hpp" 17 18 #include <phosphor-logging/log.hpp> 19 20 namespace openpower::pels 21 { 22 23 const auto subscriptionName = "PELHostNotifier"; 24 const size_t maxRetryAttempts = 15; 25 26 using namespace phosphor::logging; 27 28 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface, 29 std::unique_ptr<HostInterface> hostIface) : 30 _repo(repo), 31 _dataIface(dataIface), _hostIface(std::move(hostIface)), 32 _retryTimer(_hostIface->getEvent(), 33 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)), 34 _hostFullTimer( 35 _hostIface->getEvent(), 36 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)), 37 _hostUpTimer( 38 _hostIface->getEvent(), 39 std::bind(std::mem_fn(&HostNotifier::hostUpTimerExpired), this)) 40 { 41 // Subscribe to be told about new PELs. 42 _repo.subscribeToAdds(subscriptionName, 43 std::bind(std::mem_fn(&HostNotifier::newLogCallback), 44 this, std::placeholders::_1)); 45 46 // Subscribe to be told about deleted PELs. 47 _repo.subscribeToDeletes( 48 subscriptionName, 49 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this, 50 std::placeholders::_1)); 51 52 // Add any existing PELs to the queue to send them if necessary. 53 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this, 54 std::placeholders::_1)); 55 56 // Subscribe to be told about host state changes. 57 _dataIface.subscribeToHostStateChange( 58 subscriptionName, std::bind(std::mem_fn(&HostNotifier::hostStateChange), 59 this, std::placeholders::_1)); 60 61 // Set the function to call when the async reponse is received. 62 _hostIface->setResponseFunction( 63 std::bind(std::mem_fn(&HostNotifier::commandResponse), this, 64 std::placeholders::_1)); 65 66 // Start sending logs if the host is running 67 if (!_pelQueue.empty() && _dataIface.isHostUp()) 68 { 69 log<level::DEBUG>("Host is already up at startup"); 70 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 71 } 72 } 73 74 HostNotifier::~HostNotifier() 75 { 76 _repo.unsubscribeFromAdds(subscriptionName); 77 _dataIface.unsubscribeFromHostStateChange(subscriptionName); 78 } 79 80 void HostNotifier::hostUpTimerExpired() 81 { 82 log<level::DEBUG>("Host up timer expired"); 83 doNewLogNotify(); 84 } 85 86 bool HostNotifier::addPELToQueue(const PEL& pel) 87 { 88 if (enqueueRequired(pel.id())) 89 { 90 _pelQueue.push_back(pel.id()); 91 } 92 93 // Return false so that Repo::for_each keeps going. 94 return false; 95 } 96 97 bool HostNotifier::enqueueRequired(uint32_t id) const 98 { 99 bool required = true; 100 Repository::LogID i{Repository::LogID::Pel{id}}; 101 102 // Manufacturing testing may turn off sending up PELs 103 if (!_dataIface.getHostPELEnablement()) 104 { 105 return false; 106 } 107 108 if (auto attributes = _repo.getPELAttributes(i); attributes) 109 { 110 auto a = attributes.value().get(); 111 112 if ((a.hostState == TransmissionState::acked) || 113 (a.hostState == TransmissionState::badPEL)) 114 { 115 required = false; 116 } 117 else if (a.actionFlags.test(hiddenFlagBit) && 118 (a.hmcState == TransmissionState::acked)) 119 { 120 required = false; 121 } 122 else if (a.actionFlags.test(dontReportToHostFlagBit)) 123 { 124 required = false; 125 } 126 } 127 else 128 { 129 using namespace phosphor::logging; 130 log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository", 131 entry("PEL_ID=0x%X", id)); 132 required = false; 133 } 134 135 return required; 136 } 137 138 bool HostNotifier::notifyRequired(uint32_t id) const 139 { 140 bool notify = true; 141 Repository::LogID i{Repository::LogID::Pel{id}}; 142 143 if (auto attributes = _repo.getPELAttributes(i); attributes) 144 { 145 // If already acked by the host, don't send again. 146 // (A safety check as it shouldn't get to this point.) 147 auto a = attributes.value().get(); 148 if (a.hostState == TransmissionState::acked) 149 { 150 notify = false; 151 } 152 else if (a.actionFlags.test(hiddenFlagBit)) 153 { 154 // If hidden and acked (or will be) acked by the HMC, 155 // also don't send it. (HMC management can come and 156 // go at any time) 157 if ((a.hmcState == TransmissionState::acked) || 158 _dataIface.isHMCManaged()) 159 { 160 notify = false; 161 } 162 } 163 } 164 else 165 { 166 // Must have been deleted since put on the queue. 167 notify = false; 168 } 169 170 return notify; 171 } 172 173 void HostNotifier::newLogCallback(const PEL& pel) 174 { 175 if (!enqueueRequired(pel.id())) 176 { 177 return; 178 } 179 180 log<level::DEBUG>("new PEL added to queue", entry("PEL_ID=0x%X", pel.id())); 181 182 _pelQueue.push_back(pel.id()); 183 184 // Notify shouldn't happen if host is down, not up long enough, or full 185 if (!_dataIface.isHostUp() || _hostFull || _hostUpTimer.isEnabled()) 186 { 187 return; 188 } 189 190 // Dispatch a command now if there isn't currently a command 191 // in progress and this is the first log in the queue or it 192 // previously gave up from a hard failure. 193 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() || 194 _retryTimer.isEnabled(); 195 196 auto firstPEL = _pelQueue.size() == 1; 197 auto gaveUp = _retryCount >= maxRetryAttempts; 198 199 if (!inProgress && (firstPEL || gaveUp)) 200 { 201 _retryCount = 0; 202 203 // Send a log, but from the event loop, not from here. 204 scheduleDispatch(); 205 } 206 } 207 208 void HostNotifier::deleteLogCallback(uint32_t id) 209 { 210 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id); 211 if (queueIt != _pelQueue.end()) 212 { 213 log<level::DEBUG>("Host notifier removing deleted log from queue"); 214 _pelQueue.erase(queueIt); 215 } 216 217 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id); 218 if (sentIt != _sentPELs.end()) 219 { 220 log<level::DEBUG>("Host notifier removing deleted log from sent list"); 221 _sentPELs.erase(sentIt); 222 } 223 224 // Nothing we can do about this... 225 if (id == _inProgressPEL) 226 { 227 log<level::WARNING>( 228 "A PEL was deleted while its host notification was in progress", 229 entry("PEL_ID=0x%X", id)); 230 } 231 } 232 233 void HostNotifier::scheduleDispatch() 234 { 235 _dispatcher = std::make_unique<sdeventplus::source::Defer>( 236 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch), 237 this, std::placeholders::_1)); 238 } 239 240 void HostNotifier::dispatch(sdeventplus::source::EventBase& /*source*/) 241 { 242 _dispatcher.reset(); 243 244 doNewLogNotify(); 245 } 246 247 void HostNotifier::doNewLogNotify() 248 { 249 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() || 250 _hostFullTimer.isEnabled()) 251 { 252 return; 253 } 254 255 if (_retryCount >= maxRetryAttempts) 256 { 257 // Give up until a new log comes in. 258 if (_retryCount == maxRetryAttempts) 259 { 260 // If this were to really happen, the PLDM interface 261 // would be down and isolating that shouldn't left to 262 // a logging daemon, so just trace. Also, this will start 263 // trying again when the next new log comes in. 264 log<level::ERR>( 265 "PEL Host notifier hit max retry attempts. Giving up for now.", 266 entry("PEL_ID=0x%X", _pelQueue.front())); 267 268 // Tell the host interface object to clean itself up, especially to 269 // release the PLDM instance ID it's been using. 270 _hostIface->cancelCmd(); 271 } 272 return; 273 } 274 275 bool doNotify = false; 276 uint32_t id = 0; 277 278 // Find the PEL to send 279 while (!doNotify && !_pelQueue.empty()) 280 { 281 id = _pelQueue.front(); 282 _pelQueue.pop_front(); 283 284 if (notifyRequired(id)) 285 { 286 doNotify = true; 287 } 288 } 289 290 if (doNotify) 291 { 292 // Get the size using the repo attributes 293 Repository::LogID i{Repository::LogID::Pel{id}}; 294 if (auto attributes = _repo.getPELAttributes(i); attributes) 295 { 296 auto size = static_cast<size_t>( 297 std::filesystem::file_size((*attributes).get().path)); 298 299 log<level::DEBUG>("sendNewLogCmd", entry("PEL_ID=0x%X", id), 300 entry("PEL_SIZE=%d", size)); 301 302 auto rc = _hostIface->sendNewLogCmd(id, size); 303 304 if (rc == CmdStatus::success) 305 { 306 _inProgressPEL = id; 307 } 308 else 309 { 310 // It failed. Retry 311 log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id)); 312 _pelQueue.push_front(id); 313 _inProgressPEL = 0; 314 _retryTimer.restartOnce(_hostIface->getSendRetryDelay()); 315 } 316 } 317 else 318 { 319 log<level::ERR>("PEL ID not in repository. Cannot notify host", 320 entry("PEL_ID=0x%X", id)); 321 } 322 } 323 } 324 325 void HostNotifier::hostStateChange(bool hostUp) 326 { 327 _retryCount = 0; 328 _hostFull = false; 329 330 if (hostUp && !_pelQueue.empty()) 331 { 332 log<level::DEBUG>("Host state change to on"); 333 _hostUpTimer.restartOnce(_hostIface->getHostUpDelay()); 334 } 335 else if (!hostUp) 336 { 337 log<level::DEBUG>("Host state change to off"); 338 339 stopCommand(); 340 341 // Reset the state on any PELs that were sent but not acked back 342 // to new so they'll get sent again. 343 for (auto id : _sentPELs) 344 { 345 _pelQueue.push_back(id); 346 _repo.setPELHostTransState(id, TransmissionState::newPEL); 347 } 348 349 _sentPELs.clear(); 350 351 if (_hostFullTimer.isEnabled()) 352 { 353 _hostFullTimer.setEnabled(false); 354 } 355 356 if (_hostUpTimer.isEnabled()) 357 { 358 _hostUpTimer.setEnabled(false); 359 } 360 } 361 } 362 363 void HostNotifier::commandResponse(ResponseStatus status) 364 { 365 auto id = _inProgressPEL; 366 _inProgressPEL = 0; 367 368 if (status == ResponseStatus::success) 369 { 370 log<level::DEBUG>("HostNotifier command response success", 371 entry("PEL_ID=0x%X", id)); 372 _retryCount = 0; 373 374 _sentPELs.push_back(id); 375 376 _repo.setPELHostTransState(id, TransmissionState::sent); 377 378 // If the host is full, don't send off the next PEL 379 if (!_hostFull && !_pelQueue.empty()) 380 { 381 doNewLogNotify(); 382 } 383 } 384 else 385 { 386 log<level::ERR>("PLDM command response failure", 387 entry("PEL_ID=0x%X", id)); 388 // Retry 389 _pelQueue.push_front(id); 390 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay()); 391 } 392 } 393 394 void HostNotifier::retryTimerExpired() 395 { 396 if (_dataIface.isHostUp()) 397 { 398 log<level::INFO>("Attempting command retry", 399 entry("PEL_ID=0x%X", _pelQueue.front())); 400 _retryCount++; 401 doNewLogNotify(); 402 } 403 } 404 405 void HostNotifier::hostFullTimerExpired() 406 { 407 log<level::DEBUG>("Host full timer expired, trying send again"); 408 doNewLogNotify(); 409 } 410 411 void HostNotifier::stopCommand() 412 { 413 _retryCount = 0; 414 415 if (_inProgressPEL != 0) 416 { 417 _pelQueue.push_front(_inProgressPEL); 418 _inProgressPEL = 0; 419 } 420 421 if (_retryTimer.isEnabled()) 422 { 423 _retryTimer.setEnabled(false); 424 } 425 426 // Ensure the PLDM instance ID is released 427 _hostIface->cancelCmd(); 428 } 429 430 void HostNotifier::ackPEL(uint32_t id) 431 { 432 _repo.setPELHostTransState(id, TransmissionState::acked); 433 434 // No longer just 'sent', so remove it from the sent list. 435 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 436 if (sent != _sentPELs.end()) 437 { 438 _sentPELs.erase(sent); 439 } 440 441 // An ack means the host is no longer full 442 if (_hostFullTimer.isEnabled()) 443 { 444 _hostFullTimer.setEnabled(false); 445 } 446 447 if (_hostFull) 448 { 449 _hostFull = false; 450 451 log<level::DEBUG>("Host previously full, not anymore after this ack"); 452 453 // Start sending PELs again, from the event loop 454 if (!_pelQueue.empty()) 455 { 456 scheduleDispatch(); 457 } 458 } 459 } 460 461 void HostNotifier::setHostFull(uint32_t id) 462 { 463 log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id)); 464 465 _hostFull = true; 466 467 // This PEL needs to get re-sent 468 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 469 if (sent != _sentPELs.end()) 470 { 471 _sentPELs.erase(sent); 472 _repo.setPELHostTransState(id, TransmissionState::newPEL); 473 474 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) == 475 _pelQueue.end()) 476 { 477 _pelQueue.push_front(id); 478 } 479 } 480 481 // The only PELs that will be sent when the 482 // host is full is from this timer callback. 483 if (!_hostFullTimer.isEnabled()) 484 { 485 log<level::DEBUG>("Starting host full timer"); 486 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay()); 487 } 488 } 489 490 void HostNotifier::setBadPEL(uint32_t id) 491 { 492 log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id)); 493 494 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 495 if (sent != _sentPELs.end()) 496 { 497 _sentPELs.erase(sent); 498 } 499 500 _repo.setPELHostTransState(id, TransmissionState::badPEL); 501 } 502 503 } // namespace openpower::pels 504