1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "host_notifier.hpp" 17 18 #include <phosphor-logging/log.hpp> 19 20 namespace openpower::pels 21 { 22 23 const auto subscriptionName = "PELHostNotifier"; 24 const size_t maxRetryAttempts = 15; 25 26 using namespace phosphor::logging; 27 28 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface, 29 std::unique_ptr<HostInterface> hostIface) : 30 _repo(repo), 31 _dataIface(dataIface), _hostIface(std::move(hostIface)), 32 _retryTimer(_hostIface->getEvent(), 33 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)), 34 _hostFullTimer( 35 _hostIface->getEvent(), 36 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)) 37 { 38 // Subscribe to be told about new PELs. 39 _repo.subscribeToAdds(subscriptionName, 40 std::bind(std::mem_fn(&HostNotifier::newLogCallback), 41 this, std::placeholders::_1)); 42 43 // Subscribe to be told about deleted PELs. 44 _repo.subscribeToDeletes( 45 subscriptionName, 46 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this, 47 std::placeholders::_1)); 48 49 // Add any existing PELs to the queue to send them if necessary. 50 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this, 51 std::placeholders::_1)); 52 53 // Subscribe to be told about host state changes. 54 _dataIface.subscribeToHostStateChange( 55 subscriptionName, 56 std::bind(std::mem_fun(&HostNotifier::hostStateChange), this, 57 std::placeholders::_1)); 58 59 // Set the function to call when the async reponse is received. 60 _hostIface->setResponseFunction( 61 std::bind(std::mem_fn(&HostNotifier::commandResponse), this, 62 std::placeholders::_1)); 63 64 // Start sending logs if the host is running 65 if (!_pelQueue.empty() && _dataIface.isHostUp()) 66 { 67 doNewLogNotify(); 68 } 69 } 70 71 HostNotifier::~HostNotifier() 72 { 73 _repo.unsubscribeFromAdds(subscriptionName); 74 _dataIface.unsubscribeFromHostStateChange(subscriptionName); 75 } 76 77 bool HostNotifier::addPELToQueue(const PEL& pel) 78 { 79 if (enqueueRequired(pel.id())) 80 { 81 _pelQueue.push_back(pel.id()); 82 } 83 84 // Return false so that Repo::for_each keeps going. 85 return false; 86 } 87 88 bool HostNotifier::enqueueRequired(uint32_t id) const 89 { 90 bool required = true; 91 Repository::LogID i{Repository::LogID::Pel{id}}; 92 93 // Manufacturing testing may turn off sending up PELs 94 if (!_dataIface.getHostPELEnablement()) 95 { 96 return false; 97 } 98 99 if (auto attributes = _repo.getPELAttributes(i); attributes) 100 { 101 auto a = attributes.value().get(); 102 103 if ((a.hostState == TransmissionState::acked) || 104 (a.hostState == TransmissionState::badPEL)) 105 { 106 required = false; 107 } 108 else if (a.actionFlags.test(hiddenFlagBit) && 109 (a.hmcState == TransmissionState::acked)) 110 { 111 required = false; 112 } 113 else if (a.actionFlags.test(dontReportToHostFlagBit)) 114 { 115 required = false; 116 } 117 } 118 else 119 { 120 using namespace phosphor::logging; 121 log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository", 122 entry("PEL_ID=0x%X", id)); 123 required = false; 124 } 125 126 return required; 127 } 128 129 bool HostNotifier::notifyRequired(uint32_t id) const 130 { 131 bool notify = true; 132 Repository::LogID i{Repository::LogID::Pel{id}}; 133 134 if (auto attributes = _repo.getPELAttributes(i); attributes) 135 { 136 // If already acked by the host, don't send again. 137 // (A safety check as it shouldn't get to this point.) 138 auto a = attributes.value().get(); 139 if (a.hostState == TransmissionState::acked) 140 { 141 notify = false; 142 } 143 else if (a.actionFlags.test(hiddenFlagBit)) 144 { 145 // If hidden and acked (or will be) acked by the HMC, 146 // also don't send it. (HMC management can come and 147 // go at any time) 148 if ((a.hmcState == TransmissionState::acked) || 149 _dataIface.isHMCManaged()) 150 { 151 notify = false; 152 } 153 } 154 } 155 else 156 { 157 // Must have been deleted since put on the queue. 158 notify = false; 159 } 160 161 return notify; 162 } 163 164 void HostNotifier::newLogCallback(const PEL& pel) 165 { 166 if (!enqueueRequired(pel.id())) 167 { 168 return; 169 } 170 171 log<level::DEBUG>("new PEL added to queue", entry("PEL_ID=0x%X", pel.id())); 172 173 _pelQueue.push_back(pel.id()); 174 175 // Notify shouldn't happen if host is down or full 176 if (!_dataIface.isHostUp() || _hostFull) 177 { 178 return; 179 } 180 181 // Dispatch a command now if there isn't currently a command 182 // in progress and this is the first log in the queue or it 183 // previously gave up from a hard failure. 184 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() || 185 _retryTimer.isEnabled(); 186 187 auto firstPEL = _pelQueue.size() == 1; 188 auto gaveUp = _retryCount >= maxRetryAttempts; 189 190 if (!inProgress && (firstPEL || gaveUp)) 191 { 192 _retryCount = 0; 193 194 // Send a log, but from the event loop, not from here. 195 scheduleDispatch(); 196 } 197 } 198 199 void HostNotifier::deleteLogCallback(uint32_t id) 200 { 201 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id); 202 if (queueIt != _pelQueue.end()) 203 { 204 log<level::DEBUG>("Host notifier removing deleted log from queue"); 205 _pelQueue.erase(queueIt); 206 } 207 208 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id); 209 if (sentIt != _sentPELs.end()) 210 { 211 log<level::DEBUG>("Host notifier removing deleted log from sent list"); 212 _sentPELs.erase(sentIt); 213 } 214 215 // Nothing we can do about this... 216 if (id == _inProgressPEL) 217 { 218 log<level::WARNING>( 219 "A PEL was deleted while its host notification was in progress", 220 entry("PEL_ID=0x%X", id)); 221 } 222 } 223 224 void HostNotifier::scheduleDispatch() 225 { 226 _dispatcher = std::make_unique<sdeventplus::source::Defer>( 227 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch), 228 this, std::placeholders::_1)); 229 } 230 231 void HostNotifier::dispatch(sdeventplus::source::EventBase& source) 232 { 233 _dispatcher.reset(); 234 235 doNewLogNotify(); 236 } 237 238 void HostNotifier::doNewLogNotify() 239 { 240 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() || 241 _hostFullTimer.isEnabled()) 242 { 243 return; 244 } 245 246 if (_retryCount >= maxRetryAttempts) 247 { 248 // Give up until a new log comes in. 249 if (_retryCount == maxRetryAttempts) 250 { 251 // If this were to really happen, the PLDM interface 252 // would be down and isolating that shouldn't left to 253 // a logging daemon, so just trace. Also, this will start 254 // trying again when the next new log comes in. 255 log<level::ERR>( 256 "PEL Host notifier hit max retry attempts. Giving up for now.", 257 entry("PEL_ID=0x%X", _pelQueue.front())); 258 259 // Tell the host interface object to clean itself up, especially to 260 // release the PLDM instance ID it's been using. 261 _hostIface->cancelCmd(); 262 } 263 return; 264 } 265 266 bool doNotify = false; 267 uint32_t id = 0; 268 269 // Find the PEL to send 270 while (!doNotify && !_pelQueue.empty()) 271 { 272 id = _pelQueue.front(); 273 _pelQueue.pop_front(); 274 275 if (notifyRequired(id)) 276 { 277 doNotify = true; 278 } 279 } 280 281 if (doNotify) 282 { 283 // Get the size using the repo attributes 284 Repository::LogID i{Repository::LogID::Pel{id}}; 285 if (auto attributes = _repo.getPELAttributes(i); attributes) 286 { 287 auto size = static_cast<size_t>( 288 std::filesystem::file_size((*attributes).get().path)); 289 290 log<level::DEBUG>("sendNewLogCmd", entry("PEL_ID=0x%X", id), 291 entry("PEL_SIZE=%d", size)); 292 293 auto rc = _hostIface->sendNewLogCmd(id, size); 294 295 if (rc == CmdStatus::success) 296 { 297 _inProgressPEL = id; 298 } 299 else 300 { 301 // It failed. Retry 302 log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id)); 303 _pelQueue.push_front(id); 304 _inProgressPEL = 0; 305 _retryTimer.restartOnce(_hostIface->getSendRetryDelay()); 306 } 307 } 308 else 309 { 310 log<level::ERR>("PEL ID not in repository. Cannot notify host", 311 entry("PEL_ID=0x%X", id)); 312 } 313 } 314 } 315 316 void HostNotifier::hostStateChange(bool hostUp) 317 { 318 _retryCount = 0; 319 _hostFull = false; 320 321 if (hostUp && !_pelQueue.empty()) 322 { 323 log<level::DEBUG>("Host state change to on"); 324 325 doNewLogNotify(); 326 } 327 else if (!hostUp) 328 { 329 log<level::DEBUG>("Host state change to off"); 330 331 stopCommand(); 332 333 // Reset the state on any PELs that were sent but not acked back 334 // to new so they'll get sent again. 335 for (auto id : _sentPELs) 336 { 337 _pelQueue.push_back(id); 338 _repo.setPELHostTransState(id, TransmissionState::newPEL); 339 } 340 341 _sentPELs.clear(); 342 343 if (_hostFullTimer.isEnabled()) 344 { 345 _hostFullTimer.setEnabled(false); 346 } 347 } 348 } 349 350 void HostNotifier::commandResponse(ResponseStatus status) 351 { 352 auto id = _inProgressPEL; 353 _inProgressPEL = 0; 354 355 if (status == ResponseStatus::success) 356 { 357 log<level::DEBUG>("HostNotifier command response success", 358 entry("PEL_ID=0x%X", id)); 359 _retryCount = 0; 360 361 _sentPELs.push_back(id); 362 363 _repo.setPELHostTransState(id, TransmissionState::sent); 364 365 // If the host is full, don't send off the next PEL 366 if (!_hostFull && !_pelQueue.empty()) 367 { 368 doNewLogNotify(); 369 } 370 } 371 else 372 { 373 log<level::ERR>("PLDM command response failure", 374 entry("PEL_ID=0x%X", id)); 375 // Retry 376 _pelQueue.push_front(id); 377 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay()); 378 } 379 } 380 381 void HostNotifier::retryTimerExpired() 382 { 383 if (_dataIface.isHostUp()) 384 { 385 log<level::INFO>("Attempting command retry", 386 entry("PEL_ID=0x%X", _pelQueue.front())); 387 _retryCount++; 388 doNewLogNotify(); 389 } 390 } 391 392 void HostNotifier::hostFullTimerExpired() 393 { 394 log<level::DEBUG>("Host full timer expired, trying send again"); 395 doNewLogNotify(); 396 } 397 398 void HostNotifier::stopCommand() 399 { 400 _retryCount = 0; 401 402 if (_inProgressPEL != 0) 403 { 404 _pelQueue.push_front(_inProgressPEL); 405 _inProgressPEL = 0; 406 } 407 408 if (_retryTimer.isEnabled()) 409 { 410 _retryTimer.setEnabled(false); 411 } 412 413 // Ensure the PLDM instance ID is released 414 _hostIface->cancelCmd(); 415 } 416 417 void HostNotifier::ackPEL(uint32_t id) 418 { 419 _repo.setPELHostTransState(id, TransmissionState::acked); 420 421 // No longer just 'sent', so remove it from the sent list. 422 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 423 if (sent != _sentPELs.end()) 424 { 425 _sentPELs.erase(sent); 426 } 427 428 // An ack means the host is no longer full 429 if (_hostFullTimer.isEnabled()) 430 { 431 _hostFullTimer.setEnabled(false); 432 } 433 434 if (_hostFull) 435 { 436 _hostFull = false; 437 438 log<level::DEBUG>("Host previously full, not anymore after this ack"); 439 440 // Start sending PELs again, from the event loop 441 if (!_pelQueue.empty()) 442 { 443 scheduleDispatch(); 444 } 445 } 446 } 447 448 void HostNotifier::setHostFull(uint32_t id) 449 { 450 log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id)); 451 452 _hostFull = true; 453 454 // This PEL needs to get re-sent 455 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 456 if (sent != _sentPELs.end()) 457 { 458 _sentPELs.erase(sent); 459 _repo.setPELHostTransState(id, TransmissionState::newPEL); 460 461 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) == 462 _pelQueue.end()) 463 { 464 _pelQueue.push_front(id); 465 } 466 } 467 468 // The only PELs that will be sent when the 469 // host is full is from this timer callback. 470 if (!_hostFullTimer.isEnabled()) 471 { 472 log<level::DEBUG>("Starting host full timer"); 473 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay()); 474 } 475 } 476 477 void HostNotifier::setBadPEL(uint32_t id) 478 { 479 log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id)); 480 481 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 482 if (sent != _sentPELs.end()) 483 { 484 _sentPELs.erase(sent); 485 } 486 487 _repo.setPELHostTransState(id, TransmissionState::badPEL); 488 } 489 490 } // namespace openpower::pels 491