1 /** 2 * Copyright © 2019 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "host_notifier.hpp" 17 18 #include <phosphor-logging/log.hpp> 19 20 namespace openpower::pels 21 { 22 23 const auto subscriptionName = "PELHostNotifier"; 24 const size_t maxRetryAttempts = 15; 25 26 using namespace phosphor::logging; 27 28 HostNotifier::HostNotifier(Repository& repo, DataInterfaceBase& dataIface, 29 std::unique_ptr<HostInterface> hostIface) : 30 _repo(repo), 31 _dataIface(dataIface), _hostIface(std::move(hostIface)), 32 _retryTimer(_hostIface->getEvent(), 33 std::bind(std::mem_fn(&HostNotifier::retryTimerExpired), this)), 34 _hostFullTimer( 35 _hostIface->getEvent(), 36 std::bind(std::mem_fn(&HostNotifier::hostFullTimerExpired), this)) 37 { 38 // Subscribe to be told about new PELs. 39 _repo.subscribeToAdds(subscriptionName, 40 std::bind(std::mem_fn(&HostNotifier::newLogCallback), 41 this, std::placeholders::_1)); 42 43 // Subscribe to be told about deleted PELs. 44 _repo.subscribeToDeletes( 45 subscriptionName, 46 std::bind(std::mem_fn(&HostNotifier::deleteLogCallback), this, 47 std::placeholders::_1)); 48 49 // Add any existing PELs to the queue to send them if necessary. 50 _repo.for_each(std::bind(std::mem_fn(&HostNotifier::addPELToQueue), this, 51 std::placeholders::_1)); 52 53 // Subscribe to be told about host state changes. 54 _dataIface.subscribeToHostStateChange( 55 subscriptionName, 56 std::bind(std::mem_fun(&HostNotifier::hostStateChange), this, 57 std::placeholders::_1)); 58 59 // Set the function to call when the async reponse is received. 60 _hostIface->setResponseFunction( 61 std::bind(std::mem_fn(&HostNotifier::commandResponse), this, 62 std::placeholders::_1)); 63 64 // Start sending logs if the host is running 65 if (!_pelQueue.empty() && _dataIface.isHostUp()) 66 { 67 doNewLogNotify(); 68 } 69 } 70 71 HostNotifier::~HostNotifier() 72 { 73 _repo.unsubscribeFromAdds(subscriptionName); 74 _dataIface.unsubscribeFromHostStateChange(subscriptionName); 75 } 76 77 bool HostNotifier::addPELToQueue(const PEL& pel) 78 { 79 if (enqueueRequired(pel.id())) 80 { 81 _pelQueue.push_back(pel.id()); 82 } 83 84 // Return false so that Repo::for_each keeps going. 85 return false; 86 } 87 88 bool HostNotifier::enqueueRequired(uint32_t id) const 89 { 90 bool required = true; 91 Repository::LogID i{Repository::LogID::Pel{id}}; 92 93 // Manufacturing testing may turn off sending up PELs 94 if (!_dataIface.getHostPELEnablement()) 95 { 96 return false; 97 } 98 99 if (auto attributes = _repo.getPELAttributes(i); attributes) 100 { 101 auto a = attributes.value().get(); 102 103 if ((a.hostState == TransmissionState::acked) || 104 (a.hostState == TransmissionState::badPEL)) 105 { 106 required = false; 107 } 108 else if (a.actionFlags.test(hiddenFlagBit) && 109 (a.hmcState == TransmissionState::acked)) 110 { 111 required = false; 112 } 113 else if (a.actionFlags.test(dontReportToHostFlagBit)) 114 { 115 required = false; 116 } 117 } 118 else 119 { 120 using namespace phosphor::logging; 121 log<level::ERR>("Host Enqueue: Unable to find PEL ID in repository", 122 entry("PEL_ID=0x%X", id)); 123 required = false; 124 } 125 126 return required; 127 } 128 129 bool HostNotifier::notifyRequired(uint32_t id) const 130 { 131 bool notify = true; 132 Repository::LogID i{Repository::LogID::Pel{id}}; 133 134 if (auto attributes = _repo.getPELAttributes(i); attributes) 135 { 136 // If already acked by the host, don't send again. 137 // (A safety check as it shouldn't get to this point.) 138 auto a = attributes.value().get(); 139 if (a.hostState == TransmissionState::acked) 140 { 141 notify = false; 142 } 143 else if (a.actionFlags.test(hiddenFlagBit)) 144 { 145 // If hidden and acked (or will be) acked by the HMC, 146 // also don't send it. (HMC management can come and 147 // go at any time) 148 if ((a.hmcState == TransmissionState::acked) || 149 _dataIface.isHMCManaged()) 150 { 151 notify = false; 152 } 153 } 154 } 155 else 156 { 157 // Must have been deleted since put on the queue. 158 notify = false; 159 } 160 161 return notify; 162 } 163 164 void HostNotifier::newLogCallback(const PEL& pel) 165 { 166 if (!enqueueRequired(pel.id())) 167 { 168 return; 169 } 170 171 _pelQueue.push_back(pel.id()); 172 173 // Notify shouldn't happen if host is down or full 174 if (!_dataIface.isHostUp() || _hostFull) 175 { 176 return; 177 } 178 179 // Dispatch a command now if there isn't currently a command 180 // in progress and this is the first log in the queue or it 181 // previously gave up from a hard failure. 182 auto inProgress = (_inProgressPEL != 0) || _hostIface->cmdInProgress() || 183 _retryTimer.isEnabled(); 184 185 auto firstPEL = _pelQueue.size() == 1; 186 auto gaveUp = _retryCount >= maxRetryAttempts; 187 188 if (!inProgress && (firstPEL || gaveUp)) 189 { 190 _retryCount = 0; 191 192 // Send a log, but from the event loop, not from here. 193 scheduleDispatch(); 194 } 195 } 196 197 void HostNotifier::deleteLogCallback(uint32_t id) 198 { 199 auto queueIt = std::find(_pelQueue.begin(), _pelQueue.end(), id); 200 if (queueIt != _pelQueue.end()) 201 { 202 log<level::DEBUG>("Host notifier removing deleted log from queue"); 203 _pelQueue.erase(queueIt); 204 } 205 206 auto sentIt = std::find(_sentPELs.begin(), _sentPELs.end(), id); 207 if (sentIt != _sentPELs.end()) 208 { 209 log<level::DEBUG>("Host notifier removing deleted log from sent list"); 210 _sentPELs.erase(sentIt); 211 } 212 213 // Nothing we can do about this... 214 if (id == _inProgressPEL) 215 { 216 log<level::WARNING>( 217 "A PEL was deleted while its host notification was in progress", 218 entry("PEL_ID=0x%X", id)); 219 } 220 } 221 222 void HostNotifier::scheduleDispatch() 223 { 224 _dispatcher = std::make_unique<sdeventplus::source::Defer>( 225 _hostIface->getEvent(), std::bind(std::mem_fn(&HostNotifier::dispatch), 226 this, std::placeholders::_1)); 227 } 228 229 void HostNotifier::dispatch(sdeventplus::source::EventBase& source) 230 { 231 _dispatcher.reset(); 232 233 doNewLogNotify(); 234 } 235 236 void HostNotifier::doNewLogNotify() 237 { 238 if (!_dataIface.isHostUp() || _retryTimer.isEnabled() || 239 _hostFullTimer.isEnabled()) 240 { 241 return; 242 } 243 244 if (_retryCount >= maxRetryAttempts) 245 { 246 // Give up until a new log comes in. 247 if (_retryCount == maxRetryAttempts) 248 { 249 // If this were to really happen, the PLDM interface 250 // would be down and isolating that shouldn't left to 251 // a logging daemon, so just trace. Also, this will start 252 // trying again when the next new log comes in. 253 log<level::ERR>( 254 "PEL Host notifier hit max retry attempts. Giving up for now.", 255 entry("PEL_ID=0x%X", _pelQueue.front())); 256 257 // Tell the host interface object to clean itself up, especially to 258 // release the PLDM instance ID it's been using. 259 _hostIface->cancelCmd(); 260 } 261 return; 262 } 263 264 bool doNotify = false; 265 uint32_t id = 0; 266 267 // Find the PEL to send 268 while (!doNotify && !_pelQueue.empty()) 269 { 270 id = _pelQueue.front(); 271 _pelQueue.pop_front(); 272 273 if (notifyRequired(id)) 274 { 275 doNotify = true; 276 } 277 } 278 279 if (doNotify) 280 { 281 // Get the size using the repo attributes 282 Repository::LogID i{Repository::LogID::Pel{id}}; 283 if (auto attributes = _repo.getPELAttributes(i); attributes) 284 { 285 auto size = static_cast<size_t>( 286 std::filesystem::file_size((*attributes).get().path)); 287 auto rc = _hostIface->sendNewLogCmd(id, size); 288 289 if (rc == CmdStatus::success) 290 { 291 _inProgressPEL = id; 292 } 293 else 294 { 295 // It failed. Retry 296 log<level::ERR>("PLDM send failed", entry("PEL_ID=0x%X", id)); 297 _pelQueue.push_front(id); 298 _inProgressPEL = 0; 299 _retryTimer.restartOnce(_hostIface->getSendRetryDelay()); 300 } 301 } 302 else 303 { 304 log<level::ERR>("PEL ID not in repository. Cannot notify host", 305 entry("PEL_ID=0x%X", id)); 306 } 307 } 308 } 309 310 void HostNotifier::hostStateChange(bool hostUp) 311 { 312 _retryCount = 0; 313 _hostFull = false; 314 315 if (hostUp && !_pelQueue.empty()) 316 { 317 doNewLogNotify(); 318 } 319 else if (!hostUp) 320 { 321 stopCommand(); 322 323 // Reset the state on any PELs that were sent but not acked back 324 // to new so they'll get sent again. 325 for (auto id : _sentPELs) 326 { 327 _pelQueue.push_back(id); 328 _repo.setPELHostTransState(id, TransmissionState::newPEL); 329 } 330 331 _sentPELs.clear(); 332 333 if (_hostFullTimer.isEnabled()) 334 { 335 _hostFullTimer.setEnabled(false); 336 } 337 } 338 } 339 340 void HostNotifier::commandResponse(ResponseStatus status) 341 { 342 auto id = _inProgressPEL; 343 _inProgressPEL = 0; 344 345 if (status == ResponseStatus::success) 346 { 347 _retryCount = 0; 348 349 _sentPELs.push_back(id); 350 351 _repo.setPELHostTransState(id, TransmissionState::sent); 352 353 // If the host is full, don't send off the next PEL 354 if (!_hostFull && !_pelQueue.empty()) 355 { 356 doNewLogNotify(); 357 } 358 } 359 else 360 { 361 log<level::ERR>("PLDM command response failure", 362 entry("PEL_ID=0x%X", id)); 363 // Retry 364 _pelQueue.push_front(id); 365 _retryTimer.restartOnce(_hostIface->getReceiveRetryDelay()); 366 } 367 } 368 369 void HostNotifier::retryTimerExpired() 370 { 371 if (_dataIface.isHostUp()) 372 { 373 log<level::INFO>("Attempting command retry", 374 entry("PEL_ID=0x%X", _pelQueue.front())); 375 _retryCount++; 376 doNewLogNotify(); 377 } 378 } 379 380 void HostNotifier::hostFullTimerExpired() 381 { 382 doNewLogNotify(); 383 } 384 385 void HostNotifier::stopCommand() 386 { 387 _retryCount = 0; 388 389 if (_inProgressPEL != 0) 390 { 391 _pelQueue.push_front(_inProgressPEL); 392 _inProgressPEL = 0; 393 } 394 395 if (_retryTimer.isEnabled()) 396 { 397 _retryTimer.setEnabled(false); 398 } 399 400 // Ensure the PLDM instance ID is released 401 _hostIface->cancelCmd(); 402 } 403 404 void HostNotifier::ackPEL(uint32_t id) 405 { 406 _repo.setPELHostTransState(id, TransmissionState::acked); 407 408 // No longer just 'sent', so remove it from the sent list. 409 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 410 if (sent != _sentPELs.end()) 411 { 412 _sentPELs.erase(sent); 413 } 414 415 // An ack means the host is no longer full 416 if (_hostFullTimer.isEnabled()) 417 { 418 _hostFullTimer.setEnabled(false); 419 } 420 421 if (_hostFull) 422 { 423 _hostFull = false; 424 425 // Start sending PELs again, from the event loop 426 if (!_pelQueue.empty()) 427 { 428 scheduleDispatch(); 429 } 430 } 431 } 432 433 void HostNotifier::setHostFull(uint32_t id) 434 { 435 log<level::INFO>("Received Host full indication", entry("PEL_ID=0x%X", id)); 436 437 _hostFull = true; 438 439 // This PEL needs to get re-sent 440 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 441 if (sent != _sentPELs.end()) 442 { 443 _sentPELs.erase(sent); 444 _repo.setPELHostTransState(id, TransmissionState::newPEL); 445 446 if (std::find(_pelQueue.begin(), _pelQueue.end(), id) == 447 _pelQueue.end()) 448 { 449 _pelQueue.push_front(id); 450 } 451 } 452 453 // The only PELs that will be sent when the 454 // host is full is from this timer callback. 455 if (!_hostFullTimer.isEnabled()) 456 { 457 _hostFullTimer.restartOnce(_hostIface->getHostFullRetryDelay()); 458 } 459 } 460 461 void HostNotifier::setBadPEL(uint32_t id) 462 { 463 log<level::ERR>("PEL rejected by the host", entry("PEL_ID=0x%X", id)); 464 465 auto sent = std::find(_sentPELs.begin(), _sentPELs.end(), id); 466 if (sent != _sentPELs.end()) 467 { 468 _sentPELs.erase(sent); 469 } 470 471 _repo.setPELHostTransState(id, TransmissionState::badPEL); 472 } 473 474 } // namespace openpower::pels 475