/**
 * Copyright © 2022 IBM Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "fan.hpp"

#include "logging.hpp"
#include "sdbusplus.hpp"
#include "system.hpp"
#include "types.hpp"
#include "utility.hpp"

#include <phosphor-logging/log.hpp>

#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <format>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <variant>

namespace phosphor
{
namespace fan
{
namespace monitor
{

using namespace phosphor::logging;
using namespace sdbusplus::bus::match;

/**
 * Builds the fan from its definition: subscribes to presence signals on the
 * fan's inventory path, creates one TachSensor per entry in def.sensorList
 * and registers each with the trust manager, pushes the initial functional
 * state to inventory, optionally creates the fan-missing error timer, and
 * reads the current "Present" property.
 */
Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
         std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
         System& system) :
    _bus(bus), _name(def.name), _deviation(def.deviation),
    _upperDeviation(def.upperDeviation),
    _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
    _trustManager(trust),
#ifdef MONITOR_USE_JSON
    _monitorDelay(def.monitorStartDelay),
    _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
#endif
    _system(system),
    _presenceMatch(bus,
                   rules::propertiesChanged(util::INVENTORY_PATH + _name,
                                            util::INV_ITEM_IFACE),
                   std::bind(std::mem_fn(&Fan::presenceChanged), this,
                             std::placeholders::_1)),
    _presenceIfaceAddedMatch(
        bus,
        rules::interfacesAdded() +
            rules::argNpath(0, util::INVENTORY_PATH + _name),
        std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
                  std::placeholders::_1)),
    _fanMissingErrorDelay(def.fanMissingErrDelay),
    _setFuncOnPresent(def.funcOnPresent)
{
    // Setup tach sensors for monitoring
    for (const auto& s : def.sensorList)
    {
        _sensors.emplace_back(std::make_shared<TachSensor>(
            mode, bus, *this, s.name, s.hasTarget, def.funcDelay,
            s.targetInterface, s.targetPath, s.factor, s.offset, def.method,
            s.threshold, s.ignoreAboveMax, def.timeout,
            def.nonfuncRotorErrDelay, def.countInterval, event));

        _trustManager->registerSensor(_sensors.back());
    }

    // A zero _numSensorFailsForNonFunc means the fan itself is always
    // treated as functional here.
    bool functionalState =
        (_numSensorFailsForNonFunc == 0) ||
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc);

    if (updateInventory(functionalState) && !functionalState)
    {
        // The inventory update reported a D-Bus error (exception or method
        // error reply), possibly because D-Bus wasn't ready. Try to update
        // sensors back to functional to avoid a false-alarm. They will be
        // updated again from subscribing to the properties-changed event.
        for (auto& sensor : _sensors)
        {
            sensor->setFunctional(true);
        }
    }

#ifndef MONITOR_USE_JSON
    // Check current tach state when entering monitor mode
    if (mode != Mode::init)
    {
        _monitorReady = true;

        // The TachSensors will now have already read the input
        // and target values, so check them.
        tachChanged();
    }
#else
    if (_system.isPowerOn())
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
    }
#endif

    if (_fanMissingErrorDelay)
    {
        _fanMissingErrorTimer = std::make_unique<
            sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
            event, std::bind(&System::fanMissingErrorTimerExpired, &system,
                             std::ref(*this)));
    }

    try
    {
        _present = util::SDBusPlus::getProperty<bool>(
            util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");

        if (!_present)
        {
            getLogger().log(
                std::format("On startup, fan {} is missing", _name));
            if (_system.isPowerOn() && _fanMissingErrorTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    catch (const util::DBusServiceError&)
    {
        // This could happen on the first BMC boot if the presence
        // detect app hasn't started yet and there isn't an inventory
        // cache yet.
    }
}

/**
 * InterfacesAdded handler for the fan's inventory path. If the message
 * carries the inventory item interface with a "Present" property, caches
 * that value, starts the fan-missing timer when the fan is absent and power
 * is on, and notifies the System of the status change. Messages without
 * that interface/property are ignored.
 */
void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
{
    sdbusplus::message::object_path path;
    std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;

    msg.read(path, interfaces);

    auto properties = interfaces.find(util::INV_ITEM_IFACE);
    if (properties == interfaces.end())
    {
        return;
    }

    auto property = properties->second.find("Present");
    if (property == properties->second.end())
    {
        return;
    }

    _present = std::get<bool>(property->second);

    if (!_present)
    {
        getLogger().log(std::format(
            "New fan {} interface added and fan is not present", _name));
        if (_system.isPowerOn() && _fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }

    _system.fanStatusChange(*this);
}

/**
 * Marks monitoring as ready and evaluates every sensor once. A sensor whose
 * refresh throws DBusServiceError is set nonfunctional (and the fan too if
 * the nonfunctional-sensor threshold is reached) instead of being processed.
 */
void Fan::startMonitor()
{
    _monitorReady = true;

    for (auto& sensor : _sensors)
    {
        try
        {
            // Force a getProperty call to check if the tach sensor is
            // on D-Bus.  If it isn't, now set it to nonfunctional.
            // This isn't done earlier so that code watching for
            // nonfunctional tach sensors doesn't take actions before
            // those sensors show up on D-Bus.
            sensor->updateTachAndTarget();
            tachChanged(*sensor);
        }
        catch (const util::DBusServiceError&)
        {
            // The tach property still isn't on D-Bus. Ensure
            // sensor is nonfunctional, but skip creating an
            // error for it since it isn't a fan problem.
            getLogger().log(std::format(
                "Monitoring starting but {} sensor value not on D-Bus",
                sensor->name()));

            sensor->setFunctional(false, true);

            if (_numSensorFailsForNonFunc)
            {
                if (_functional &&
                    (countNonFunctionalSensors() >= _numSensorFailsForNonFunc))
                {
                    updateInventory(false);
                }
            }

            // At this point, don't start any power off actions due
            // to missing sensors.  Let something else handle that
            // policy.
            _system.fanStatusChange(*this, true);
        }
    }
}

/**
 * Re-evaluates every sensor of this fan; does nothing until monitoring is
 * ready.
 */
void Fan::tachChanged()
{
    if (_monitorReady)
    {
        for (auto& s : _sensors)
        {
            tachChanged(*s);
        }
    }
}

/**
 * Evaluates a single sensor after a tach change. Skipped while power is
 * off, before monitoring is ready, or when an active trust manager does not
 * trust the sensor.
 */
void Fan::tachChanged(TachSensor& sensor)
{
    if (!_system.isPowerOn() || !_monitorReady)
    {
        return;
    }

    if (_trustManager->active() && !_trustManager->checkTrust(sensor))
    {
        return;
    }

    // If the error checking method is 'count', if a tach change leads
    // to an out of range sensor the count timer will take over in calling
    // process() until the sensor is healthy again.
    if (!sensor.countTimerRunning())
    {
        process(sensor);
    }
}

/**
 * Count-timer callback for a sensor: processes the sensor unless an active
 * trust manager does not trust it.
 */
void Fan::countTimerExpired(TachSensor& sensor)
{
    if (_trustManager->active() && !_trustManager->checkTrust(sensor))
    {
        return;
    }
    process(sensor);
}

/**
 * Applies the sensor's error-checking method (time based or count based) to
 * its current in/out-of-range status.
 */
void Fan::process(TachSensor& sensor)
{
    // If this sensor is out of range at this moment, start
    // its timer, at the end of which the inventory
    // for the fan may get updated to not functional.

    // If this sensor is OK, put everything back into a good state.

    if (outOfRange(sensor))
    {
        if (sensor.functional())
        {
            switch (sensor.getMethod())
            {
                case MethodMode::timebased:
                    // Start nonfunctional timer if not already running
                    sensor.startTimer(TimerMode::nonfunc);
                    break;
                case MethodMode::count:

                    if (!sensor.countTimerRunning())
                    {
                        sensor.startCountTimer();
                    }
                    sensor.setCounter(true);
                    if (sensor.getCounter() >= sensor.getThreshold())
                    {
                        updateState(sensor);
                    }
                    break;
            }
        }
    }
    else
    {
        switch (sensor.getMethod())
        {
            case MethodMode::timebased:
                if (sensor.functional())
                {
                    if (sensor.timerRunning())
                    {
                        sensor.stopTimer();
                    }
                }
                else
                {
                    // Start functional timer if not already running
                    sensor.startTimer(TimerMode::func);
                }
                break;
            case MethodMode::count:
                sensor.setCounter(false);
                if (sensor.getCounter() == 0)
                {
                    if (!sensor.functional())
                    {
                        updateState(sensor);
                    }

                    sensor.stopCountTimer();
                }
                break;
        }
    }
}

/**
 * Returns the target of the first sensor that reports hasTarget(), or 0 if
 * no sensor of this fan has a target.
 */
uint64_t Fan::findTargetSpeed()
{
    uint64_t target = 0;
    // The sensor doesn't support a target,
    // so get it from another sensor.
    auto it = std::find_if(_sensors.begin(), _sensors.end(),
                           [](const auto& sensor) {
                               return sensor->hasTarget();
                           });

    if (it != _sensors.end())
    {
        target = (*it)->getTarget();
    }

    return target;
}

/**
 * Returns how many of this fan's sensors currently report !functional().
 */
size_t Fan::countNonFunctionalSensors() const
{
    return std::count_if(_sensors.begin(), _sensors.end(),
                         [](const auto& sensor) {
                             return !sensor->functional();
                         });
}

/**
 * Returns true when the sensor has no owner, or when its input is below the
 * allowed range minimum or above the range maximum (if a maximum exists).
 */
bool Fan::outOfRange(const TachSensor& sensor)
{
    if (!sensor.hasOwner())
    {
        return true;
    }

    auto actual = static_cast<uint64_t>(sensor.getInput());
    auto range = sensor.getRange(_deviation, _upperDeviation);

    return ((actual < range.first) ||
            (range.second && actual > range.second.value()));
}

/**
 * Toggles the sensor's functional state, logs the transition, updates the
 * fan's functional state in inventory when the nonfunctional-sensor
 * threshold is crossed, and notifies the System. No-op while power is off.
 */
void Fan::updateState(TachSensor& sensor)
{
    if (!_system.isPowerOn())
    {
        return;
    }

    auto range = sensor.getRange(_deviation, _upperDeviation);
    std::string rangeMax = "NoMax";
    if (range.second)
    {
        rangeMax = std::to_string(range.second.value());
    }

    // Skip starting the error timer if the sensor
    // isn't on D-Bus as this isn't a fan hardware problem.
    sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());

    getLogger().log(std::format(
        "Setting tach sensor {} functional state to {}. "
        "[target = {}, actual = {}, allowed range = ({} - {}) "
        "owned = {}]",
        sensor.name(), sensor.functional(), sensor.getTarget(),
        sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));

    // A zero value for _numSensorFailsForNonFunc means we aren't dealing
    // with fan FRU functional status, only sensor functional status.
    if (_numSensorFailsForNonFunc)
    {
        auto numNonFuncSensors = countNonFunctionalSensors();
        // If the fan was nonfunctional and enough sensors are now OK,
        // the fan can be set to functional as long as `set_func_on_present` was
        // not set
        if (!_setFuncOnPresent && !_functional &&
            (numNonFuncSensors < _numSensorFailsForNonFunc))
        {
            getLogger().log(std::format("Setting fan {} to functional, number "
                                        "of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(true);
        }

        // If the fan is currently functional, but too many
        // contained sensors are now nonfunctional, update
        // the fan to nonfunctional.
        if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc))
        {
            getLogger().log(std::format("Setting fan {} to nonfunctional, "
                                        "number of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(false);
        }
    }

    // Skip the power off rule checks if the sensor isn't
    // on D-Bus so a running system isn't shutdown.
    _system.fanStatusChange(*this, !sensor.hasOwner());
}

/**
 * Sends the fan's functional state to the inventory service via a Notify
 * call.
 *
 * @param[in] functional - the functional state to record
 *
 * @return true if the call replied with a method error or threw
 *         util::DBusError, false otherwise. _functional is set to the
 *         requested value in either case.
 */
bool Fan::updateInventory(bool functional)
{
    bool dbusError = false;

    try
    {
        auto objectMap =
            util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
                                  util::FUNCTIONAL_PROPERTY, functional);

        auto response = util::SDBusPlus::callMethod(
            _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
            util::INVENTORY_INTF, "Notify", objectMap);

        if (response.is_method_error())
        {
            log<level::ERR>("Error in Notify call to update inventory");

            dbusError = true;
        }
    }
    catch (const util::DBusError& e)
    {
        dbusError = true;

        getLogger().log(
            std::format("D-Bus Exception reading/updating inventory : {}",
                        e.what()),
            Logger::error);
    }

    // This will always track the current state of the inventory.
    _functional = functional;

    return dbusError;
}

/**
 * PropertiesChanged handler for the fan's inventory item interface. When
 * the message contains "Present", caches it, optionally restores the fan
 * and its sensors to functional, notifies the System, and starts or stops
 * the fan-missing error timer.
 */
void Fan::presenceChanged(sdbusplus::message_t& msg)
{
    std::string interface;
    std::map<std::string, std::variant<bool>> properties;

    msg.read(interface, properties);

    auto presentProp = properties.find("Present");
    if (presentProp != properties.end())
    {
        _present = std::get<bool>(presentProp->second);

        getLogger().log(
            std::format("Fan {} presence state change to {}", _name, _present));

        if (_present && _setFuncOnPresent)
        {
            updateInventory(true);
            for (auto& sensor : _sensors)
            {
                sensor->setFunctional(true);
                sensor->resetMethod();
            }
        }

        _system.fanStatusChange(*this);

        // The timer only exists when a fan-missing error delay was
        // configured (see the constructor), so checking the pointer guards
        // both the dereference below and the use of the delay value.
        if (_fanMissingErrorTimer)
        {
            if (!_present && _system.isPowerOn())
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
            else if (_present && _fanMissingErrorTimer->isEnabled())
            {
                _fanMissingErrorTimer->setEnabled(false);
            }
        }
    }
}

/**
 * Forwards a sensor error timer expiration to the System, but only when the
 * fan is present and power is on.
 */
void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
{
    if (_present && _system.isPowerOn())
    {
        _system.sensorErrorTimerExpired(*this, sensor);
    }
}

/**
 * Power state change handler; only has an effect when built with
 * MONITOR_USE_JSON.
 *
 * On power on: restarts the monitor start delay timer, refreshes each
 * sensor and counts the ones that could be read, restores present fans and
 * their sensors to functional, and starts the fan-missing timer for an
 * absent fan. On power off: clears the monitor-ready flag and stops the
 * monitor, fan-missing, and per-sensor timers.
 */
void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
{
#ifdef MONITOR_USE_JSON
    if (powerStateOn)
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));

        _numSensorsOnDBusAtPowerOn = 0;

        for (auto& sensor : _sensors)
        {
            try
            {
                // Force a getProperty call.  If sensor is on D-Bus,
                // then make sure it's functional.
                sensor->updateTachAndTarget();

                _numSensorsOnDBusAtPowerOn++;

                if (_present)
                {
                    // If not functional, set it back to functional.
                    if (!sensor->functional())
                    {
                        sensor->setFunctional(true);
                        _system.fanStatusChange(*this, true);
                    }

                    // Set the counters back to zero
                    if (sensor->getMethod() == MethodMode::count)
                    {
                        sensor->resetMethod();
                    }
                }
            }
            catch (const util::DBusError&)
            {
                // Properties still aren't on D-Bus.  Let startMonitor()
                // deal with it, or maybe System::powerStateChanged() if
                // there aren't any sensors at all on D-Bus.
                getLogger().log(std::format(
                    "At power on, tach sensor {} value not on D-Bus",
                    sensor->name()));
            }
        }

        if (_present)
        {
            // If configured to change functional state on the fan itself,
            // Set it back to true now if necessary.
            if (_numSensorFailsForNonFunc)
            {
                if (!_functional &&
                    (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
                {
                    updateInventory(true);
                }
            }
        }
        else
        {
            getLogger().log(
                std::format("At power on, fan {} is missing", _name));

            if (_fanMissingErrorTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    else
    {
        _monitorReady = false;

        if (_monitorTimer.isEnabled())
        {
            _monitorTimer.setEnabled(false);
        }

        if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }

        for (auto& sensor : _sensors)
        {
            if (sensor->timerRunning())
            {
                sensor->stopTimer();
            }

            sensor->stopCountTimer();
        }
    }
#endif
}

} // namespace monitor
} // namespace fan
} // namespace phosphor