1 /** 2 * Copyright © 2022 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "fan.hpp" 17 18 #include "logging.hpp" 19 #include "sdbusplus.hpp" 20 #include "system.hpp" 21 #include "types.hpp" 22 #include "utility.hpp" 23 24 #include <fmt/format.h> 25 26 #include <phosphor-logging/log.hpp> 27 28 namespace phosphor 29 { 30 namespace fan 31 { 32 namespace monitor 33 { 34 35 using namespace phosphor::logging; 36 using namespace sdbusplus::bus::match; 37 38 Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event, 39 std::unique_ptr<trust::Manager>& trust, const FanDefinition& def, 40 System& system) : 41 _bus(bus), 42 _name(std::get<fanNameField>(def)), 43 _deviation(std::get<fanDeviationField>(def)), 44 _numSensorFailsForNonFunc(std::get<numSensorFailsForNonfuncField>(def)), 45 _trustManager(trust), 46 #ifdef MONITOR_USE_JSON 47 _monitorDelay(std::get<monitorStartDelayField>(def)), 48 _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)), 49 #endif 50 _system(system), 51 _presenceMatch(bus, 52 rules::propertiesChanged(util::INVENTORY_PATH + _name, 53 util::INV_ITEM_IFACE), 54 std::bind(std::mem_fn(&Fan::presenceChanged), this, 55 std::placeholders::_1)), 56 _presenceIfaceAddedMatch( 57 bus, 58 rules::interfacesAdded() + 59 rules::argNpath(0, util::INVENTORY_PATH + _name), 60 std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this, 61 std::placeholders::_1)), 62 _fanMissingErrorDelay(std::get<fanMissingErrDelayField>(def)), 63 _setFuncOnPresent(std::get<funcOnPresentField>(def)) 64 { 65 // Setup tach sensors for monitoring 66 auto& sensors = std::get<sensorListField>(def); 67 for (auto& s : sensors) 68 { 69 _sensors.emplace_back(std::make_shared<TachSensor>( 70 mode, bus, *this, std::get<sensorNameField>(s), 71 std::get<hasTargetField>(s), std::get<funcDelay>(def), 72 std::get<targetInterfaceField>(s), std::get<factorField>(s), 73 std::get<offsetField>(s), std::get<methodField>(def), 74 std::get<thresholdField>(s), std::get<ignoreAboveMaxField>(s), 75 std::get<timeoutField>(def), 76 std::get<nonfuncRotorErrDelayField>(def), 77 std::get<countIntervalField>(def), event)); 78 79 _trustManager->registerSensor(_sensors.back()); 80 } 81 82 bool functionalState = 83 (_numSensorFailsForNonFunc == 0) || 84 (countNonFunctionalSensors() < _numSensorFailsForNonFunc); 85 86 if (updateInventory(functionalState) && !functionalState) 87 { 88 // the inventory update threw an exception, possibly because D-Bus 89 // wasn't ready. Try to update sensors back to functional to avoid a 90 // false-alarm. They will be updated again from subscribing to the 91 // properties-changed event 92 93 for (auto& sensor : _sensors) 94 sensor->setFunctional(true); 95 } 96 97 #ifndef MONITOR_USE_JSON 98 // Check current tach state when entering monitor mode 99 if (mode != Mode::init) 100 { 101 _monitorReady = true; 102 103 // The TachSensors will now have already read the input 104 // and target values, so check them. 105 tachChanged(); 106 } 107 #else 108 if (_system.isPowerOn()) 109 { 110 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 111 } 112 #endif 113 114 if (_fanMissingErrorDelay) 115 { 116 _fanMissingErrorTimer = std::make_unique< 117 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 118 event, std::bind(&System::fanMissingErrorTimerExpired, &system, 119 std::ref(*this))); 120 } 121 122 try 123 { 124 _present = util::SDBusPlus::getProperty<bool>( 125 util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present"); 126 127 if (!_present) 128 { 129 getLogger().log( 130 fmt::format("On startup, fan {} is missing", _name)); 131 if (_system.isPowerOn() && _fanMissingErrorTimer) 132 { 133 _fanMissingErrorTimer->restartOnce( 134 std::chrono::seconds{*_fanMissingErrorDelay}); 135 } 136 } 137 } 138 catch (const util::DBusServiceError& e) 139 { 140 // This could happen on the first BMC boot if the presence 141 // detect app hasn't started yet and there isn't an inventory 142 // cache yet. 143 } 144 } 145 146 void Fan::presenceIfaceAdded(sdbusplus::message_t& msg) 147 { 148 sdbusplus::message::object_path path; 149 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 150 151 msg.read(path, interfaces); 152 153 auto properties = interfaces.find(util::INV_ITEM_IFACE); 154 if (properties == interfaces.end()) 155 { 156 return; 157 } 158 159 auto property = properties->second.find("Present"); 160 if (property == properties->second.end()) 161 { 162 return; 163 } 164 165 _present = std::get<bool>(property->second); 166 167 if (!_present) 168 { 169 getLogger().log(fmt::format( 170 "New fan {} interface added and fan is not present", _name)); 171 if (_system.isPowerOn() && _fanMissingErrorTimer) 172 { 173 _fanMissingErrorTimer->restartOnce( 174 std::chrono::seconds{*_fanMissingErrorDelay}); 175 } 176 } 177 178 _system.fanStatusChange(*this); 179 } 180 181 void Fan::startMonitor() 182 { 183 _monitorReady = true; 184 185 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 186 if (_present) 187 { 188 try 189 { 190 // Force a getProperty call to check if the tach sensor is 191 // on D-Bus. If it isn't, now set it to nonfunctional. 192 // This isn't done earlier so that code watching for 193 // nonfunctional tach sensors doesn't take actions before 194 // those sensors show up on D-Bus. 195 sensor->updateTachAndTarget(); 196 tachChanged(*sensor); 197 } 198 catch (const util::DBusServiceError& e) 199 { 200 // The tach property still isn't on D-Bus. Ensure 201 // sensor is nonfunctional, but skip creating an 202 // error for it since it isn't a fan problem. 203 getLogger().log(fmt::format( 204 "Monitoring starting but {} sensor value not on D-Bus", 205 sensor->name())); 206 207 sensor->setFunctional(false, true); 208 209 if (_numSensorFailsForNonFunc) 210 { 211 if (_functional && (countNonFunctionalSensors() >= 212 _numSensorFailsForNonFunc)) 213 { 214 updateInventory(false); 215 } 216 } 217 218 // At this point, don't start any power off actions due 219 // to missing sensors. Let something else handle that 220 // policy. 221 _system.fanStatusChange(*this, true); 222 } 223 } 224 }); 225 } 226 227 void Fan::tachChanged() 228 { 229 if (_monitorReady) 230 { 231 for (auto& s : _sensors) 232 { 233 tachChanged(*s); 234 } 235 } 236 } 237 238 void Fan::tachChanged(TachSensor& sensor) 239 { 240 if (!_system.isPowerOn() || !_monitorReady) 241 { 242 return; 243 } 244 245 if (_trustManager->active()) 246 { 247 if (!_trustManager->checkTrust(sensor)) 248 { 249 return; 250 } 251 } 252 253 // If the error checking method is 'count', if a tach change leads 254 // to an out of range sensor the count timer will take over in calling 255 // process() until the sensor is healthy again. 256 if (!sensor.countTimerRunning()) 257 { 258 process(sensor); 259 } 260 } 261 262 void Fan::countTimerExpired(TachSensor& sensor) 263 { 264 if (_trustManager->active() && !_trustManager->checkTrust(sensor)) 265 { 266 return; 267 } 268 process(sensor); 269 } 270 271 void Fan::process(TachSensor& sensor) 272 { 273 // If this sensor is out of range at this moment, start 274 // its timer, at the end of which the inventory 275 // for the fan may get updated to not functional. 276 277 // If this sensor is OK, put everything back into a good state. 278 279 if (outOfRange(sensor)) 280 { 281 if (sensor.functional()) 282 { 283 switch (sensor.getMethod()) 284 { 285 case MethodMode::timebased: 286 // Start nonfunctional timer if not already running 287 sensor.startTimer(TimerMode::nonfunc); 288 break; 289 case MethodMode::count: 290 291 if (!sensor.countTimerRunning()) 292 { 293 sensor.startCountTimer(); 294 } 295 sensor.setCounter(true); 296 if (sensor.getCounter() >= sensor.getThreshold()) 297 { 298 updateState(sensor); 299 } 300 break; 301 } 302 } 303 } 304 else 305 { 306 switch (sensor.getMethod()) 307 { 308 case MethodMode::timebased: 309 if (sensor.functional()) 310 { 311 if (sensor.timerRunning()) 312 { 313 sensor.stopTimer(); 314 } 315 } 316 else 317 { 318 // Start functional timer if not already running 319 sensor.startTimer(TimerMode::func); 320 } 321 break; 322 case MethodMode::count: 323 sensor.setCounter(false); 324 if (sensor.getCounter() == 0) 325 { 326 if (!sensor.functional()) 327 { 328 updateState(sensor); 329 } 330 331 sensor.stopCountTimer(); 332 } 333 break; 334 } 335 } 336 } 337 338 uint64_t Fan::findTargetSpeed() 339 { 340 uint64_t target = 0; 341 // The sensor doesn't support a target, 342 // so get it from another sensor. 343 auto s = std::find_if(_sensors.begin(), _sensors.end(), 344 [](const auto& s) { return s->hasTarget(); }); 345 346 if (s != _sensors.end()) 347 { 348 target = (*s)->getTarget(); 349 } 350 351 return target; 352 } 353 354 size_t Fan::countNonFunctionalSensors() const 355 { 356 return std::count_if(_sensors.begin(), _sensors.end(), 357 [](const auto& s) { return !s->functional(); }); 358 } 359 360 bool Fan::outOfRange(const TachSensor& sensor) 361 { 362 if (!sensor.hasOwner()) 363 { 364 return true; 365 } 366 367 auto actual = static_cast<uint64_t>(sensor.getInput()); 368 auto range = sensor.getRange(_deviation); 369 370 return ((actual < range.first) || 371 (range.second && actual > range.second.value())); 372 } 373 374 void Fan::updateState(TachSensor& sensor) 375 { 376 if (!_system.isPowerOn()) 377 { 378 return; 379 } 380 381 auto range = sensor.getRange(_deviation); 382 std::string rangeMax = "NoMax"; 383 if (range.second) 384 { 385 rangeMax = std::to_string(range.second.value()); 386 } 387 388 // Skip starting the error timer if the sensor 389 // isn't on D-Bus as this isn't a fan hardware problem. 390 sensor.setFunctional(!sensor.functional(), !sensor.hasOwner()); 391 392 getLogger().log(fmt::format( 393 "Setting tach sensor {} functional state to {}. " 394 "[target = {}, input = {}, allowed range = ({} - {}) " 395 "owned = {}]", 396 sensor.name(), sensor.functional(), sensor.getTarget(), 397 sensor.getInput(), range.first, rangeMax, sensor.hasOwner())); 398 399 // A zero value for _numSensorFailsForNonFunc means we aren't dealing 400 // with fan FRU functional status, only sensor functional status. 401 if (_numSensorFailsForNonFunc) 402 { 403 auto numNonFuncSensors = countNonFunctionalSensors(); 404 // If the fan was nonfunctional and enough sensors are now OK, 405 // the fan can be set to functional as long as `set_func_on_present` was 406 // not set 407 if (!_setFuncOnPresent && !_functional && 408 !(numNonFuncSensors >= _numSensorFailsForNonFunc)) 409 { 410 getLogger().log(fmt::format("Setting fan {} to functional, number " 411 "of nonfunctional sensors = {}", 412 _name, numNonFuncSensors)); 413 updateInventory(true); 414 } 415 416 // If the fan is currently functional, but too many 417 // contained sensors are now nonfunctional, update 418 // the fan to nonfunctional. 419 if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc)) 420 { 421 getLogger().log(fmt::format("Setting fan {} to nonfunctional, " 422 "number of nonfunctional sensors = {}", 423 _name, numNonFuncSensors)); 424 updateInventory(false); 425 } 426 } 427 428 // Skip the power off rule checks if the sensor isn't 429 // on D-Bus so a running system isn't shutdown. 430 _system.fanStatusChange(*this, !sensor.hasOwner()); 431 } 432 433 bool Fan::updateInventory(bool functional) 434 { 435 bool dbusError = false; 436 437 try 438 { 439 auto objectMap = 440 util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF, 441 util::FUNCTIONAL_PROPERTY, functional); 442 443 auto response = util::SDBusPlus::callMethod( 444 _bus, util::INVENTORY_SVC, util::INVENTORY_PATH, 445 util::INVENTORY_INTF, "Notify", objectMap); 446 447 if (response.is_method_error()) 448 { 449 log<level::ERR>("Error in Notify call to update inventory"); 450 451 dbusError = true; 452 } 453 } 454 catch (const util::DBusError& e) 455 { 456 dbusError = true; 457 458 getLogger().log( 459 fmt::format("D-Bus Exception reading/updating inventory : {}", 460 e.what()), 461 Logger::error); 462 } 463 464 // This will always track the current state of the inventory. 465 _functional = functional; 466 467 return dbusError; 468 } 469 470 void Fan::presenceChanged(sdbusplus::message_t& msg) 471 { 472 std::string interface; 473 std::map<std::string, std::variant<bool>> properties; 474 475 msg.read(interface, properties); 476 477 auto presentProp = properties.find("Present"); 478 if (presentProp != properties.end()) 479 { 480 _present = std::get<bool>(presentProp->second); 481 482 getLogger().log( 483 fmt::format("Fan {} presence state change to {}", _name, _present)); 484 485 if (_present && _setFuncOnPresent) 486 { 487 updateInventory(true); 488 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 489 sensor->setFunctional(true); 490 sensor->resetMethod(); 491 }); 492 } 493 494 _system.fanStatusChange(*this); 495 496 if (_fanMissingErrorDelay) 497 { 498 if (!_present && _system.isPowerOn()) 499 { 500 _fanMissingErrorTimer->restartOnce( 501 std::chrono::seconds{*_fanMissingErrorDelay}); 502 } 503 else if (_present && _fanMissingErrorTimer->isEnabled()) 504 { 505 _fanMissingErrorTimer->setEnabled(false); 506 } 507 } 508 } 509 } 510 511 void Fan::sensorErrorTimerExpired(const TachSensor& sensor) 512 { 513 if (_present && _system.isPowerOn()) 514 { 515 _system.sensorErrorTimerExpired(*this, sensor); 516 } 517 } 518 519 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn) 520 { 521 #ifdef MONITOR_USE_JSON 522 if (powerStateOn) 523 { 524 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 525 526 _numSensorsOnDBusAtPowerOn = 0; 527 528 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 529 try 530 { 531 // Force a getProperty call. If sensor is on D-Bus, 532 // then make sure it's functional. 533 sensor->updateTachAndTarget(); 534 535 _numSensorsOnDBusAtPowerOn++; 536 537 if (_present) 538 { 539 // If not functional, set it back to functional. 540 if (!sensor->functional()) 541 { 542 sensor->setFunctional(true); 543 _system.fanStatusChange(*this, true); 544 } 545 546 // Set the counters back to zero 547 if (sensor->getMethod() == MethodMode::count) 548 { 549 sensor->resetMethod(); 550 } 551 } 552 } 553 catch (const util::DBusError& e) 554 { 555 // Properties still aren't on D-Bus. Let startMonitor() 556 // deal with it, or maybe System::powerStateChanged() if 557 // there aren't any sensors at all on D-Bus. 558 getLogger().log(fmt::format( 559 "At power on, tach sensor {} value not on D-Bus", 560 sensor->name())); 561 } 562 }); 563 564 if (_present) 565 { 566 // If configured to change functional state on the fan itself, 567 // Set it back to true now if necessary. 568 if (_numSensorFailsForNonFunc) 569 { 570 if (!_functional && 571 (countNonFunctionalSensors() < _numSensorFailsForNonFunc)) 572 { 573 updateInventory(true); 574 } 575 } 576 } 577 else 578 { 579 getLogger().log( 580 fmt::format("At power on, fan {} is missing", _name)); 581 582 if (_fanMissingErrorTimer) 583 { 584 _fanMissingErrorTimer->restartOnce( 585 std::chrono::seconds{*_fanMissingErrorDelay}); 586 } 587 } 588 } 589 else 590 { 591 _monitorReady = false; 592 593 if (_monitorTimer.isEnabled()) 594 { 595 _monitorTimer.setEnabled(false); 596 } 597 598 if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled()) 599 { 600 _fanMissingErrorTimer->setEnabled(false); 601 } 602 603 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 604 if (sensor->timerRunning()) 605 { 606 sensor->stopTimer(); 607 } 608 609 sensor->stopCountTimer(); 610 }); 611 } 612 #endif 613 } 614 615 } // namespace monitor 616 } // namespace fan 617 } // namespace phosphor 618