1 /** 2 * Copyright © 2022 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "fan.hpp" 17 18 #include "logging.hpp" 19 #include "sdbusplus.hpp" 20 #include "system.hpp" 21 #include "types.hpp" 22 #include "utility.hpp" 23 24 #include <fmt/format.h> 25 26 #include <phosphor-logging/log.hpp> 27 28 namespace phosphor 29 { 30 namespace fan 31 { 32 namespace monitor 33 { 34 35 using namespace phosphor::logging; 36 using namespace sdbusplus::bus::match; 37 38 Fan::Fan(Mode mode, sdbusplus::bus::bus& bus, const sdeventplus::Event& event, 39 std::unique_ptr<trust::Manager>& trust, const FanDefinition& def, 40 System& system) : 41 _bus(bus), 42 _name(std::get<fanNameField>(def)), 43 _deviation(std::get<fanDeviationField>(def)), 44 _numSensorFailsForNonFunc(std::get<numSensorFailsForNonfuncField>(def)), 45 _trustManager(trust), 46 #ifdef MONITOR_USE_JSON 47 _monitorDelay(std::get<monitorStartDelayField>(def)), 48 _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)), 49 #endif 50 _system(system), 51 _presenceMatch(bus, 52 rules::propertiesChanged(util::INVENTORY_PATH + _name, 53 util::INV_ITEM_IFACE), 54 std::bind(std::mem_fn(&Fan::presenceChanged), this, 55 std::placeholders::_1)), 56 _presenceIfaceAddedMatch( 57 bus, 58 rules::interfacesAdded() + 59 rules::argNpath(0, util::INVENTORY_PATH + _name), 60 std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this, 61 std::placeholders::_1)), 62 _fanMissingErrorDelay(std::get<fanMissingErrDelayField>(def)), 63 _setFuncOnPresent(std::get<funcOnPresentField>(def)) 64 { 65 // Setup tach sensors for monitoring 66 auto& sensors = std::get<sensorListField>(def); 67 for (auto& s : sensors) 68 { 69 _sensors.emplace_back(std::make_shared<TachSensor>( 70 mode, bus, *this, std::get<sensorNameField>(s), 71 std::get<hasTargetField>(s), std::get<funcDelay>(def), 72 std::get<targetInterfaceField>(s), std::get<factorField>(s), 73 std::get<offsetField>(s), std::get<methodField>(def), 74 std::get<thresholdField>(s), std::get<ignoreAboveMaxField>(s), 75 std::get<timeoutField>(def), 76 std::get<nonfuncRotorErrDelayField>(def), 77 std::get<countIntervalField>(def), event)); 78 79 _trustManager->registerSensor(_sensors.back()); 80 } 81 82 bool functionalState = 83 (_numSensorFailsForNonFunc == 0) || 84 (countNonFunctionalSensors() < _numSensorFailsForNonFunc); 85 86 if (updateInventory(functionalState) && !functionalState) 87 { 88 // the inventory update threw an exception, possibly because D-Bus 89 // wasn't ready. Try to update sensors back to functional to avoid a 90 // false-alarm. They will be updated again from subscribing to the 91 // properties-changed event 92 93 for (auto& sensor : _sensors) 94 sensor->setFunctional(true); 95 } 96 97 #ifndef MONITOR_USE_JSON 98 // Check current tach state when entering monitor mode 99 if (mode != Mode::init) 100 { 101 _monitorReady = true; 102 103 // The TachSensors will now have already read the input 104 // and target values, so check them. 105 tachChanged(); 106 } 107 #else 108 if (_system.isPowerOn()) 109 { 110 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 111 } 112 #endif 113 114 if (_fanMissingErrorDelay) 115 { 116 _fanMissingErrorTimer = std::make_unique< 117 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 118 event, std::bind(&System::fanMissingErrorTimerExpired, &system, 119 std::ref(*this))); 120 } 121 122 try 123 { 124 _present = util::SDBusPlus::getProperty<bool>( 125 util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present"); 126 127 if (!_present) 128 { 129 getLogger().log( 130 fmt::format("On startup, fan {} is missing", _name)); 131 if (_system.isPowerOn() && _fanMissingErrorTimer) 132 { 133 _fanMissingErrorTimer->restartOnce( 134 std::chrono::seconds{*_fanMissingErrorDelay}); 135 } 136 } 137 } 138 catch (const util::DBusServiceError& e) 139 { 140 // This could happen on the first BMC boot if the presence 141 // detect app hasn't started yet and there isn't an inventory 142 // cache yet. 143 } 144 } 145 146 void Fan::presenceIfaceAdded(sdbusplus::message::message& msg) 147 { 148 sdbusplus::message::object_path path; 149 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 150 151 msg.read(path, interfaces); 152 153 auto properties = interfaces.find(util::INV_ITEM_IFACE); 154 if (properties == interfaces.end()) 155 { 156 return; 157 } 158 159 auto property = properties->second.find("Present"); 160 if (property == properties->second.end()) 161 { 162 return; 163 } 164 165 _present = std::get<bool>(property->second); 166 167 if (!_present) 168 { 169 getLogger().log(fmt::format( 170 "New fan {} interface added and fan is not present", _name)); 171 if (_system.isPowerOn() && _fanMissingErrorTimer) 172 { 173 _fanMissingErrorTimer->restartOnce( 174 std::chrono::seconds{*_fanMissingErrorDelay}); 175 } 176 } 177 178 _system.fanStatusChange(*this); 179 } 180 181 void Fan::startMonitor() 182 { 183 _monitorReady = true; 184 185 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 186 if (_present) 187 { 188 try 189 { 190 // Force a getProperty call to check if the tach sensor is 191 // on D-Bus. If it isn't, now set it to nonfunctional. 192 // This isn't done earlier so that code watching for 193 // nonfunctional tach sensors doesn't take actions before 194 // those sensors show up on D-Bus. 195 sensor->updateTachAndTarget(); 196 tachChanged(*sensor); 197 } 198 catch (const util::DBusServiceError& e) 199 { 200 // The tach property still isn't on D-Bus, ensure 201 // sensor is nonfunctional. 202 getLogger().log(fmt::format( 203 "Monitoring starting but {} sensor value not on D-Bus", 204 sensor->name())); 205 206 sensor->setFunctional(false); 207 208 if (_numSensorFailsForNonFunc) 209 { 210 if (_functional && (countNonFunctionalSensors() >= 211 _numSensorFailsForNonFunc)) 212 { 213 updateInventory(false); 214 } 215 } 216 217 _system.fanStatusChange(*this); 218 } 219 } 220 }); 221 } 222 223 void Fan::tachChanged() 224 { 225 if (_monitorReady) 226 { 227 for (auto& s : _sensors) 228 { 229 tachChanged(*s); 230 } 231 } 232 } 233 234 void Fan::tachChanged(TachSensor& sensor) 235 { 236 if (!_system.isPowerOn() || !_monitorReady) 237 { 238 return; 239 } 240 241 if (_trustManager->active()) 242 { 243 if (!_trustManager->checkTrust(sensor)) 244 { 245 return; 246 } 247 } 248 249 // If the error checking method is 'count', if a tach change leads 250 // to an out of range sensor the count timer will take over in calling 251 // process() until the sensor is healthy again. 252 if (!sensor.countTimerRunning()) 253 { 254 process(sensor); 255 } 256 } 257 258 void Fan::countTimerExpired(TachSensor& sensor) 259 { 260 if (_trustManager->active() && !_trustManager->checkTrust(sensor)) 261 { 262 return; 263 } 264 process(sensor); 265 } 266 267 void Fan::process(TachSensor& sensor) 268 { 269 // If this sensor is out of range at this moment, start 270 // its timer, at the end of which the inventory 271 // for the fan may get updated to not functional. 272 273 // If this sensor is OK, put everything back into a good state. 274 275 if (outOfRange(sensor)) 276 { 277 if (sensor.functional()) 278 { 279 switch (sensor.getMethod()) 280 { 281 case MethodMode::timebased: 282 // Start nonfunctional timer if not already running 283 sensor.startTimer(TimerMode::nonfunc); 284 break; 285 case MethodMode::count: 286 287 if (!sensor.countTimerRunning()) 288 { 289 sensor.startCountTimer(); 290 } 291 sensor.setCounter(true); 292 if (sensor.getCounter() >= sensor.getThreshold()) 293 { 294 updateState(sensor); 295 } 296 break; 297 } 298 } 299 } 300 else 301 { 302 switch (sensor.getMethod()) 303 { 304 case MethodMode::timebased: 305 if (sensor.functional()) 306 { 307 if (sensor.timerRunning()) 308 { 309 sensor.stopTimer(); 310 } 311 } 312 else 313 { 314 // Start functional timer if not already running 315 sensor.startTimer(TimerMode::func); 316 } 317 break; 318 case MethodMode::count: 319 sensor.setCounter(false); 320 if (sensor.getCounter() == 0) 321 { 322 if (!sensor.functional()) 323 { 324 updateState(sensor); 325 } 326 327 sensor.stopCountTimer(); 328 } 329 break; 330 } 331 } 332 } 333 334 uint64_t Fan::findTargetSpeed() 335 { 336 uint64_t target = 0; 337 // The sensor doesn't support a target, 338 // so get it from another sensor. 339 auto s = std::find_if(_sensors.begin(), _sensors.end(), 340 [](const auto& s) { return s->hasTarget(); }); 341 342 if (s != _sensors.end()) 343 { 344 target = (*s)->getTarget(); 345 } 346 347 return target; 348 } 349 350 size_t Fan::countNonFunctionalSensors() const 351 { 352 return std::count_if(_sensors.begin(), _sensors.end(), 353 [](const auto& s) { return !s->functional(); }); 354 } 355 356 bool Fan::outOfRange(const TachSensor& sensor) 357 { 358 if (!sensor.hasOwner()) 359 { 360 return true; 361 } 362 363 auto actual = static_cast<uint64_t>(sensor.getInput()); 364 auto range = sensor.getRange(_deviation); 365 366 return ((actual < range.first) || 367 (range.second && actual > range.second.value())); 368 } 369 370 void Fan::updateState(TachSensor& sensor) 371 { 372 if (!_system.isPowerOn()) 373 { 374 return; 375 } 376 377 auto range = sensor.getRange(_deviation); 378 std::string rangeMax = "NoMax"; 379 if (range.second) 380 { 381 rangeMax = std::to_string(range.second.value()); 382 } 383 384 sensor.setFunctional(!sensor.functional()); 385 getLogger().log( 386 fmt::format("Setting tach sensor {} functional state to {}. " 387 "[target = {}, input = {}, allowed range = ({} - {})]", 388 sensor.name(), sensor.functional(), sensor.getTarget(), 389 sensor.getInput(), range.first, rangeMax)); 390 391 // A zero value for _numSensorFailsForNonFunc means we aren't dealing 392 // with fan FRU functional status, only sensor functional status. 393 if (_numSensorFailsForNonFunc) 394 { 395 auto numNonFuncSensors = countNonFunctionalSensors(); 396 // If the fan was nonfunctional and enough sensors are now OK, 397 // the fan can be set to functional as long as `set_func_on_present` was 398 // not set 399 if (!_setFuncOnPresent && !_functional && 400 !(numNonFuncSensors >= _numSensorFailsForNonFunc)) 401 { 402 getLogger().log(fmt::format("Setting fan {} to functional, number " 403 "of nonfunctional sensors = {}", 404 _name, numNonFuncSensors)); 405 updateInventory(true); 406 } 407 408 // If the fan is currently functional, but too many 409 // contained sensors are now nonfunctional, update 410 // the fan to nonfunctional. 411 if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc)) 412 { 413 getLogger().log(fmt::format("Setting fan {} to nonfunctional, " 414 "number of nonfunctional sensors = {}", 415 _name, numNonFuncSensors)); 416 updateInventory(false); 417 } 418 } 419 420 _system.fanStatusChange(*this); 421 } 422 423 bool Fan::updateInventory(bool functional) 424 { 425 bool dbusError = false; 426 427 try 428 { 429 auto objectMap = 430 util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF, 431 util::FUNCTIONAL_PROPERTY, functional); 432 433 auto response = util::SDBusPlus::callMethod( 434 _bus, util::INVENTORY_SVC, util::INVENTORY_PATH, 435 util::INVENTORY_INTF, "Notify", objectMap); 436 437 if (response.is_method_error()) 438 { 439 log<level::ERR>("Error in Notify call to update inventory"); 440 441 dbusError = true; 442 } 443 } 444 catch (const util::DBusError& e) 445 { 446 dbusError = true; 447 448 getLogger().log( 449 fmt::format("D-Bus Exception reading/updating inventory : {}", 450 e.what()), 451 Logger::error); 452 } 453 454 // This will always track the current state of the inventory. 455 _functional = functional; 456 457 return dbusError; 458 } 459 460 void Fan::presenceChanged(sdbusplus::message::message& msg) 461 { 462 std::string interface; 463 std::map<std::string, std::variant<bool>> properties; 464 465 msg.read(interface, properties); 466 467 auto presentProp = properties.find("Present"); 468 if (presentProp != properties.end()) 469 { 470 _present = std::get<bool>(presentProp->second); 471 472 getLogger().log( 473 fmt::format("Fan {} presence state change to {}", _name, _present)); 474 475 if (_present && _setFuncOnPresent) 476 { 477 updateInventory(true); 478 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 479 sensor->setFunctional(true); 480 sensor->resetMethod(); 481 }); 482 } 483 484 _system.fanStatusChange(*this); 485 486 if (_fanMissingErrorDelay) 487 { 488 if (!_present && _system.isPowerOn()) 489 { 490 _fanMissingErrorTimer->restartOnce( 491 std::chrono::seconds{*_fanMissingErrorDelay}); 492 } 493 else if (_present && _fanMissingErrorTimer->isEnabled()) 494 { 495 _fanMissingErrorTimer->setEnabled(false); 496 } 497 } 498 } 499 } 500 501 void Fan::sensorErrorTimerExpired(const TachSensor& sensor) 502 { 503 if (_present && _system.isPowerOn()) 504 { 505 _system.sensorErrorTimerExpired(*this, sensor); 506 } 507 } 508 509 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn) 510 { 511 #ifdef MONITOR_USE_JSON 512 if (powerStateOn) 513 { 514 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 515 516 _numSensorsOnDBusAtPowerOn = 0; 517 518 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 519 try 520 { 521 // Force a getProperty call. If sensor is on D-Bus, 522 // then make sure it's functional. 523 sensor->updateTachAndTarget(); 524 525 _numSensorsOnDBusAtPowerOn++; 526 527 if (_present) 528 { 529 // If not functional, set it back to functional. 530 if (!sensor->functional()) 531 { 532 sensor->setFunctional(true); 533 _system.fanStatusChange(*this, true); 534 } 535 536 // Set the counters back to zero 537 if (sensor->getMethod() == MethodMode::count) 538 { 539 sensor->resetMethod(); 540 } 541 } 542 } 543 catch (const util::DBusError& e) 544 { 545 // Properties still aren't on D-Bus. Let startMonitor() 546 // deal with it, or maybe System::powerStateChanged() if 547 // there aren't any sensors at all on D-Bus. 548 getLogger().log(fmt::format( 549 "At power on, tach sensor {} value not on D-Bus", 550 sensor->name())); 551 } 552 }); 553 554 if (_present) 555 { 556 // If configured to change functional state on the fan itself, 557 // Set it back to true now if necessary. 558 if (_numSensorFailsForNonFunc) 559 { 560 if (!_functional && 561 (countNonFunctionalSensors() < _numSensorFailsForNonFunc)) 562 { 563 updateInventory(true); 564 } 565 } 566 } 567 else 568 { 569 getLogger().log( 570 fmt::format("At power on, fan {} is missing", _name)); 571 572 if (_fanMissingErrorTimer) 573 { 574 _fanMissingErrorTimer->restartOnce( 575 std::chrono::seconds{*_fanMissingErrorDelay}); 576 } 577 } 578 } 579 else 580 { 581 _monitorReady = false; 582 583 if (_monitorTimer.isEnabled()) 584 { 585 _monitorTimer.setEnabled(false); 586 } 587 588 if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled()) 589 { 590 _fanMissingErrorTimer->setEnabled(false); 591 } 592 593 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 594 if (sensor->timerRunning()) 595 { 596 sensor->stopTimer(); 597 } 598 599 sensor->stopCountTimer(); 600 }); 601 } 602 #endif 603 } 604 605 } // namespace monitor 606 } // namespace fan 607 } // namespace phosphor 608