1 /** 2 * Copyright © 2022 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "fan.hpp" 17 18 #include "logging.hpp" 19 #include "sdbusplus.hpp" 20 #include "system.hpp" 21 #include "types.hpp" 22 #include "utility.hpp" 23 24 #include <phosphor-logging/log.hpp> 25 26 #include <format> 27 28 namespace phosphor 29 { 30 namespace fan 31 { 32 namespace monitor 33 { 34 35 using namespace phosphor::logging; 36 using namespace sdbusplus::bus::match; 37 38 Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event, 39 std::unique_ptr<trust::Manager>& trust, const FanDefinition& def, 40 System& system) : 41 _bus(bus), 42 _name(def.name), _deviation(def.deviation), 43 _upperDeviation(def.upperDeviation), 44 _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc), 45 _trustManager(trust), 46 #ifdef MONITOR_USE_JSON 47 _monitorDelay(def.monitorStartDelay), 48 _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)), 49 #endif 50 _system(system), 51 _presenceMatch(bus, 52 rules::propertiesChanged(util::INVENTORY_PATH + _name, 53 util::INV_ITEM_IFACE), 54 std::bind(std::mem_fn(&Fan::presenceChanged), this, 55 std::placeholders::_1)), 56 _presenceIfaceAddedMatch( 57 bus, 58 rules::interfacesAdded() + 59 rules::argNpath(0, util::INVENTORY_PATH + _name), 60 std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this, 61 std::placeholders::_1)), 62 _fanMissingErrorDelay(def.fanMissingErrDelay), 63 _setFuncOnPresent(def.funcOnPresent) 64 { 65 // Setup tach sensors for monitoring 66 for (const auto& s : def.sensorList) 67 { 68 _sensors.emplace_back(std::make_shared<TachSensor>( 69 mode, bus, *this, s.name, s.hasTarget, def.funcDelay, 70 s.targetInterface, s.targetPath, s.factor, s.offset, def.method, 71 s.threshold, s.ignoreAboveMax, def.timeout, 72 def.nonfuncRotorErrDelay, def.countInterval, event)); 73 74 _trustManager->registerSensor(_sensors.back()); 75 } 76 77 bool functionalState = 78 (_numSensorFailsForNonFunc == 0) || 79 (countNonFunctionalSensors() < _numSensorFailsForNonFunc); 80 81 if (updateInventory(functionalState) && !functionalState) 82 { 83 // the inventory update threw an exception, possibly because D-Bus 84 // wasn't ready. Try to update sensors back to functional to avoid a 85 // false-alarm. They will be updated again from subscribing to the 86 // properties-changed event 87 88 for (auto& sensor : _sensors) 89 sensor->setFunctional(true); 90 } 91 92 #ifndef MONITOR_USE_JSON 93 // Check current tach state when entering monitor mode 94 if (mode != Mode::init) 95 { 96 _monitorReady = true; 97 98 // The TachSensors will now have already read the input 99 // and target values, so check them. 100 tachChanged(); 101 } 102 #else 103 if (_system.isPowerOn()) 104 { 105 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 106 } 107 #endif 108 109 if (_fanMissingErrorDelay) 110 { 111 _fanMissingErrorTimer = std::make_unique< 112 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 113 event, std::bind(&System::fanMissingErrorTimerExpired, &system, 114 std::ref(*this))); 115 } 116 117 try 118 { 119 _present = util::SDBusPlus::getProperty<bool>( 120 util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present"); 121 122 if (!_present) 123 { 124 getLogger().log( 125 std::format("On startup, fan {} is missing", _name)); 126 if (_system.isPowerOn() && _fanMissingErrorTimer) 127 { 128 _fanMissingErrorTimer->restartOnce( 129 std::chrono::seconds{*_fanMissingErrorDelay}); 130 } 131 } 132 } 133 catch (const util::DBusServiceError& e) 134 { 135 // This could happen on the first BMC boot if the presence 136 // detect app hasn't started yet and there isn't an inventory 137 // cache yet. 138 } 139 } 140 141 void Fan::presenceIfaceAdded(sdbusplus::message_t& msg) 142 { 143 sdbusplus::message::object_path path; 144 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 145 146 msg.read(path, interfaces); 147 148 auto properties = interfaces.find(util::INV_ITEM_IFACE); 149 if (properties == interfaces.end()) 150 { 151 return; 152 } 153 154 auto property = properties->second.find("Present"); 155 if (property == properties->second.end()) 156 { 157 return; 158 } 159 160 _present = std::get<bool>(property->second); 161 162 if (!_present) 163 { 164 getLogger().log(std::format( 165 "New fan {} interface added and fan is not present", _name)); 166 if (_system.isPowerOn() && _fanMissingErrorTimer) 167 { 168 _fanMissingErrorTimer->restartOnce( 169 std::chrono::seconds{*_fanMissingErrorDelay}); 170 } 171 } 172 173 _system.fanStatusChange(*this); 174 } 175 176 void Fan::startMonitor() 177 { 178 _monitorReady = true; 179 180 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 181 try 182 { 183 // Force a getProperty call to check if the tach sensor is 184 // on D-Bus. If it isn't, now set it to nonfunctional. 185 // This isn't done earlier so that code watching for 186 // nonfunctional tach sensors doesn't take actions before 187 // those sensors show up on D-Bus. 188 sensor->updateTachAndTarget(); 189 tachChanged(*sensor); 190 } 191 catch (const util::DBusServiceError& e) 192 { 193 // The tach property still isn't on D-Bus. Ensure 194 // sensor is nonfunctional, but skip creating an 195 // error for it since it isn't a fan problem. 196 getLogger().log(std::format( 197 "Monitoring starting but {} sensor value not on D-Bus", 198 sensor->name())); 199 200 sensor->setFunctional(false, true); 201 202 if (_numSensorFailsForNonFunc) 203 { 204 if (_functional && 205 (countNonFunctionalSensors() >= _numSensorFailsForNonFunc)) 206 { 207 updateInventory(false); 208 } 209 } 210 211 // At this point, don't start any power off actions due 212 // to missing sensors. Let something else handle that 213 // policy. 214 _system.fanStatusChange(*this, true); 215 } 216 }); 217 } 218 219 void Fan::tachChanged() 220 { 221 if (_monitorReady) 222 { 223 for (auto& s : _sensors) 224 { 225 tachChanged(*s); 226 } 227 } 228 } 229 230 void Fan::tachChanged(TachSensor& sensor) 231 { 232 if (!_system.isPowerOn() || !_monitorReady) 233 { 234 return; 235 } 236 237 if (_trustManager->active()) 238 { 239 if (!_trustManager->checkTrust(sensor)) 240 { 241 return; 242 } 243 } 244 245 // If the error checking method is 'count', if a tach change leads 246 // to an out of range sensor the count timer will take over in calling 247 // process() until the sensor is healthy again. 248 if (!sensor.countTimerRunning()) 249 { 250 process(sensor); 251 } 252 } 253 254 void Fan::countTimerExpired(TachSensor& sensor) 255 { 256 if (_trustManager->active() && !_trustManager->checkTrust(sensor)) 257 { 258 return; 259 } 260 process(sensor); 261 } 262 263 void Fan::process(TachSensor& sensor) 264 { 265 // If this sensor is out of range at this moment, start 266 // its timer, at the end of which the inventory 267 // for the fan may get updated to not functional. 268 269 // If this sensor is OK, put everything back into a good state. 270 271 if (outOfRange(sensor)) 272 { 273 if (sensor.functional()) 274 { 275 switch (sensor.getMethod()) 276 { 277 case MethodMode::timebased: 278 // Start nonfunctional timer if not already running 279 sensor.startTimer(TimerMode::nonfunc); 280 break; 281 case MethodMode::count: 282 283 if (!sensor.countTimerRunning()) 284 { 285 sensor.startCountTimer(); 286 } 287 sensor.setCounter(true); 288 if (sensor.getCounter() >= sensor.getThreshold()) 289 { 290 updateState(sensor); 291 } 292 break; 293 } 294 } 295 } 296 else 297 { 298 switch (sensor.getMethod()) 299 { 300 case MethodMode::timebased: 301 if (sensor.functional()) 302 { 303 if (sensor.timerRunning()) 304 { 305 sensor.stopTimer(); 306 } 307 } 308 else 309 { 310 // Start functional timer if not already running 311 sensor.startTimer(TimerMode::func); 312 } 313 break; 314 case MethodMode::count: 315 sensor.setCounter(false); 316 if (sensor.getCounter() == 0) 317 { 318 if (!sensor.functional()) 319 { 320 updateState(sensor); 321 } 322 323 sensor.stopCountTimer(); 324 } 325 break; 326 } 327 } 328 } 329 330 uint64_t Fan::findTargetSpeed() 331 { 332 uint64_t target = 0; 333 // The sensor doesn't support a target, 334 // so get it from another sensor. 335 auto s = std::find_if(_sensors.begin(), _sensors.end(), 336 [](const auto& s) { return s->hasTarget(); }); 337 338 if (s != _sensors.end()) 339 { 340 target = (*s)->getTarget(); 341 } 342 343 return target; 344 } 345 346 size_t Fan::countNonFunctionalSensors() const 347 { 348 return std::count_if(_sensors.begin(), _sensors.end(), 349 [](const auto& s) { return !s->functional(); }); 350 } 351 352 bool Fan::outOfRange(const TachSensor& sensor) 353 { 354 if (!sensor.hasOwner()) 355 { 356 return true; 357 } 358 359 auto actual = static_cast<uint64_t>(sensor.getInput()); 360 auto range = sensor.getRange(_deviation, _upperDeviation); 361 362 return ((actual < range.first) || 363 (range.second && actual > range.second.value())); 364 } 365 366 void Fan::updateState(TachSensor& sensor) 367 { 368 if (!_system.isPowerOn()) 369 { 370 return; 371 } 372 373 auto range = sensor.getRange(_deviation, _upperDeviation); 374 std::string rangeMax = "NoMax"; 375 if (range.second) 376 { 377 rangeMax = std::to_string(range.second.value()); 378 } 379 380 // Skip starting the error timer if the sensor 381 // isn't on D-Bus as this isn't a fan hardware problem. 382 sensor.setFunctional(!sensor.functional(), !sensor.hasOwner()); 383 384 getLogger().log(std::format( 385 "Setting tach sensor {} functional state to {}. " 386 "[target = {}, actual = {}, allowed range = ({} - {}) " 387 "owned = {}]", 388 sensor.name(), sensor.functional(), sensor.getTarget(), 389 sensor.getInput(), range.first, rangeMax, sensor.hasOwner())); 390 391 // A zero value for _numSensorFailsForNonFunc means we aren't dealing 392 // with fan FRU functional status, only sensor functional status. 393 if (_numSensorFailsForNonFunc) 394 { 395 auto numNonFuncSensors = countNonFunctionalSensors(); 396 // If the fan was nonfunctional and enough sensors are now OK, 397 // the fan can be set to functional as long as `set_func_on_present` was 398 // not set 399 if (!_setFuncOnPresent && !_functional && 400 !(numNonFuncSensors >= _numSensorFailsForNonFunc)) 401 { 402 getLogger().log(std::format("Setting fan {} to functional, number " 403 "of nonfunctional sensors = {}", 404 _name, numNonFuncSensors)); 405 updateInventory(true); 406 } 407 408 // If the fan is currently functional, but too many 409 // contained sensors are now nonfunctional, update 410 // the fan to nonfunctional. 411 if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc)) 412 { 413 getLogger().log(std::format("Setting fan {} to nonfunctional, " 414 "number of nonfunctional sensors = {}", 415 _name, numNonFuncSensors)); 416 updateInventory(false); 417 } 418 } 419 420 // Skip the power off rule checks if the sensor isn't 421 // on D-Bus so a running system isn't shutdown. 422 _system.fanStatusChange(*this, !sensor.hasOwner()); 423 } 424 425 bool Fan::updateInventory(bool functional) 426 { 427 bool dbusError = false; 428 429 try 430 { 431 auto objectMap = 432 util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF, 433 util::FUNCTIONAL_PROPERTY, functional); 434 435 auto response = util::SDBusPlus::callMethod( 436 _bus, util::INVENTORY_SVC, util::INVENTORY_PATH, 437 util::INVENTORY_INTF, "Notify", objectMap); 438 439 if (response.is_method_error()) 440 { 441 log<level::ERR>("Error in Notify call to update inventory"); 442 443 dbusError = true; 444 } 445 } 446 catch (const util::DBusError& e) 447 { 448 dbusError = true; 449 450 getLogger().log( 451 std::format("D-Bus Exception reading/updating inventory : {}", 452 e.what()), 453 Logger::error); 454 } 455 456 // This will always track the current state of the inventory. 457 _functional = functional; 458 459 return dbusError; 460 } 461 462 void Fan::presenceChanged(sdbusplus::message_t& msg) 463 { 464 std::string interface; 465 std::map<std::string, std::variant<bool>> properties; 466 467 msg.read(interface, properties); 468 469 auto presentProp = properties.find("Present"); 470 if (presentProp != properties.end()) 471 { 472 _present = std::get<bool>(presentProp->second); 473 474 getLogger().log( 475 std::format("Fan {} presence state change to {}", _name, _present)); 476 477 if (_present && _setFuncOnPresent) 478 { 479 updateInventory(true); 480 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 481 sensor->setFunctional(true); 482 sensor->resetMethod(); 483 }); 484 } 485 486 _system.fanStatusChange(*this); 487 488 if (_fanMissingErrorDelay) 489 { 490 if (!_present && _system.isPowerOn()) 491 { 492 _fanMissingErrorTimer->restartOnce( 493 std::chrono::seconds{*_fanMissingErrorDelay}); 494 } 495 else if (_present && _fanMissingErrorTimer->isEnabled()) 496 { 497 _fanMissingErrorTimer->setEnabled(false); 498 } 499 } 500 } 501 } 502 503 void Fan::sensorErrorTimerExpired(const TachSensor& sensor) 504 { 505 if (_present && _system.isPowerOn()) 506 { 507 _system.sensorErrorTimerExpired(*this, sensor); 508 } 509 } 510 511 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn) 512 { 513 #ifdef MONITOR_USE_JSON 514 if (powerStateOn) 515 { 516 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 517 518 _numSensorsOnDBusAtPowerOn = 0; 519 520 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 521 try 522 { 523 // Force a getProperty call. If sensor is on D-Bus, 524 // then make sure it's functional. 525 sensor->updateTachAndTarget(); 526 527 _numSensorsOnDBusAtPowerOn++; 528 529 if (_present) 530 { 531 // If not functional, set it back to functional. 532 if (!sensor->functional()) 533 { 534 sensor->setFunctional(true); 535 _system.fanStatusChange(*this, true); 536 } 537 538 // Set the counters back to zero 539 if (sensor->getMethod() == MethodMode::count) 540 { 541 sensor->resetMethod(); 542 } 543 } 544 } 545 catch (const util::DBusError& e) 546 { 547 // Properties still aren't on D-Bus. Let startMonitor() 548 // deal with it, or maybe System::powerStateChanged() if 549 // there aren't any sensors at all on D-Bus. 550 getLogger().log(std::format( 551 "At power on, tach sensor {} value not on D-Bus", 552 sensor->name())); 553 } 554 }); 555 556 if (_present) 557 { 558 // If configured to change functional state on the fan itself, 559 // Set it back to true now if necessary. 560 if (_numSensorFailsForNonFunc) 561 { 562 if (!_functional && 563 (countNonFunctionalSensors() < _numSensorFailsForNonFunc)) 564 { 565 updateInventory(true); 566 } 567 } 568 } 569 else 570 { 571 getLogger().log( 572 std::format("At power on, fan {} is missing", _name)); 573 574 if (_fanMissingErrorTimer) 575 { 576 _fanMissingErrorTimer->restartOnce( 577 std::chrono::seconds{*_fanMissingErrorDelay}); 578 } 579 } 580 } 581 else 582 { 583 _monitorReady = false; 584 585 if (_monitorTimer.isEnabled()) 586 { 587 _monitorTimer.setEnabled(false); 588 } 589 590 if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled()) 591 { 592 _fanMissingErrorTimer->setEnabled(false); 593 } 594 595 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 596 if (sensor->timerRunning()) 597 { 598 sensor->stopTimer(); 599 } 600 601 sensor->stopCountTimer(); 602 }); 603 } 604 #endif 605 } 606 607 } // namespace monitor 608 } // namespace fan 609 } // namespace phosphor 610