1 /** 2 * Copyright © 2022 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "fan.hpp" 17 18 #include "logging.hpp" 19 #include "sdbusplus.hpp" 20 #include "system.hpp" 21 #include "types.hpp" 22 #include "utility.hpp" 23 24 #include <fmt/format.h> 25 26 #include <phosphor-logging/log.hpp> 27 28 namespace phosphor 29 { 30 namespace fan 31 { 32 namespace monitor 33 { 34 35 using namespace phosphor::logging; 36 using namespace sdbusplus::bus::match; 37 38 Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event, 39 std::unique_ptr<trust::Manager>& trust, const FanDefinition& def, 40 System& system) : 41 _bus(bus), 42 _name(def.name), _deviation(def.deviation), 43 _upperDeviation(def.upperDeviation), 44 _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc), 45 _trustManager(trust), 46 #ifdef MONITOR_USE_JSON 47 _monitorDelay(def.monitorStartDelay), 48 _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)), 49 #endif 50 _system(system), 51 _presenceMatch(bus, 52 rules::propertiesChanged(util::INVENTORY_PATH + _name, 53 util::INV_ITEM_IFACE), 54 std::bind(std::mem_fn(&Fan::presenceChanged), this, 55 std::placeholders::_1)), 56 _presenceIfaceAddedMatch( 57 bus, 58 rules::interfacesAdded() + 59 rules::argNpath(0, util::INVENTORY_PATH + _name), 60 std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this, 61 std::placeholders::_1)), 62 _fanMissingErrorDelay(def.fanMissingErrDelay), 63 _setFuncOnPresent(def.funcOnPresent) 64 { 65 // Setup tach sensors for monitoring 66 for (const auto& s : def.sensorList) 67 { 68 _sensors.emplace_back(std::make_shared<TachSensor>( 69 mode, bus, *this, s.name, s.hasTarget, def.funcDelay, 70 s.targetInterface, s.targetPath, s.factor, s.offset, def.method, 71 s.threshold, s.ignoreAboveMax, def.timeout, 72 def.nonfuncRotorErrDelay, def.countInterval, event)); 73 74 _trustManager->registerSensor(_sensors.back()); 75 } 76 77 bool functionalState = 78 (_numSensorFailsForNonFunc == 0) || 79 (countNonFunctionalSensors() < _numSensorFailsForNonFunc); 80 81 if (updateInventory(functionalState) && !functionalState) 82 { 83 // the inventory update threw an exception, possibly because D-Bus 84 // wasn't ready. Try to update sensors back to functional to avoid a 85 // false-alarm. They will be updated again from subscribing to the 86 // properties-changed event 87 88 for (auto& sensor : _sensors) 89 sensor->setFunctional(true); 90 } 91 92 #ifndef MONITOR_USE_JSON 93 // Check current tach state when entering monitor mode 94 if (mode != Mode::init) 95 { 96 _monitorReady = true; 97 98 // The TachSensors will now have already read the input 99 // and target values, so check them. 100 tachChanged(); 101 } 102 #else 103 if (_system.isPowerOn()) 104 { 105 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 106 } 107 #endif 108 109 if (_fanMissingErrorDelay) 110 { 111 _fanMissingErrorTimer = std::make_unique< 112 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 113 event, std::bind(&System::fanMissingErrorTimerExpired, &system, 114 std::ref(*this))); 115 } 116 117 try 118 { 119 _present = util::SDBusPlus::getProperty<bool>( 120 util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present"); 121 122 if (!_present) 123 { 124 getLogger().log( 125 fmt::format("On startup, fan {} is missing", _name)); 126 if (_system.isPowerOn() && _fanMissingErrorTimer) 127 { 128 _fanMissingErrorTimer->restartOnce( 129 std::chrono::seconds{*_fanMissingErrorDelay}); 130 } 131 } 132 } 133 catch (const util::DBusServiceError& e) 134 { 135 // This could happen on the first BMC boot if the presence 136 // detect app hasn't started yet and there isn't an inventory 137 // cache yet. 138 } 139 } 140 141 void Fan::presenceIfaceAdded(sdbusplus::message_t& msg) 142 { 143 sdbusplus::message::object_path path; 144 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 145 146 msg.read(path, interfaces); 147 148 auto properties = interfaces.find(util::INV_ITEM_IFACE); 149 if (properties == interfaces.end()) 150 { 151 return; 152 } 153 154 auto property = properties->second.find("Present"); 155 if (property == properties->second.end()) 156 { 157 return; 158 } 159 160 _present = std::get<bool>(property->second); 161 162 if (!_present) 163 { 164 getLogger().log(fmt::format( 165 "New fan {} interface added and fan is not present", _name)); 166 if (_system.isPowerOn() && _fanMissingErrorTimer) 167 { 168 _fanMissingErrorTimer->restartOnce( 169 std::chrono::seconds{*_fanMissingErrorDelay}); 170 } 171 } 172 173 _system.fanStatusChange(*this); 174 } 175 176 void Fan::startMonitor() 177 { 178 _monitorReady = true; 179 180 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 181 if (_present) 182 { 183 try 184 { 185 // Force a getProperty call to check if the tach sensor is 186 // on D-Bus. If it isn't, now set it to nonfunctional. 187 // This isn't done earlier so that code watching for 188 // nonfunctional tach sensors doesn't take actions before 189 // those sensors show up on D-Bus. 190 sensor->updateTachAndTarget(); 191 tachChanged(*sensor); 192 } 193 catch (const util::DBusServiceError& e) 194 { 195 // The tach property still isn't on D-Bus. Ensure 196 // sensor is nonfunctional, but skip creating an 197 // error for it since it isn't a fan problem. 198 getLogger().log(fmt::format( 199 "Monitoring starting but {} sensor value not on D-Bus", 200 sensor->name())); 201 202 sensor->setFunctional(false, true); 203 204 if (_numSensorFailsForNonFunc) 205 { 206 if (_functional && (countNonFunctionalSensors() >= 207 _numSensorFailsForNonFunc)) 208 { 209 updateInventory(false); 210 } 211 } 212 213 // At this point, don't start any power off actions due 214 // to missing sensors. Let something else handle that 215 // policy. 216 _system.fanStatusChange(*this, true); 217 } 218 } 219 }); 220 } 221 222 void Fan::tachChanged() 223 { 224 if (_monitorReady) 225 { 226 for (auto& s : _sensors) 227 { 228 tachChanged(*s); 229 } 230 } 231 } 232 233 void Fan::tachChanged(TachSensor& sensor) 234 { 235 if (!_system.isPowerOn() || !_monitorReady) 236 { 237 return; 238 } 239 240 if (_trustManager->active()) 241 { 242 if (!_trustManager->checkTrust(sensor)) 243 { 244 return; 245 } 246 } 247 248 // If the error checking method is 'count', if a tach change leads 249 // to an out of range sensor the count timer will take over in calling 250 // process() until the sensor is healthy again. 251 if (!sensor.countTimerRunning()) 252 { 253 process(sensor); 254 } 255 } 256 257 void Fan::countTimerExpired(TachSensor& sensor) 258 { 259 if (_trustManager->active() && !_trustManager->checkTrust(sensor)) 260 { 261 return; 262 } 263 process(sensor); 264 } 265 266 void Fan::process(TachSensor& sensor) 267 { 268 // If this sensor is out of range at this moment, start 269 // its timer, at the end of which the inventory 270 // for the fan may get updated to not functional. 271 272 // If this sensor is OK, put everything back into a good state. 273 274 if (outOfRange(sensor)) 275 { 276 if (sensor.functional()) 277 { 278 switch (sensor.getMethod()) 279 { 280 case MethodMode::timebased: 281 // Start nonfunctional timer if not already running 282 sensor.startTimer(TimerMode::nonfunc); 283 break; 284 case MethodMode::count: 285 286 if (!sensor.countTimerRunning()) 287 { 288 sensor.startCountTimer(); 289 } 290 sensor.setCounter(true); 291 if (sensor.getCounter() >= sensor.getThreshold()) 292 { 293 updateState(sensor); 294 } 295 break; 296 } 297 } 298 } 299 else 300 { 301 switch (sensor.getMethod()) 302 { 303 case MethodMode::timebased: 304 if (sensor.functional()) 305 { 306 if (sensor.timerRunning()) 307 { 308 sensor.stopTimer(); 309 } 310 } 311 else 312 { 313 // Start functional timer if not already running 314 sensor.startTimer(TimerMode::func); 315 } 316 break; 317 case MethodMode::count: 318 sensor.setCounter(false); 319 if (sensor.getCounter() == 0) 320 { 321 if (!sensor.functional()) 322 { 323 updateState(sensor); 324 } 325 326 sensor.stopCountTimer(); 327 } 328 break; 329 } 330 } 331 } 332 333 uint64_t Fan::findTargetSpeed() 334 { 335 uint64_t target = 0; 336 // The sensor doesn't support a target, 337 // so get it from another sensor. 338 auto s = std::find_if(_sensors.begin(), _sensors.end(), 339 [](const auto& s) { return s->hasTarget(); }); 340 341 if (s != _sensors.end()) 342 { 343 target = (*s)->getTarget(); 344 } 345 346 return target; 347 } 348 349 size_t Fan::countNonFunctionalSensors() const 350 { 351 return std::count_if(_sensors.begin(), _sensors.end(), 352 [](const auto& s) { return !s->functional(); }); 353 } 354 355 bool Fan::outOfRange(const TachSensor& sensor) 356 { 357 if (!sensor.hasOwner()) 358 { 359 return true; 360 } 361 362 auto actual = static_cast<uint64_t>(sensor.getInput()); 363 auto range = sensor.getRange(_deviation, _upperDeviation); 364 365 return ((actual < range.first) || 366 (range.second && actual > range.second.value())); 367 } 368 369 void Fan::updateState(TachSensor& sensor) 370 { 371 if (!_system.isPowerOn()) 372 { 373 return; 374 } 375 376 auto range = sensor.getRange(_deviation, _upperDeviation); 377 std::string rangeMax = "NoMax"; 378 if (range.second) 379 { 380 rangeMax = std::to_string(range.second.value()); 381 } 382 383 // Skip starting the error timer if the sensor 384 // isn't on D-Bus as this isn't a fan hardware problem. 385 sensor.setFunctional(!sensor.functional(), !sensor.hasOwner()); 386 387 getLogger().log(fmt::format( 388 "Setting tach sensor {} functional state to {}. " 389 "[target = {}, actual = {}, allowed range = ({} - {}) " 390 "owned = {}]", 391 sensor.name(), sensor.functional(), sensor.getTarget(), 392 sensor.getInput(), range.first, rangeMax, sensor.hasOwner())); 393 394 // A zero value for _numSensorFailsForNonFunc means we aren't dealing 395 // with fan FRU functional status, only sensor functional status. 396 if (_numSensorFailsForNonFunc) 397 { 398 auto numNonFuncSensors = countNonFunctionalSensors(); 399 // If the fan was nonfunctional and enough sensors are now OK, 400 // the fan can be set to functional as long as `set_func_on_present` was 401 // not set 402 if (!_setFuncOnPresent && !_functional && 403 !(numNonFuncSensors >= _numSensorFailsForNonFunc)) 404 { 405 getLogger().log(fmt::format("Setting fan {} to functional, number " 406 "of nonfunctional sensors = {}", 407 _name, numNonFuncSensors)); 408 updateInventory(true); 409 } 410 411 // If the fan is currently functional, but too many 412 // contained sensors are now nonfunctional, update 413 // the fan to nonfunctional. 414 if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc)) 415 { 416 getLogger().log(fmt::format("Setting fan {} to nonfunctional, " 417 "number of nonfunctional sensors = {}", 418 _name, numNonFuncSensors)); 419 updateInventory(false); 420 } 421 } 422 423 // Skip the power off rule checks if the sensor isn't 424 // on D-Bus so a running system isn't shutdown. 425 _system.fanStatusChange(*this, !sensor.hasOwner()); 426 } 427 428 bool Fan::updateInventory(bool functional) 429 { 430 bool dbusError = false; 431 432 try 433 { 434 auto objectMap = 435 util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF, 436 util::FUNCTIONAL_PROPERTY, functional); 437 438 auto response = util::SDBusPlus::callMethod( 439 _bus, util::INVENTORY_SVC, util::INVENTORY_PATH, 440 util::INVENTORY_INTF, "Notify", objectMap); 441 442 if (response.is_method_error()) 443 { 444 log<level::ERR>("Error in Notify call to update inventory"); 445 446 dbusError = true; 447 } 448 } 449 catch (const util::DBusError& e) 450 { 451 dbusError = true; 452 453 getLogger().log( 454 fmt::format("D-Bus Exception reading/updating inventory : {}", 455 e.what()), 456 Logger::error); 457 } 458 459 // This will always track the current state of the inventory. 460 _functional = functional; 461 462 return dbusError; 463 } 464 465 void Fan::presenceChanged(sdbusplus::message_t& msg) 466 { 467 std::string interface; 468 std::map<std::string, std::variant<bool>> properties; 469 470 msg.read(interface, properties); 471 472 auto presentProp = properties.find("Present"); 473 if (presentProp != properties.end()) 474 { 475 _present = std::get<bool>(presentProp->second); 476 477 getLogger().log( 478 fmt::format("Fan {} presence state change to {}", _name, _present)); 479 480 if (_present && _setFuncOnPresent) 481 { 482 updateInventory(true); 483 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 484 sensor->setFunctional(true); 485 sensor->resetMethod(); 486 }); 487 } 488 489 _system.fanStatusChange(*this); 490 491 if (_fanMissingErrorDelay) 492 { 493 if (!_present && _system.isPowerOn()) 494 { 495 _fanMissingErrorTimer->restartOnce( 496 std::chrono::seconds{*_fanMissingErrorDelay}); 497 } 498 else if (_present && _fanMissingErrorTimer->isEnabled()) 499 { 500 _fanMissingErrorTimer->setEnabled(false); 501 } 502 } 503 } 504 } 505 506 void Fan::sensorErrorTimerExpired(const TachSensor& sensor) 507 { 508 if (_present && _system.isPowerOn()) 509 { 510 _system.sensorErrorTimerExpired(*this, sensor); 511 } 512 } 513 514 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn) 515 { 516 #ifdef MONITOR_USE_JSON 517 if (powerStateOn) 518 { 519 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 520 521 _numSensorsOnDBusAtPowerOn = 0; 522 523 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 524 try 525 { 526 // Force a getProperty call. If sensor is on D-Bus, 527 // then make sure it's functional. 528 sensor->updateTachAndTarget(); 529 530 _numSensorsOnDBusAtPowerOn++; 531 532 if (_present) 533 { 534 // If not functional, set it back to functional. 535 if (!sensor->functional()) 536 { 537 sensor->setFunctional(true); 538 _system.fanStatusChange(*this, true); 539 } 540 541 // Set the counters back to zero 542 if (sensor->getMethod() == MethodMode::count) 543 { 544 sensor->resetMethod(); 545 } 546 } 547 } 548 catch (const util::DBusError& e) 549 { 550 // Properties still aren't on D-Bus. Let startMonitor() 551 // deal with it, or maybe System::powerStateChanged() if 552 // there aren't any sensors at all on D-Bus. 553 getLogger().log(fmt::format( 554 "At power on, tach sensor {} value not on D-Bus", 555 sensor->name())); 556 } 557 }); 558 559 if (_present) 560 { 561 // If configured to change functional state on the fan itself, 562 // Set it back to true now if necessary. 563 if (_numSensorFailsForNonFunc) 564 { 565 if (!_functional && 566 (countNonFunctionalSensors() < _numSensorFailsForNonFunc)) 567 { 568 updateInventory(true); 569 } 570 } 571 } 572 else 573 { 574 getLogger().log( 575 fmt::format("At power on, fan {} is missing", _name)); 576 577 if (_fanMissingErrorTimer) 578 { 579 _fanMissingErrorTimer->restartOnce( 580 std::chrono::seconds{*_fanMissingErrorDelay}); 581 } 582 } 583 } 584 else 585 { 586 _monitorReady = false; 587 588 if (_monitorTimer.isEnabled()) 589 { 590 _monitorTimer.setEnabled(false); 591 } 592 593 if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled()) 594 { 595 _fanMissingErrorTimer->setEnabled(false); 596 } 597 598 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 599 if (sensor->timerRunning()) 600 { 601 sensor->stopTimer(); 602 } 603 604 sensor->stopCountTimer(); 605 }); 606 } 607 #endif 608 } 609 610 } // namespace monitor 611 } // namespace fan 612 } // namespace phosphor 613