1 /** 2 * Copyright © 2022 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "fan.hpp" 17 18 #include "logging.hpp" 19 #include "sdbusplus.hpp" 20 #include "system.hpp" 21 #include "types.hpp" 22 #include "utility.hpp" 23 24 #include <fmt/format.h> 25 26 #include <phosphor-logging/log.hpp> 27 28 namespace phosphor 29 { 30 namespace fan 31 { 32 namespace monitor 33 { 34 35 using namespace phosphor::logging; 36 using namespace sdbusplus::bus::match; 37 38 Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event, 39 std::unique_ptr<trust::Manager>& trust, const FanDefinition& def, 40 System& system) : 41 _bus(bus), 42 _name(def.name), _deviation(def.deviation), 43 _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc), 44 _trustManager(trust), 45 #ifdef MONITOR_USE_JSON 46 _monitorDelay(def.monitorStartDelay), 47 _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)), 48 #endif 49 _system(system), 50 _presenceMatch(bus, 51 rules::propertiesChanged(util::INVENTORY_PATH + _name, 52 util::INV_ITEM_IFACE), 53 std::bind(std::mem_fn(&Fan::presenceChanged), this, 54 std::placeholders::_1)), 55 _presenceIfaceAddedMatch( 56 bus, 57 rules::interfacesAdded() + 58 rules::argNpath(0, util::INVENTORY_PATH + _name), 59 std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this, 60 std::placeholders::_1)), 61 _fanMissingErrorDelay(def.fanMissingErrDelay), 62 _setFuncOnPresent(def.funcOnPresent) 63 { 64 // Setup tach sensors for monitoring 65 for (const auto& s : def.sensorList) 66 { 67 _sensors.emplace_back(std::make_shared<TachSensor>( 68 mode, bus, *this, s.name, s.hasTarget, def.funcDelay, 69 s.targetInterface, s.targetPath, s.factor, s.offset, def.method, 70 s.threshold, s.ignoreAboveMax, def.timeout, 71 def.nonfuncRotorErrDelay, def.countInterval, event)); 72 73 _trustManager->registerSensor(_sensors.back()); 74 } 75 76 bool functionalState = 77 (_numSensorFailsForNonFunc == 0) || 78 (countNonFunctionalSensors() < _numSensorFailsForNonFunc); 79 80 if (updateInventory(functionalState) && !functionalState) 81 { 82 // the inventory update threw an exception, possibly because D-Bus 83 // wasn't ready. Try to update sensors back to functional to avoid a 84 // false-alarm. They will be updated again from subscribing to the 85 // properties-changed event 86 87 for (auto& sensor : _sensors) 88 sensor->setFunctional(true); 89 } 90 91 #ifndef MONITOR_USE_JSON 92 // Check current tach state when entering monitor mode 93 if (mode != Mode::init) 94 { 95 _monitorReady = true; 96 97 // The TachSensors will now have already read the input 98 // and target values, so check them. 99 tachChanged(); 100 } 101 #else 102 if (_system.isPowerOn()) 103 { 104 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 105 } 106 #endif 107 108 if (_fanMissingErrorDelay) 109 { 110 _fanMissingErrorTimer = std::make_unique< 111 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 112 event, std::bind(&System::fanMissingErrorTimerExpired, &system, 113 std::ref(*this))); 114 } 115 116 try 117 { 118 _present = util::SDBusPlus::getProperty<bool>( 119 util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present"); 120 121 if (!_present) 122 { 123 getLogger().log( 124 fmt::format("On startup, fan {} is missing", _name)); 125 if (_system.isPowerOn() && _fanMissingErrorTimer) 126 { 127 _fanMissingErrorTimer->restartOnce( 128 std::chrono::seconds{*_fanMissingErrorDelay}); 129 } 130 } 131 } 132 catch (const util::DBusServiceError& e) 133 { 134 // This could happen on the first BMC boot if the presence 135 // detect app hasn't started yet and there isn't an inventory 136 // cache yet. 137 } 138 } 139 140 void Fan::presenceIfaceAdded(sdbusplus::message_t& msg) 141 { 142 sdbusplus::message::object_path path; 143 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 144 145 msg.read(path, interfaces); 146 147 auto properties = interfaces.find(util::INV_ITEM_IFACE); 148 if (properties == interfaces.end()) 149 { 150 return; 151 } 152 153 auto property = properties->second.find("Present"); 154 if (property == properties->second.end()) 155 { 156 return; 157 } 158 159 _present = std::get<bool>(property->second); 160 161 if (!_present) 162 { 163 getLogger().log(fmt::format( 164 "New fan {} interface added and fan is not present", _name)); 165 if (_system.isPowerOn() && _fanMissingErrorTimer) 166 { 167 _fanMissingErrorTimer->restartOnce( 168 std::chrono::seconds{*_fanMissingErrorDelay}); 169 } 170 } 171 172 _system.fanStatusChange(*this); 173 } 174 175 void Fan::startMonitor() 176 { 177 _monitorReady = true; 178 179 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 180 if (_present) 181 { 182 try 183 { 184 // Force a getProperty call to check if the tach sensor is 185 // on D-Bus. If it isn't, now set it to nonfunctional. 186 // This isn't done earlier so that code watching for 187 // nonfunctional tach sensors doesn't take actions before 188 // those sensors show up on D-Bus. 189 sensor->updateTachAndTarget(); 190 tachChanged(*sensor); 191 } 192 catch (const util::DBusServiceError& e) 193 { 194 // The tach property still isn't on D-Bus. Ensure 195 // sensor is nonfunctional, but skip creating an 196 // error for it since it isn't a fan problem. 197 getLogger().log(fmt::format( 198 "Monitoring starting but {} sensor value not on D-Bus", 199 sensor->name())); 200 201 sensor->setFunctional(false, true); 202 203 if (_numSensorFailsForNonFunc) 204 { 205 if (_functional && (countNonFunctionalSensors() >= 206 _numSensorFailsForNonFunc)) 207 { 208 updateInventory(false); 209 } 210 } 211 212 // At this point, don't start any power off actions due 213 // to missing sensors. Let something else handle that 214 // policy. 215 _system.fanStatusChange(*this, true); 216 } 217 } 218 }); 219 } 220 221 void Fan::tachChanged() 222 { 223 if (_monitorReady) 224 { 225 for (auto& s : _sensors) 226 { 227 tachChanged(*s); 228 } 229 } 230 } 231 232 void Fan::tachChanged(TachSensor& sensor) 233 { 234 if (!_system.isPowerOn() || !_monitorReady) 235 { 236 return; 237 } 238 239 if (_trustManager->active()) 240 { 241 if (!_trustManager->checkTrust(sensor)) 242 { 243 return; 244 } 245 } 246 247 // If the error checking method is 'count', if a tach change leads 248 // to an out of range sensor the count timer will take over in calling 249 // process() until the sensor is healthy again. 250 if (!sensor.countTimerRunning()) 251 { 252 process(sensor); 253 } 254 } 255 256 void Fan::countTimerExpired(TachSensor& sensor) 257 { 258 if (_trustManager->active() && !_trustManager->checkTrust(sensor)) 259 { 260 return; 261 } 262 process(sensor); 263 } 264 265 void Fan::process(TachSensor& sensor) 266 { 267 // If this sensor is out of range at this moment, start 268 // its timer, at the end of which the inventory 269 // for the fan may get updated to not functional. 270 271 // If this sensor is OK, put everything back into a good state. 272 273 if (outOfRange(sensor)) 274 { 275 if (sensor.functional()) 276 { 277 switch (sensor.getMethod()) 278 { 279 case MethodMode::timebased: 280 // Start nonfunctional timer if not already running 281 sensor.startTimer(TimerMode::nonfunc); 282 break; 283 case MethodMode::count: 284 285 if (!sensor.countTimerRunning()) 286 { 287 sensor.startCountTimer(); 288 } 289 sensor.setCounter(true); 290 if (sensor.getCounter() >= sensor.getThreshold()) 291 { 292 updateState(sensor); 293 } 294 break; 295 } 296 } 297 } 298 else 299 { 300 switch (sensor.getMethod()) 301 { 302 case MethodMode::timebased: 303 if (sensor.functional()) 304 { 305 if (sensor.timerRunning()) 306 { 307 sensor.stopTimer(); 308 } 309 } 310 else 311 { 312 // Start functional timer if not already running 313 sensor.startTimer(TimerMode::func); 314 } 315 break; 316 case MethodMode::count: 317 sensor.setCounter(false); 318 if (sensor.getCounter() == 0) 319 { 320 if (!sensor.functional()) 321 { 322 updateState(sensor); 323 } 324 325 sensor.stopCountTimer(); 326 } 327 break; 328 } 329 } 330 } 331 332 uint64_t Fan::findTargetSpeed() 333 { 334 uint64_t target = 0; 335 // The sensor doesn't support a target, 336 // so get it from another sensor. 337 auto s = std::find_if(_sensors.begin(), _sensors.end(), 338 [](const auto& s) { return s->hasTarget(); }); 339 340 if (s != _sensors.end()) 341 { 342 target = (*s)->getTarget(); 343 } 344 345 return target; 346 } 347 348 size_t Fan::countNonFunctionalSensors() const 349 { 350 return std::count_if(_sensors.begin(), _sensors.end(), 351 [](const auto& s) { return !s->functional(); }); 352 } 353 354 bool Fan::outOfRange(const TachSensor& sensor) 355 { 356 if (!sensor.hasOwner()) 357 { 358 return true; 359 } 360 361 auto actual = static_cast<uint64_t>(sensor.getInput()); 362 auto range = sensor.getRange(_deviation); 363 364 return ((actual < range.first) || 365 (range.second && actual > range.second.value())); 366 } 367 368 void Fan::updateState(TachSensor& sensor) 369 { 370 if (!_system.isPowerOn()) 371 { 372 return; 373 } 374 375 auto range = sensor.getRange(_deviation); 376 std::string rangeMax = "NoMax"; 377 if (range.second) 378 { 379 rangeMax = std::to_string(range.second.value()); 380 } 381 382 // Skip starting the error timer if the sensor 383 // isn't on D-Bus as this isn't a fan hardware problem. 384 sensor.setFunctional(!sensor.functional(), !sensor.hasOwner()); 385 386 getLogger().log(fmt::format( 387 "Setting tach sensor {} functional state to {}. " 388 "[target = {}, actual = {}, allowed range = ({} - {}) " 389 "owned = {}]", 390 sensor.name(), sensor.functional(), sensor.getTarget(), 391 sensor.getInput(), range.first, rangeMax, sensor.hasOwner())); 392 393 // A zero value for _numSensorFailsForNonFunc means we aren't dealing 394 // with fan FRU functional status, only sensor functional status. 395 if (_numSensorFailsForNonFunc) 396 { 397 auto numNonFuncSensors = countNonFunctionalSensors(); 398 // If the fan was nonfunctional and enough sensors are now OK, 399 // the fan can be set to functional as long as `set_func_on_present` was 400 // not set 401 if (!_setFuncOnPresent && !_functional && 402 !(numNonFuncSensors >= _numSensorFailsForNonFunc)) 403 { 404 getLogger().log(fmt::format("Setting fan {} to functional, number " 405 "of nonfunctional sensors = {}", 406 _name, numNonFuncSensors)); 407 updateInventory(true); 408 } 409 410 // If the fan is currently functional, but too many 411 // contained sensors are now nonfunctional, update 412 // the fan to nonfunctional. 413 if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc)) 414 { 415 getLogger().log(fmt::format("Setting fan {} to nonfunctional, " 416 "number of nonfunctional sensors = {}", 417 _name, numNonFuncSensors)); 418 updateInventory(false); 419 } 420 } 421 422 // Skip the power off rule checks if the sensor isn't 423 // on D-Bus so a running system isn't shutdown. 424 _system.fanStatusChange(*this, !sensor.hasOwner()); 425 } 426 427 bool Fan::updateInventory(bool functional) 428 { 429 bool dbusError = false; 430 431 try 432 { 433 auto objectMap = 434 util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF, 435 util::FUNCTIONAL_PROPERTY, functional); 436 437 auto response = util::SDBusPlus::callMethod( 438 _bus, util::INVENTORY_SVC, util::INVENTORY_PATH, 439 util::INVENTORY_INTF, "Notify", objectMap); 440 441 if (response.is_method_error()) 442 { 443 log<level::ERR>("Error in Notify call to update inventory"); 444 445 dbusError = true; 446 } 447 } 448 catch (const util::DBusError& e) 449 { 450 dbusError = true; 451 452 getLogger().log( 453 fmt::format("D-Bus Exception reading/updating inventory : {}", 454 e.what()), 455 Logger::error); 456 } 457 458 // This will always track the current state of the inventory. 459 _functional = functional; 460 461 return dbusError; 462 } 463 464 void Fan::presenceChanged(sdbusplus::message_t& msg) 465 { 466 std::string interface; 467 std::map<std::string, std::variant<bool>> properties; 468 469 msg.read(interface, properties); 470 471 auto presentProp = properties.find("Present"); 472 if (presentProp != properties.end()) 473 { 474 _present = std::get<bool>(presentProp->second); 475 476 getLogger().log( 477 fmt::format("Fan {} presence state change to {}", _name, _present)); 478 479 if (_present && _setFuncOnPresent) 480 { 481 updateInventory(true); 482 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 483 sensor->setFunctional(true); 484 sensor->resetMethod(); 485 }); 486 } 487 488 _system.fanStatusChange(*this); 489 490 if (_fanMissingErrorDelay) 491 { 492 if (!_present && _system.isPowerOn()) 493 { 494 _fanMissingErrorTimer->restartOnce( 495 std::chrono::seconds{*_fanMissingErrorDelay}); 496 } 497 else if (_present && _fanMissingErrorTimer->isEnabled()) 498 { 499 _fanMissingErrorTimer->setEnabled(false); 500 } 501 } 502 } 503 } 504 505 void Fan::sensorErrorTimerExpired(const TachSensor& sensor) 506 { 507 if (_present && _system.isPowerOn()) 508 { 509 _system.sensorErrorTimerExpired(*this, sensor); 510 } 511 } 512 513 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn) 514 { 515 #ifdef MONITOR_USE_JSON 516 if (powerStateOn) 517 { 518 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 519 520 _numSensorsOnDBusAtPowerOn = 0; 521 522 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 523 try 524 { 525 // Force a getProperty call. If sensor is on D-Bus, 526 // then make sure it's functional. 527 sensor->updateTachAndTarget(); 528 529 _numSensorsOnDBusAtPowerOn++; 530 531 if (_present) 532 { 533 // If not functional, set it back to functional. 534 if (!sensor->functional()) 535 { 536 sensor->setFunctional(true); 537 _system.fanStatusChange(*this, true); 538 } 539 540 // Set the counters back to zero 541 if (sensor->getMethod() == MethodMode::count) 542 { 543 sensor->resetMethod(); 544 } 545 } 546 } 547 catch (const util::DBusError& e) 548 { 549 // Properties still aren't on D-Bus. Let startMonitor() 550 // deal with it, or maybe System::powerStateChanged() if 551 // there aren't any sensors at all on D-Bus. 552 getLogger().log(fmt::format( 553 "At power on, tach sensor {} value not on D-Bus", 554 sensor->name())); 555 } 556 }); 557 558 if (_present) 559 { 560 // If configured to change functional state on the fan itself, 561 // Set it back to true now if necessary. 562 if (_numSensorFailsForNonFunc) 563 { 564 if (!_functional && 565 (countNonFunctionalSensors() < _numSensorFailsForNonFunc)) 566 { 567 updateInventory(true); 568 } 569 } 570 } 571 else 572 { 573 getLogger().log( 574 fmt::format("At power on, fan {} is missing", _name)); 575 576 if (_fanMissingErrorTimer) 577 { 578 _fanMissingErrorTimer->restartOnce( 579 std::chrono::seconds{*_fanMissingErrorDelay}); 580 } 581 } 582 } 583 else 584 { 585 _monitorReady = false; 586 587 if (_monitorTimer.isEnabled()) 588 { 589 _monitorTimer.setEnabled(false); 590 } 591 592 if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled()) 593 { 594 _fanMissingErrorTimer->setEnabled(false); 595 } 596 597 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 598 if (sensor->timerRunning()) 599 { 600 sensor->stopTimer(); 601 } 602 603 sensor->stopCountTimer(); 604 }); 605 } 606 #endif 607 } 608 609 } // namespace monitor 610 } // namespace fan 611 } // namespace phosphor 612