1 /** 2 * Copyright © 2017 IBM Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #include "fan.hpp" 17 18 #include "logging.hpp" 19 #include "sdbusplus.hpp" 20 #include "system.hpp" 21 #include "types.hpp" 22 #include "utility.hpp" 23 24 #include <fmt/format.h> 25 26 #include <phosphor-logging/log.hpp> 27 28 #include <algorithm> 29 30 namespace phosphor 31 { 32 namespace fan 33 { 34 namespace monitor 35 { 36 37 using namespace phosphor::logging; 38 using namespace sdbusplus::bus::match; 39 40 Fan::Fan(Mode mode, sdbusplus::bus::bus& bus, const sdeventplus::Event& event, 41 std::unique_ptr<trust::Manager>& trust, const FanDefinition& def, 42 System& system) : 43 _bus(bus), 44 _name(std::get<fanNameField>(def)), 45 _deviation(std::get<fanDeviationField>(def)), 46 _numSensorFailsForNonFunc(std::get<numSensorFailsForNonfuncField>(def)), 47 _trustManager(trust), 48 #ifdef MONITOR_USE_JSON 49 _monitorDelay(std::get<monitorStartDelayField>(def)), 50 _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)), 51 #endif 52 _system(system), 53 _presenceMatch(bus, 54 rules::propertiesChanged(util::INVENTORY_PATH + _name, 55 util::INV_ITEM_IFACE), 56 std::bind(std::mem_fn(&Fan::presenceChanged), this, 57 std::placeholders::_1)), 58 _presenceIfaceAddedMatch( 59 bus, 60 rules::interfacesAdded() + 61 rules::argNpath(0, util::INVENTORY_PATH + _name), 62 std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this, 63 std::placeholders::_1)), 64 _fanMissingErrorDelay(std::get<fanMissingErrDelayField>(def)), 65 _setFuncOnPresent(std::get<funcOnPresentField>(def)) 66 { 67 // Setup tach sensors for monitoring 68 auto& sensors = std::get<sensorListField>(def); 69 for (auto& s : sensors) 70 { 71 _sensors.emplace_back(std::make_shared<TachSensor>( 72 mode, bus, *this, std::get<sensorNameField>(s), 73 std::get<hasTargetField>(s), std::get<funcDelay>(def), 74 std::get<targetInterfaceField>(s), std::get<factorField>(s), 75 std::get<offsetField>(s), std::get<methodField>(def), 76 std::get<thresholdField>(s), std::get<ignoreAboveMaxField>(s), 77 std::get<timeoutField>(def), 78 std::get<nonfuncRotorErrDelayField>(def), 79 std::get<countIntervalField>(def), event)); 80 81 _trustManager->registerSensor(_sensors.back()); 82 } 83 84 bool functionalState = 85 (_numSensorFailsForNonFunc == 0) || 86 (countNonFunctionalSensors() < _numSensorFailsForNonFunc); 87 88 if (updateInventory(functionalState) && !functionalState) 89 { 90 // the inventory update threw an exception, possibly because D-Bus 91 // wasn't ready. Try to update sensors back to functional to avoid a 92 // false-alarm. They will be updated again from subscribing to the 93 // properties-changed event 94 95 for (auto& sensor : _sensors) 96 sensor->setFunctional(true); 97 } 98 99 #ifndef MONITOR_USE_JSON 100 // Check current tach state when entering monitor mode 101 if (mode != Mode::init) 102 { 103 _monitorReady = true; 104 105 // The TachSensors will now have already read the input 106 // and target values, so check them. 107 tachChanged(); 108 } 109 #else 110 if (_system.isPowerOn()) 111 { 112 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 113 } 114 #endif 115 116 if (_fanMissingErrorDelay) 117 { 118 _fanMissingErrorTimer = std::make_unique< 119 sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>( 120 event, std::bind(&System::fanMissingErrorTimerExpired, &system, 121 std::ref(*this))); 122 } 123 124 try 125 { 126 _present = util::SDBusPlus::getProperty<bool>( 127 util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present"); 128 129 if (!_present) 130 { 131 getLogger().log( 132 fmt::format("On startup, fan {} is missing", _name)); 133 if (_system.isPowerOn() && _fanMissingErrorTimer) 134 { 135 _fanMissingErrorTimer->restartOnce( 136 std::chrono::seconds{*_fanMissingErrorDelay}); 137 } 138 } 139 } 140 catch (const util::DBusServiceError& e) 141 { 142 // This could happen on the first BMC boot if the presence 143 // detect app hasn't started yet and there isn't an inventory 144 // cache yet. 145 } 146 } 147 148 void Fan::presenceIfaceAdded(sdbusplus::message::message& msg) 149 { 150 sdbusplus::message::object_path path; 151 std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces; 152 153 msg.read(path, interfaces); 154 155 auto properties = interfaces.find(util::INV_ITEM_IFACE); 156 if (properties == interfaces.end()) 157 { 158 return; 159 } 160 161 auto property = properties->second.find("Present"); 162 if (property == properties->second.end()) 163 { 164 return; 165 } 166 167 _present = std::get<bool>(property->second); 168 169 if (!_present) 170 { 171 getLogger().log(fmt::format( 172 "New fan {} interface added and fan is not present", _name)); 173 if (_system.isPowerOn() && _fanMissingErrorTimer) 174 { 175 _fanMissingErrorTimer->restartOnce( 176 std::chrono::seconds{*_fanMissingErrorDelay}); 177 } 178 } 179 180 _system.fanStatusChange(*this); 181 } 182 183 void Fan::startMonitor() 184 { 185 _monitorReady = true; 186 187 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 188 if (_present) 189 { 190 try 191 { 192 // Force a getProperty call to check if the tach sensor is 193 // on D-Bus. If it isn't, now set it to nonfunctional. 194 // This isn't done earlier so that code watching for 195 // nonfunctional tach sensors doesn't take actions before 196 // those sensors show up on D-Bus. 197 sensor->updateTachAndTarget(); 198 tachChanged(*sensor); 199 } 200 catch (const util::DBusServiceError& e) 201 { 202 // The tach property still isn't on D-Bus, ensure 203 // sensor is nonfunctional. 204 getLogger().log(fmt::format( 205 "Monitoring starting but {} sensor value not on D-Bus", 206 sensor->name())); 207 208 sensor->setFunctional(false); 209 210 if (_numSensorFailsForNonFunc) 211 { 212 if (_functional && (countNonFunctionalSensors() >= 213 _numSensorFailsForNonFunc)) 214 { 215 updateInventory(false); 216 } 217 } 218 219 _system.fanStatusChange(*this); 220 } 221 } 222 }); 223 } 224 225 void Fan::tachChanged() 226 { 227 if (_monitorReady) 228 { 229 for (auto& s : _sensors) 230 { 231 tachChanged(*s); 232 } 233 } 234 } 235 236 void Fan::tachChanged(TachSensor& sensor) 237 { 238 if (!_system.isPowerOn() || !_monitorReady) 239 { 240 return; 241 } 242 243 if (_trustManager->active()) 244 { 245 if (!_trustManager->checkTrust(sensor)) 246 { 247 return; 248 } 249 } 250 251 // If the error checking method is 'count', if a tach change leads 252 // to an out of range sensor the count timer will take over in calling 253 // process() until the sensor is healthy again. 254 if (!sensor.countTimerRunning()) 255 { 256 process(sensor); 257 } 258 } 259 260 void Fan::countTimerExpired(TachSensor& sensor) 261 { 262 if (_trustManager->active() && !_trustManager->checkTrust(sensor)) 263 { 264 return; 265 } 266 process(sensor); 267 } 268 269 void Fan::process(TachSensor& sensor) 270 { 271 // If this sensor is out of range at this moment, start 272 // its timer, at the end of which the inventory 273 // for the fan may get updated to not functional. 274 275 // If this sensor is OK, put everything back into a good state. 276 277 if (outOfRange(sensor)) 278 { 279 if (sensor.functional()) 280 { 281 switch (sensor.getMethod()) 282 { 283 case MethodMode::timebased: 284 // Start nonfunctional timer if not already running 285 sensor.startTimer(TimerMode::nonfunc); 286 break; 287 case MethodMode::count: 288 289 if (!sensor.countTimerRunning()) 290 { 291 sensor.startCountTimer(); 292 } 293 sensor.setCounter(true); 294 if (sensor.getCounter() >= sensor.getThreshold()) 295 { 296 updateState(sensor); 297 } 298 break; 299 } 300 } 301 } 302 else 303 { 304 switch (sensor.getMethod()) 305 { 306 case MethodMode::timebased: 307 if (sensor.functional()) 308 { 309 if (sensor.timerRunning()) 310 { 311 sensor.stopTimer(); 312 } 313 } 314 else 315 { 316 // Start functional timer if not already running 317 sensor.startTimer(TimerMode::func); 318 } 319 break; 320 case MethodMode::count: 321 sensor.setCounter(false); 322 if (sensor.getCounter() == 0) 323 { 324 if (!sensor.functional()) 325 { 326 updateState(sensor); 327 } 328 329 sensor.stopCountTimer(); 330 } 331 break; 332 } 333 } 334 } 335 336 uint64_t Fan::findTargetSpeed() 337 { 338 uint64_t target = 0; 339 // The sensor doesn't support a target, 340 // so get it from another sensor. 341 auto s = std::find_if(_sensors.begin(), _sensors.end(), 342 [](const auto& s) { return s->hasTarget(); }); 343 344 if (s != _sensors.end()) 345 { 346 target = (*s)->getTarget(); 347 } 348 349 return target; 350 } 351 352 size_t Fan::countNonFunctionalSensors() const 353 { 354 return std::count_if(_sensors.begin(), _sensors.end(), 355 [](const auto& s) { return !s->functional(); }); 356 } 357 358 bool Fan::outOfRange(const TachSensor& sensor) 359 { 360 if (!sensor.hasOwner()) 361 { 362 return true; 363 } 364 365 auto actual = static_cast<uint64_t>(sensor.getInput()); 366 auto range = sensor.getRange(_deviation); 367 368 return ((actual < range.first) || 369 (range.second && actual > range.second.value())); 370 } 371 372 void Fan::updateState(TachSensor& sensor) 373 { 374 if (!_system.isPowerOn()) 375 { 376 return; 377 } 378 379 auto range = sensor.getRange(_deviation); 380 std::string rangeMax = "NoMax"; 381 if (range.second) 382 { 383 rangeMax = std::to_string(range.second.value()); 384 } 385 386 sensor.setFunctional(!sensor.functional()); 387 getLogger().log( 388 fmt::format("Setting tach sensor {} functional state to {}. " 389 "[target = {}, input = {}, allowed range = ({} - {})]", 390 sensor.name(), sensor.functional(), sensor.getTarget(), 391 sensor.getInput(), range.first, rangeMax)); 392 393 // A zero value for _numSensorFailsForNonFunc means we aren't dealing 394 // with fan FRU functional status, only sensor functional status. 395 if (_numSensorFailsForNonFunc) 396 { 397 auto numNonFuncSensors = countNonFunctionalSensors(); 398 // If the fan was nonfunctional and enough sensors are now OK, 399 // the fan can be set to functional as long as `set_func_on_present` was 400 // not set 401 if (!_setFuncOnPresent && !_functional && 402 !(numNonFuncSensors >= _numSensorFailsForNonFunc)) 403 { 404 getLogger().log(fmt::format("Setting fan {} to functional, number " 405 "of nonfunctional sensors = {}", 406 _name, numNonFuncSensors)); 407 updateInventory(true); 408 } 409 410 // If the fan is currently functional, but too many 411 // contained sensors are now nonfunctional, update 412 // the fan to nonfunctional. 413 if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc)) 414 { 415 getLogger().log(fmt::format("Setting fan {} to nonfunctional, " 416 "number of nonfunctional sensors = {}", 417 _name, numNonFuncSensors)); 418 updateInventory(false); 419 } 420 } 421 422 _system.fanStatusChange(*this); 423 } 424 425 bool Fan::updateInventory(bool functional) 426 { 427 bool dbusError = false; 428 429 try 430 { 431 auto objectMap = 432 util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF, 433 util::FUNCTIONAL_PROPERTY, functional); 434 435 auto response = util::SDBusPlus::callMethod( 436 _bus, util::INVENTORY_SVC, util::INVENTORY_PATH, 437 util::INVENTORY_INTF, "Notify", objectMap); 438 439 if (response.is_method_error()) 440 { 441 log<level::ERR>("Error in Notify call to update inventory"); 442 443 dbusError = true; 444 } 445 } 446 catch (const util::DBusError& e) 447 { 448 dbusError = true; 449 450 getLogger().log( 451 fmt::format("D-Bus Exception reading/updating inventory : {}", 452 e.what()), 453 Logger::error); 454 } 455 456 // This will always track the current state of the inventory. 457 _functional = functional; 458 459 return dbusError; 460 } 461 462 void Fan::presenceChanged(sdbusplus::message::message& msg) 463 { 464 std::string interface; 465 std::map<std::string, std::variant<bool>> properties; 466 467 msg.read(interface, properties); 468 469 auto presentProp = properties.find("Present"); 470 if (presentProp != properties.end()) 471 { 472 _present = std::get<bool>(presentProp->second); 473 474 getLogger().log( 475 fmt::format("Fan {} presence state change to {}", _name, _present)); 476 477 _system.fanStatusChange(*this); 478 479 if (_present && _setFuncOnPresent) 480 { 481 updateInventory(true); 482 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 483 sensor->setFunctional(true); 484 sensor->resetMethod(); 485 }); 486 } 487 488 if (_fanMissingErrorDelay) 489 { 490 if (!_present && _system.isPowerOn()) 491 { 492 _fanMissingErrorTimer->restartOnce( 493 std::chrono::seconds{*_fanMissingErrorDelay}); 494 } 495 else if (_present && _fanMissingErrorTimer->isEnabled()) 496 { 497 _fanMissingErrorTimer->setEnabled(false); 498 } 499 } 500 } 501 } 502 503 void Fan::sensorErrorTimerExpired(const TachSensor& sensor) 504 { 505 if (_present && _system.isPowerOn()) 506 { 507 _system.sensorErrorTimerExpired(*this, sensor); 508 } 509 } 510 511 void Fan::powerStateChanged(bool powerStateOn) 512 { 513 #ifdef MONITOR_USE_JSON 514 if (powerStateOn) 515 { 516 _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay)); 517 518 _numSensorsOnDBusAtPowerOn = 0; 519 520 std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) { 521 try 522 { 523 // Force a getProperty call. If sensor is on D-Bus, 524 // then make sure it's functional. 525 sensor->updateTachAndTarget(); 526 527 _numSensorsOnDBusAtPowerOn++; 528 529 if (_present) 530 { 531 // If not functional, set it back to functional. 532 if (!sensor->functional()) 533 { 534 sensor->setFunctional(true); 535 _system.fanStatusChange(*this, true); 536 } 537 538 // Set the counters back to zero 539 if (sensor->getMethod() == MethodMode::count) 540 { 541 sensor->resetMethod(); 542 } 543 } 544 } 545 catch (const util::DBusError& e) 546 { 547 // Properties still aren't on D-Bus. Let startMonitor() 548 // deal with it, or maybe System::powerStateChanged() if 549 // there aren't any sensors at all on D-Bus. 550 getLogger().log(fmt::format( 551 "At power on, tach sensor {} value not on D-Bus", 552 sensor->name())); 553 } 554 }); 555 556 if (_present) 557 { 558 // If configured to change functional state on the fan itself, 559 // Set it back to true now if necessary. 560 if (_numSensorFailsForNonFunc) 561 { 562 if (!_functional && 563 (countNonFunctionalSensors() < _numSensorFailsForNonFunc)) 564 { 565 updateInventory(true); 566 } 567 } 568 } 569 else 570 { 571 getLogger().log( 572 fmt::format("At power on, fan {} is missing", _name)); 573 574 if (_fanMissingErrorTimer) 575 { 576 _fanMissingErrorTimer->restartOnce( 577 std::chrono::seconds{*_fanMissingErrorDelay}); 578 } 579 } 580 } 581 else 582 { 583 _monitorReady = false; 584 585 if (_monitorTimer.isEnabled()) 586 { 587 _monitorTimer.setEnabled(false); 588 } 589 590 if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled()) 591 { 592 _fanMissingErrorTimer->setEnabled(false); 593 } 594 595 std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) { 596 if (sensor->timerRunning()) 597 { 598 sensor->stopTimer(); 599 } 600 601 sensor->stopCountTimer(); 602 }); 603 } 604 #endif 605 } 606 607 } // namespace monitor 608 } // namespace fan 609 } // namespace phosphor 610