xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision f724c16b42abe061752f0d78f0bdb5e2c85de4b6)
1  /**
2   * Copyright © 2022 IBM Corporation
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   *     http://www.apache.org/licenses/LICENSE-2.0
9   *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  #include "fan.hpp"
17  
18  #include "logging.hpp"
19  #include "sdbusplus.hpp"
20  #include "system.hpp"
21  #include "types.hpp"
22  #include "utility.hpp"
23  
24  #include <fmt/format.h>
25  
26  #include <phosphor-logging/log.hpp>
27  
28  namespace phosphor
29  {
30  namespace fan
31  {
32  namespace monitor
33  {
34  
35  using namespace phosphor::logging;
36  using namespace sdbusplus::bus::match;
37  
38  Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
39           std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
40           System& system) :
41      _bus(bus),
42      _name(def.name), _deviation(def.deviation),
43      _upperDeviation(def.upperDeviation),
44      _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
45      _trustManager(trust),
46  #ifdef MONITOR_USE_JSON
47      _monitorDelay(def.monitorStartDelay),
48      _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
49  #endif
50      _system(system),
51      _presenceMatch(bus,
52                     rules::propertiesChanged(util::INVENTORY_PATH + _name,
53                                              util::INV_ITEM_IFACE),
54                     std::bind(std::mem_fn(&Fan::presenceChanged), this,
55                               std::placeholders::_1)),
56      _presenceIfaceAddedMatch(
57          bus,
58          rules::interfacesAdded() +
59              rules::argNpath(0, util::INVENTORY_PATH + _name),
60          std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
61                    std::placeholders::_1)),
62      _fanMissingErrorDelay(def.fanMissingErrDelay),
63      _setFuncOnPresent(def.funcOnPresent)
64  {
65      // Setup tach sensors for monitoring
66      for (const auto& s : def.sensorList)
67      {
68          _sensors.emplace_back(std::make_shared<TachSensor>(
69              mode, bus, *this, s.name, s.hasTarget, def.funcDelay,
70              s.targetInterface, s.targetPath, s.factor, s.offset, def.method,
71              s.threshold, s.ignoreAboveMax, def.timeout,
72              def.nonfuncRotorErrDelay, def.countInterval, event));
73  
74          _trustManager->registerSensor(_sensors.back());
75      }
76  
77      bool functionalState =
78          (_numSensorFailsForNonFunc == 0) ||
79          (countNonFunctionalSensors() < _numSensorFailsForNonFunc);
80  
81      if (updateInventory(functionalState) && !functionalState)
82      {
83          // the inventory update threw an exception, possibly because D-Bus
84          // wasn't ready. Try to update sensors back to functional to avoid a
85          // false-alarm. They will be updated again from subscribing to the
86          // properties-changed event
87  
88          for (auto& sensor : _sensors)
89              sensor->setFunctional(true);
90      }
91  
92  #ifndef MONITOR_USE_JSON
93      // Check current tach state when entering monitor mode
94      if (mode != Mode::init)
95      {
96          _monitorReady = true;
97  
98          // The TachSensors will now have already read the input
99          // and target values, so check them.
100          tachChanged();
101      }
102  #else
103      if (_system.isPowerOn())
104      {
105          _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
106      }
107  #endif
108  
109      if (_fanMissingErrorDelay)
110      {
111          _fanMissingErrorTimer = std::make_unique<
112              sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
113              event, std::bind(&System::fanMissingErrorTimerExpired, &system,
114                               std::ref(*this)));
115      }
116  
117      try
118      {
119          _present = util::SDBusPlus::getProperty<bool>(
120              util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");
121  
122          if (!_present)
123          {
124              getLogger().log(
125                  fmt::format("On startup, fan {} is missing", _name));
126              if (_system.isPowerOn() && _fanMissingErrorTimer)
127              {
128                  _fanMissingErrorTimer->restartOnce(
129                      std::chrono::seconds{*_fanMissingErrorDelay});
130              }
131          }
132      }
133      catch (const util::DBusServiceError& e)
134      {
135          // This could happen on the first BMC boot if the presence
136          // detect app hasn't started yet and there isn't an inventory
137          // cache yet.
138      }
139  }
140  
141  void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
142  {
143      sdbusplus::message::object_path path;
144      std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
145  
146      msg.read(path, interfaces);
147  
148      auto properties = interfaces.find(util::INV_ITEM_IFACE);
149      if (properties == interfaces.end())
150      {
151          return;
152      }
153  
154      auto property = properties->second.find("Present");
155      if (property == properties->second.end())
156      {
157          return;
158      }
159  
160      _present = std::get<bool>(property->second);
161  
162      if (!_present)
163      {
164          getLogger().log(fmt::format(
165              "New fan {} interface added and fan is not present", _name));
166          if (_system.isPowerOn() && _fanMissingErrorTimer)
167          {
168              _fanMissingErrorTimer->restartOnce(
169                  std::chrono::seconds{*_fanMissingErrorDelay});
170          }
171      }
172  
173      _system.fanStatusChange(*this);
174  }
175  
176  void Fan::startMonitor()
177  {
178      _monitorReady = true;
179  
180      std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
181          if (_present)
182          {
183              try
184              {
185                  // Force a getProperty call to check if the tach sensor is
186                  // on D-Bus.  If it isn't, now set it to nonfunctional.
187                  // This isn't done earlier so that code watching for
188                  // nonfunctional tach sensors doesn't take actions before
189                  // those sensors show up on D-Bus.
190                  sensor->updateTachAndTarget();
191                  tachChanged(*sensor);
192              }
193              catch (const util::DBusServiceError& e)
194              {
195                  // The tach property still isn't on D-Bus. Ensure
196                  // sensor is nonfunctional, but skip creating an
197                  // error for it since it isn't a fan problem.
198                  getLogger().log(fmt::format(
199                      "Monitoring starting but {} sensor value not on D-Bus",
200                      sensor->name()));
201  
202                  sensor->setFunctional(false, true);
203  
204                  if (_numSensorFailsForNonFunc)
205                  {
206                      if (_functional && (countNonFunctionalSensors() >=
207                                          _numSensorFailsForNonFunc))
208                      {
209                          updateInventory(false);
210                      }
211                  }
212  
213                  // At this point, don't start any power off actions due
214                  // to missing sensors.  Let something else handle that
215                  // policy.
216                  _system.fanStatusChange(*this, true);
217              }
218          }
219      });
220  }
221  
222  void Fan::tachChanged()
223  {
224      if (_monitorReady)
225      {
226          for (auto& s : _sensors)
227          {
228              tachChanged(*s);
229          }
230      }
231  }
232  
233  void Fan::tachChanged(TachSensor& sensor)
234  {
235      if (!_system.isPowerOn() || !_monitorReady)
236      {
237          return;
238      }
239  
240      if (_trustManager->active())
241      {
242          if (!_trustManager->checkTrust(sensor))
243          {
244              return;
245          }
246      }
247  
248      // If the error checking method is 'count', if a tach change leads
249      // to an out of range sensor the count timer will take over in calling
250      // process() until the sensor is healthy again.
251      if (!sensor.countTimerRunning())
252      {
253          process(sensor);
254      }
255  }
256  
257  void Fan::countTimerExpired(TachSensor& sensor)
258  {
259      if (_trustManager->active() && !_trustManager->checkTrust(sensor))
260      {
261          return;
262      }
263      process(sensor);
264  }
265  
266  void Fan::process(TachSensor& sensor)
267  {
268      // If this sensor is out of range at this moment, start
269      // its timer, at the end of which the inventory
270      // for the fan may get updated to not functional.
271  
272      // If this sensor is OK, put everything back into a good state.
273  
274      if (outOfRange(sensor))
275      {
276          if (sensor.functional())
277          {
278              switch (sensor.getMethod())
279              {
280                  case MethodMode::timebased:
281                      // Start nonfunctional timer if not already running
282                      sensor.startTimer(TimerMode::nonfunc);
283                      break;
284                  case MethodMode::count:
285  
286                      if (!sensor.countTimerRunning())
287                      {
288                          sensor.startCountTimer();
289                      }
290                      sensor.setCounter(true);
291                      if (sensor.getCounter() >= sensor.getThreshold())
292                      {
293                          updateState(sensor);
294                      }
295                      break;
296              }
297          }
298      }
299      else
300      {
301          switch (sensor.getMethod())
302          {
303              case MethodMode::timebased:
304                  if (sensor.functional())
305                  {
306                      if (sensor.timerRunning())
307                      {
308                          sensor.stopTimer();
309                      }
310                  }
311                  else
312                  {
313                      // Start functional timer if not already running
314                      sensor.startTimer(TimerMode::func);
315                  }
316                  break;
317              case MethodMode::count:
318                  sensor.setCounter(false);
319                  if (sensor.getCounter() == 0)
320                  {
321                      if (!sensor.functional())
322                      {
323                          updateState(sensor);
324                      }
325  
326                      sensor.stopCountTimer();
327                  }
328                  break;
329          }
330      }
331  }
332  
333  uint64_t Fan::findTargetSpeed()
334  {
335      uint64_t target = 0;
336      // The sensor doesn't support a target,
337      // so get it from another sensor.
338      auto s = std::find_if(_sensors.begin(), _sensors.end(),
339                            [](const auto& s) { return s->hasTarget(); });
340  
341      if (s != _sensors.end())
342      {
343          target = (*s)->getTarget();
344      }
345  
346      return target;
347  }
348  
349  size_t Fan::countNonFunctionalSensors() const
350  {
351      return std::count_if(_sensors.begin(), _sensors.end(),
352                           [](const auto& s) { return !s->functional(); });
353  }
354  
355  bool Fan::outOfRange(const TachSensor& sensor)
356  {
357      if (!sensor.hasOwner())
358      {
359          return true;
360      }
361  
362      auto actual = static_cast<uint64_t>(sensor.getInput());
363      auto range = sensor.getRange(_deviation, _upperDeviation);
364  
365      return ((actual < range.first) ||
366              (range.second && actual > range.second.value()));
367  }
368  
369  void Fan::updateState(TachSensor& sensor)
370  {
371      if (!_system.isPowerOn())
372      {
373          return;
374      }
375  
376      auto range = sensor.getRange(_deviation, _upperDeviation);
377      std::string rangeMax = "NoMax";
378      if (range.second)
379      {
380          rangeMax = std::to_string(range.second.value());
381      }
382  
383      // Skip starting the error timer if the sensor
384      // isn't on D-Bus as this isn't a fan hardware problem.
385      sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());
386  
387      getLogger().log(fmt::format(
388          "Setting tach sensor {} functional state to {}. "
389          "[target = {}, actual = {}, allowed range = ({} - {}) "
390          "owned = {}]",
391          sensor.name(), sensor.functional(), sensor.getTarget(),
392          sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));
393  
394      // A zero value for _numSensorFailsForNonFunc means we aren't dealing
395      // with fan FRU functional status, only sensor functional status.
396      if (_numSensorFailsForNonFunc)
397      {
398          auto numNonFuncSensors = countNonFunctionalSensors();
399          // If the fan was nonfunctional and enough sensors are now OK,
400          // the fan can be set to functional as long as `set_func_on_present` was
401          // not set
402          if (!_setFuncOnPresent && !_functional &&
403              !(numNonFuncSensors >= _numSensorFailsForNonFunc))
404          {
405              getLogger().log(fmt::format("Setting fan {} to functional, number "
406                                          "of nonfunctional sensors = {}",
407                                          _name, numNonFuncSensors));
408              updateInventory(true);
409          }
410  
411          // If the fan is currently functional, but too many
412          // contained sensors are now nonfunctional, update
413          // the fan to nonfunctional.
414          if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc))
415          {
416              getLogger().log(fmt::format("Setting fan {} to nonfunctional, "
417                                          "number of nonfunctional sensors = {}",
418                                          _name, numNonFuncSensors));
419              updateInventory(false);
420          }
421      }
422  
423      // Skip the power off rule checks if the sensor isn't
424      // on D-Bus so a running system isn't shutdown.
425      _system.fanStatusChange(*this, !sensor.hasOwner());
426  }
427  
428  bool Fan::updateInventory(bool functional)
429  {
430      bool dbusError = false;
431  
432      try
433      {
434          auto objectMap =
435              util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
436                                    util::FUNCTIONAL_PROPERTY, functional);
437  
438          auto response = util::SDBusPlus::callMethod(
439              _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
440              util::INVENTORY_INTF, "Notify", objectMap);
441  
442          if (response.is_method_error())
443          {
444              log<level::ERR>("Error in Notify call to update inventory");
445  
446              dbusError = true;
447          }
448      }
449      catch (const util::DBusError& e)
450      {
451          dbusError = true;
452  
453          getLogger().log(
454              fmt::format("D-Bus Exception reading/updating inventory : {}",
455                          e.what()),
456              Logger::error);
457      }
458  
459      // This will always track the current state of the inventory.
460      _functional = functional;
461  
462      return dbusError;
463  }
464  
465  void Fan::presenceChanged(sdbusplus::message_t& msg)
466  {
467      std::string interface;
468      std::map<std::string, std::variant<bool>> properties;
469  
470      msg.read(interface, properties);
471  
472      auto presentProp = properties.find("Present");
473      if (presentProp != properties.end())
474      {
475          _present = std::get<bool>(presentProp->second);
476  
477          getLogger().log(
478              fmt::format("Fan {} presence state change to {}", _name, _present));
479  
480          if (_present && _setFuncOnPresent)
481          {
482              updateInventory(true);
483              std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
484                  sensor->setFunctional(true);
485                  sensor->resetMethod();
486              });
487          }
488  
489          _system.fanStatusChange(*this);
490  
491          if (_fanMissingErrorDelay)
492          {
493              if (!_present && _system.isPowerOn())
494              {
495                  _fanMissingErrorTimer->restartOnce(
496                      std::chrono::seconds{*_fanMissingErrorDelay});
497              }
498              else if (_present && _fanMissingErrorTimer->isEnabled())
499              {
500                  _fanMissingErrorTimer->setEnabled(false);
501              }
502          }
503      }
504  }
505  
506  void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
507  {
508      if (_present && _system.isPowerOn())
509      {
510          _system.sensorErrorTimerExpired(*this, sensor);
511      }
512  }
513  
514  void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
515  {
516  #ifdef MONITOR_USE_JSON
517      if (powerStateOn)
518      {
519          _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
520  
521          _numSensorsOnDBusAtPowerOn = 0;
522  
523          std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
524              try
525              {
526                  // Force a getProperty call.  If sensor is on D-Bus,
527                  // then make sure it's functional.
528                  sensor->updateTachAndTarget();
529  
530                  _numSensorsOnDBusAtPowerOn++;
531  
532                  if (_present)
533                  {
534                      // If not functional, set it back to functional.
535                      if (!sensor->functional())
536                      {
537                          sensor->setFunctional(true);
538                          _system.fanStatusChange(*this, true);
539                      }
540  
541                      // Set the counters back to zero
542                      if (sensor->getMethod() == MethodMode::count)
543                      {
544                          sensor->resetMethod();
545                      }
546                  }
547              }
548              catch (const util::DBusError& e)
549              {
550                  // Properties still aren't on D-Bus.  Let startMonitor()
551                  // deal with it, or maybe System::powerStateChanged() if
552                  // there aren't any sensors at all on D-Bus.
553                  getLogger().log(fmt::format(
554                      "At power on, tach sensor {} value not on D-Bus",
555                      sensor->name()));
556              }
557          });
558  
559          if (_present)
560          {
561              // If configured to change functional state on the fan itself,
562              // Set it back to true now if necessary.
563              if (_numSensorFailsForNonFunc)
564              {
565                  if (!_functional &&
566                      (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
567                  {
568                      updateInventory(true);
569                  }
570              }
571          }
572          else
573          {
574              getLogger().log(
575                  fmt::format("At power on, fan {} is missing", _name));
576  
577              if (_fanMissingErrorTimer)
578              {
579                  _fanMissingErrorTimer->restartOnce(
580                      std::chrono::seconds{*_fanMissingErrorDelay});
581              }
582          }
583      }
584      else
585      {
586          _monitorReady = false;
587  
588          if (_monitorTimer.isEnabled())
589          {
590              _monitorTimer.setEnabled(false);
591          }
592  
593          if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
594          {
595              _fanMissingErrorTimer->setEnabled(false);
596          }
597  
598          std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
599              if (sensor->timerRunning())
600              {
601                  sensor->stopTimer();
602              }
603  
604              sensor->stopCountTimer();
605          });
606      }
607  #endif
608  }
609  
610  } // namespace monitor
611  } // namespace fan
612  } // namespace phosphor
613