xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision 623635c62f229008400e69d6cbfdb84c12610807)
1 /**
2  * Copyright © 2017 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <fmt/format.h>
25 
26 #include <phosphor-logging/log.hpp>
27 
28 #include <algorithm>
29 
30 namespace phosphor
31 {
32 namespace fan
33 {
34 namespace monitor
35 {
36 
37 using namespace phosphor::logging;
38 using namespace sdbusplus::bus::match;
39 
40 Fan::Fan(Mode mode, sdbusplus::bus::bus& bus, const sdeventplus::Event& event,
41          std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
42          System& system) :
43     _bus(bus),
44     _name(std::get<fanNameField>(def)),
45     _deviation(std::get<fanDeviationField>(def)),
46     _numSensorFailsForNonFunc(std::get<numSensorFailsForNonfuncField>(def)),
47     _trustManager(trust),
48 #ifdef MONITOR_USE_JSON
49     _monitorDelay(std::get<monitorStartDelayField>(def)),
50     _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
51 #endif
52     _system(system),
53     _presenceMatch(bus,
54                    rules::propertiesChanged(util::INVENTORY_PATH + _name,
55                                             util::INV_ITEM_IFACE),
56                    std::bind(std::mem_fn(&Fan::presenceChanged), this,
57                              std::placeholders::_1)),
58     _presenceIfaceAddedMatch(
59         bus,
60         rules::interfacesAdded() +
61             rules::argNpath(0, util::INVENTORY_PATH + _name),
62         std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
63                   std::placeholders::_1)),
64     _fanMissingErrorDelay(std::get<fanMissingErrDelayField>(def)),
65     _countInterval(std::get<countIntervalField>(def))
66 {
67     bool enableCountTimer = false;
68 
69     // Start from a known state of functional (even if
70     // _numSensorFailsForNonFunc is 0)
71     updateInventory(true);
72 
73     // Setup tach sensors for monitoring
74     auto& sensors = std::get<sensorListField>(def);
75     for (auto& s : sensors)
76     {
77         _sensors.emplace_back(std::make_shared<TachSensor>(
78             mode, bus, *this, std::get<sensorNameField>(s),
79             std::get<hasTargetField>(s), std::get<funcDelay>(def),
80             std::get<targetInterfaceField>(s), std::get<factorField>(s),
81             std::get<offsetField>(s), std::get<methodField>(def),
82             std::get<thresholdField>(s), std::get<timeoutField>(def),
83             std::get<nonfuncRotorErrDelayField>(def), event));
84 
85         _trustManager->registerSensor(_sensors.back());
86         if (_sensors.back()->getMethod() == MethodMode::count)
87         {
88             enableCountTimer = true;
89         }
90     }
91 
92     // If the error checking method will be 'count', then it needs a timer.
93     // The timer is repeating but is disabled immediately because it doesn't
94     // need to start yet.
95     if (enableCountTimer)
96     {
97         _countTimer = std::make_unique<
98             sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
99             event, std::bind(&Fan::countTimerExpired, this),
100             std::chrono::seconds(_countInterval));
101 
102         _countTimer->setEnabled(false);
103     }
104 
105 #ifndef MONITOR_USE_JSON
106     // Check current tach state when entering monitor mode
107     if (mode != Mode::init)
108     {
109         _monitorReady = true;
110 
111         // The TachSensors will now have already read the input
112         // and target values, so check them.
113         tachChanged();
114     }
115 #else
116     if (_system.isPowerOn())
117     {
118         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
119     }
120 #endif
121 
122     if (_fanMissingErrorDelay)
123     {
124         _fanMissingErrorTimer = std::make_unique<
125             sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
126             event, std::bind(&System::fanMissingErrorTimerExpired, &system,
127                              std::ref(*this)));
128     }
129 
130     try
131     {
132         _present = util::SDBusPlus::getProperty<bool>(
133             util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");
134 
135         if (!_present)
136         {
137             getLogger().log(
138                 fmt::format("On startup, fan {} is missing", _name));
139             if (_system.isPowerOn() && _fanMissingErrorTimer)
140             {
141                 _fanMissingErrorTimer->restartOnce(
142                     std::chrono::seconds{*_fanMissingErrorDelay});
143             }
144         }
145     }
146     catch (const util::DBusServiceError& e)
147     {
148         // This could happen on the first BMC boot if the presence
149         // detect app hasn't started yet and there isn't an inventory
150         // cache yet.
151     }
152 }
153 
154 void Fan::presenceIfaceAdded(sdbusplus::message::message& msg)
155 {
156     sdbusplus::message::object_path path;
157     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
158 
159     msg.read(path, interfaces);
160 
161     auto properties = interfaces.find(util::INV_ITEM_IFACE);
162     if (properties == interfaces.end())
163     {
164         return;
165     }
166 
167     auto property = properties->second.find("Present");
168     if (property == properties->second.end())
169     {
170         return;
171     }
172 
173     _present = std::get<bool>(property->second);
174 
175     if (!_present)
176     {
177         getLogger().log(fmt::format(
178             "New fan {} interface added and fan is not present", _name));
179         if (_system.isPowerOn() && _fanMissingErrorTimer)
180         {
181             _fanMissingErrorTimer->restartOnce(
182                 std::chrono::seconds{*_fanMissingErrorDelay});
183         }
184     }
185 
186     _system.fanStatusChange(*this);
187 }
188 
189 void Fan::startMonitor()
190 {
191     _monitorReady = true;
192 
193     if (_countTimer)
194     {
195         _countTimer->resetRemaining();
196         _countTimer->setEnabled(true);
197     }
198 
199     std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
200         if (_present)
201         {
202             try
203             {
204                 // Force a getProperty call to check if the tach sensor is
205                 // on D-Bus.  If it isn't, now set it to nonfunctional.
206                 // This isn't done earlier so that code watching for
207                 // nonfunctional tach sensors doesn't take actions before
208                 // those sensors show up on D-Bus.
209                 sensor->updateTachAndTarget();
210                 tachChanged(*sensor);
211             }
212             catch (const util::DBusServiceError& e)
213             {
214                 // The tach property still isn't on D-Bus, ensure
215                 // sensor is nonfunctional.
216                 getLogger().log(fmt::format(
217                     "Monitoring starting but {} sensor value not on D-Bus",
218                     sensor->name()));
219 
220                 sensor->setFunctional(false);
221 
222                 if (_numSensorFailsForNonFunc)
223                 {
224                     if (_functional && (countNonFunctionalSensors() >=
225                                         _numSensorFailsForNonFunc))
226                     {
227                         updateInventory(false);
228                     }
229                 }
230 
231                 _system.fanStatusChange(*this);
232             }
233         }
234     });
235 }
236 
237 void Fan::tachChanged()
238 {
239     if (_monitorReady)
240     {
241         for (auto& s : _sensors)
242         {
243             tachChanged(*s);
244         }
245     }
246 }
247 
248 void Fan::tachChanged(TachSensor& sensor)
249 {
250     if (!_system.isPowerOn() || !_monitorReady)
251     {
252         return;
253     }
254 
255     if (_trustManager->active())
256     {
257         if (!_trustManager->checkTrust(sensor))
258         {
259             return;
260         }
261     }
262 
263     // If using the timebased method to determine functional status,
264     // check now, otherwise let _countTimer handle it.  A timer is
265     // used for the count method so that stuck sensors will continue
266     // to be checked.
267     if (sensor.getMethod() == MethodMode::timebased)
268     {
269         process(sensor);
270     }
271 }
272 
273 void Fan::countTimerExpired()
274 {
275     // For sensors that use the 'count' method, time to check their
276     // status and increment/decrement counts as necessary.
277     for (auto& sensor : _sensors)
278     {
279         if (_trustManager->active() && !_trustManager->checkTrust(*sensor))
280         {
281             continue;
282         }
283         process(*sensor);
284     }
285 }
286 
287 void Fan::process(TachSensor& sensor)
288 {
289     // If this sensor is out of range at this moment, start
290     // its timer, at the end of which the inventory
291     // for the fan may get updated to not functional.
292 
293     // If this sensor is OK, put everything back into a good state.
294 
295     if (outOfRange(sensor))
296     {
297         if (sensor.functional())
298         {
299             switch (sensor.getMethod())
300             {
301                 case MethodMode::timebased:
302                     // Start nonfunctional timer if not already running
303                     sensor.startTimer(TimerMode::nonfunc);
304                     break;
305                 case MethodMode::count:
306                     sensor.setCounter(true);
307                     if (sensor.getCounter() >= sensor.getThreshold())
308                     {
309                         updateState(sensor);
310                     }
311                     break;
312             }
313         }
314     }
315     else
316     {
317         switch (sensor.getMethod())
318         {
319             case MethodMode::timebased:
320                 if (sensor.functional())
321                 {
322                     if (sensor.timerRunning())
323                     {
324                         sensor.stopTimer();
325                     }
326                 }
327                 else
328                 {
329                     // Start functional timer if not already running
330                     sensor.startTimer(TimerMode::func);
331                 }
332                 break;
333             case MethodMode::count:
334                 sensor.setCounter(false);
335                 if (!sensor.functional() && sensor.getCounter() == 0)
336                 {
337                     updateState(sensor);
338                 }
339                 break;
340         }
341     }
342 }
343 
344 uint64_t Fan::findTargetSpeed()
345 {
346     uint64_t target = 0;
347     // The sensor doesn't support a target,
348     // so get it from another sensor.
349     auto s = std::find_if(_sensors.begin(), _sensors.end(),
350                           [](const auto& s) { return s->hasTarget(); });
351 
352     if (s != _sensors.end())
353     {
354         target = (*s)->getTarget();
355     }
356 
357     return target;
358 }
359 
360 size_t Fan::countNonFunctionalSensors()
361 {
362     return std::count_if(_sensors.begin(), _sensors.end(),
363                          [](const auto& s) { return !s->functional(); });
364 }
365 
366 bool Fan::outOfRange(const TachSensor& sensor)
367 {
368     auto actual = static_cast<uint64_t>(sensor.getInput());
369     auto range = sensor.getRange(_deviation);
370 
371     if ((actual < range.first) || (actual > range.second))
372     {
373         return true;
374     }
375 
376     return false;
377 }
378 
379 void Fan::updateState(TachSensor& sensor)
380 {
381     auto range = sensor.getRange(_deviation);
382 
383     if (!_system.isPowerOn())
384     {
385         return;
386     }
387 
388     sensor.setFunctional(!sensor.functional());
389     getLogger().log(
390         fmt::format("Setting tach sensor {} functional state to {}. "
391                     "[target = {}, input = {}, allowed range = ({} - {})]",
392                     sensor.name(), sensor.functional(), sensor.getTarget(),
393                     sensor.getInput(), range.first, range.second));
394 
395     // A zero value for _numSensorFailsForNonFunc means we aren't dealing
396     // with fan FRU functional status, only sensor functional status.
397     if (_numSensorFailsForNonFunc)
398     {
399         auto numNonFuncSensors = countNonFunctionalSensors();
400         // If the fan was nonfunctional and enough sensors are now OK,
401         // the fan can be set to functional
402         if (!_functional && !(numNonFuncSensors >= _numSensorFailsForNonFunc))
403         {
404             getLogger().log(fmt::format("Setting fan {} to functional, number "
405                                         "of nonfunctional sensors = {}",
406                                         _name, numNonFuncSensors));
407             updateInventory(true);
408         }
409 
410         // If the fan is currently functional, but too many
411         // contained sensors are now nonfunctional, update
412         // the fan to nonfunctional.
413         if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc))
414         {
415             getLogger().log(fmt::format("Setting fan {} to nonfunctional, "
416                                         "number of nonfunctional sensors = {}",
417                                         _name, numNonFuncSensors));
418             updateInventory(false);
419         }
420     }
421 
422     _system.fanStatusChange(*this);
423 }
424 
425 void Fan::updateInventory(bool functional)
426 {
427     auto objectMap =
428         util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
429                               util::FUNCTIONAL_PROPERTY, functional);
430     auto response = util::SDBusPlus::lookupAndCallMethod(
431         _bus, util::INVENTORY_PATH, util::INVENTORY_INTF, "Notify", objectMap);
432     if (response.is_method_error())
433     {
434         log<level::ERR>("Error in Notify call to update inventory");
435         return;
436     }
437 
438     // This will always track the current state of the inventory.
439     _functional = functional;
440 }
441 
442 void Fan::presenceChanged(sdbusplus::message::message& msg)
443 {
444     std::string interface;
445     std::map<std::string, std::variant<bool>> properties;
446 
447     msg.read(interface, properties);
448 
449     auto presentProp = properties.find("Present");
450     if (presentProp != properties.end())
451     {
452         _present = std::get<bool>(presentProp->second);
453 
454         getLogger().log(
455             fmt::format("Fan {} presence state change to {}", _name, _present));
456 
457         _system.fanStatusChange(*this);
458 
459         if (_fanMissingErrorDelay)
460         {
461             if (!_present && _system.isPowerOn())
462             {
463                 _fanMissingErrorTimer->restartOnce(
464                     std::chrono::seconds{*_fanMissingErrorDelay});
465             }
466             else if (_present && _fanMissingErrorTimer->isEnabled())
467             {
468                 _fanMissingErrorTimer->setEnabled(false);
469             }
470         }
471     }
472 }
473 
474 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
475 {
476     if (_present && _system.isPowerOn())
477     {
478         _system.sensorErrorTimerExpired(*this, sensor);
479     }
480 }
481 
482 void Fan::powerStateChanged(bool powerStateOn)
483 {
484 #ifdef MONITOR_USE_JSON
485     if (powerStateOn)
486     {
487         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
488 
489         if (_present)
490         {
491             std::for_each(
492                 _sensors.begin(), _sensors.end(), [this](auto& sensor) {
493                     try
494                     {
495                         // Force a getProperty call.  If sensor is on D-Bus,
496                         // then make sure it's functional.
497                         sensor->updateTachAndTarget();
498 
499                         // If not functional, set it back to functional.
500                         if (!sensor->functional())
501                         {
502                             sensor->setFunctional(true);
503                             _system.fanStatusChange(*this, true);
504                         }
505 
506                         // Set the counters back to zero
507                         if (sensor->getMethod() == MethodMode::count)
508                         {
509                             sensor->resetMethod();
510                         }
511                     }
512                     catch (const util::DBusServiceError& e)
513                     {
514                         // Properties still aren't on D-Bus.  Let startMonitor()
515                         // deal with it.
516                         getLogger().log(fmt::format(
517                             "At power on, tach sensor {} value not on D-Bus",
518                             sensor->name()));
519                     }
520                 });
521 
522             // If configured to change functional state on the fan itself,
523             // Set it back to true now if necessary.
524             if (_numSensorFailsForNonFunc)
525             {
526                 if (!_functional &&
527                     (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
528                 {
529                     updateInventory(true);
530                 }
531             }
532         }
533         else
534         {
535             getLogger().log(
536                 fmt::format("At power on, fan {} is missing", _name));
537 
538             if (_fanMissingErrorTimer)
539             {
540                 _fanMissingErrorTimer->restartOnce(
541                     std::chrono::seconds{*_fanMissingErrorDelay});
542             }
543         }
544     }
545     else
546     {
547         _monitorReady = false;
548 
549         if (_monitorTimer.isEnabled())
550         {
551             _monitorTimer.setEnabled(false);
552         }
553 
554         if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
555         {
556             _fanMissingErrorTimer->setEnabled(false);
557         }
558 
559         std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
560             if (sensor->timerRunning())
561             {
562                 sensor->stopTimer();
563             }
564         });
565 
566         if (_countTimer)
567         {
568             _countTimer->setEnabled(false);
569         }
570     }
571 #endif
572 }
573 
574 } // namespace monitor
575 } // namespace fan
576 } // namespace phosphor
577