// xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision cceffdd91cf3cc0c651b5c44424fb3377cded964)
1 /**
2  * Copyright © 2017 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <fmt/format.h>
25 
26 #include <phosphor-logging/log.hpp>
27 
28 #include <algorithm>
29 
30 namespace phosphor
31 {
32 namespace fan
33 {
34 namespace monitor
35 {
36 
37 using namespace phosphor::logging;
38 using namespace sdbusplus::bus::match;
39 
40 Fan::Fan(Mode mode, sdbusplus::bus::bus& bus, const sdeventplus::Event& event,
41          std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
42          System& system) :
43     _bus(bus),
44     _name(std::get<fanNameField>(def)),
45     _deviation(std::get<fanDeviationField>(def)),
46     _numSensorFailsForNonFunc(std::get<numSensorFailsForNonfuncField>(def)),
47     _trustManager(trust),
48 #ifdef MONITOR_USE_JSON
49     _monitorDelay(std::get<monitorStartDelayField>(def)),
50     _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
51 #endif
52     _system(system),
53     _presenceMatch(bus,
54                    rules::propertiesChanged(util::INVENTORY_PATH + _name,
55                                             util::INV_ITEM_IFACE),
56                    std::bind(std::mem_fn(&Fan::presenceChanged), this,
57                              std::placeholders::_1)),
58     _presenceIfaceAddedMatch(
59         bus,
60         rules::interfacesAdded() +
61             rules::argNpath(0, util::INVENTORY_PATH + _name),
62         std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
63                   std::placeholders::_1)),
64     _fanMissingErrorDelay(std::get<fanMissingErrDelayField>(def)),
65     _countInterval(std::get<countIntervalField>(def)),
66     _setFuncOnPresent(std::get<funcOnPresentField>(def))
67 {
68     bool enableCountTimer = false;
69 
70     // Start from a known state of functional (even if
71     // _numSensorFailsForNonFunc is 0)
72     updateInventory(true);
73 
74     // Setup tach sensors for monitoring
75     auto& sensors = std::get<sensorListField>(def);
76     for (auto& s : sensors)
77     {
78         _sensors.emplace_back(std::make_shared<TachSensor>(
79             mode, bus, *this, std::get<sensorNameField>(s),
80             std::get<hasTargetField>(s), std::get<funcDelay>(def),
81             std::get<targetInterfaceField>(s), std::get<factorField>(s),
82             std::get<offsetField>(s), std::get<methodField>(def),
83             std::get<thresholdField>(s), std::get<timeoutField>(def),
84             std::get<nonfuncRotorErrDelayField>(def), event));
85 
86         _trustManager->registerSensor(_sensors.back());
87         if (_sensors.back()->getMethod() == MethodMode::count)
88         {
89             enableCountTimer = true;
90         }
91     }
92 
93     // If the error checking method will be 'count', then it needs a timer.
94     // The timer is repeating but is disabled immediately because it doesn't
95     // need to start yet.
96     if (enableCountTimer)
97     {
98         _countTimer = std::make_unique<
99             sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
100             event, std::bind(&Fan::countTimerExpired, this),
101             std::chrono::seconds(_countInterval));
102 
103         _countTimer->setEnabled(false);
104     }
105 
106 #ifndef MONITOR_USE_JSON
107     // Check current tach state when entering monitor mode
108     if (mode != Mode::init)
109     {
110         _monitorReady = true;
111 
112         // The TachSensors will now have already read the input
113         // and target values, so check them.
114         tachChanged();
115     }
116 #else
117     if (_system.isPowerOn())
118     {
119         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
120     }
121 #endif
122 
123     if (_fanMissingErrorDelay)
124     {
125         _fanMissingErrorTimer = std::make_unique<
126             sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
127             event, std::bind(&System::fanMissingErrorTimerExpired, &system,
128                              std::ref(*this)));
129     }
130 
131     try
132     {
133         _present = util::SDBusPlus::getProperty<bool>(
134             util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");
135 
136         if (!_present)
137         {
138             getLogger().log(
139                 fmt::format("On startup, fan {} is missing", _name));
140             if (_system.isPowerOn() && _fanMissingErrorTimer)
141             {
142                 _fanMissingErrorTimer->restartOnce(
143                     std::chrono::seconds{*_fanMissingErrorDelay});
144             }
145         }
146     }
147     catch (const util::DBusServiceError& e)
148     {
149         // This could happen on the first BMC boot if the presence
150         // detect app hasn't started yet and there isn't an inventory
151         // cache yet.
152     }
153 }
154 
155 void Fan::presenceIfaceAdded(sdbusplus::message::message& msg)
156 {
157     sdbusplus::message::object_path path;
158     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
159 
160     msg.read(path, interfaces);
161 
162     auto properties = interfaces.find(util::INV_ITEM_IFACE);
163     if (properties == interfaces.end())
164     {
165         return;
166     }
167 
168     auto property = properties->second.find("Present");
169     if (property == properties->second.end())
170     {
171         return;
172     }
173 
174     _present = std::get<bool>(property->second);
175 
176     if (!_present)
177     {
178         getLogger().log(fmt::format(
179             "New fan {} interface added and fan is not present", _name));
180         if (_system.isPowerOn() && _fanMissingErrorTimer)
181         {
182             _fanMissingErrorTimer->restartOnce(
183                 std::chrono::seconds{*_fanMissingErrorDelay});
184         }
185     }
186 
187     _system.fanStatusChange(*this);
188 }
189 
190 void Fan::startMonitor()
191 {
192     _monitorReady = true;
193 
194     if (_countTimer)
195     {
196         _countTimer->resetRemaining();
197         _countTimer->setEnabled(true);
198     }
199 
200     std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
201         if (_present)
202         {
203             try
204             {
205                 // Force a getProperty call to check if the tach sensor is
206                 // on D-Bus.  If it isn't, now set it to nonfunctional.
207                 // This isn't done earlier so that code watching for
208                 // nonfunctional tach sensors doesn't take actions before
209                 // those sensors show up on D-Bus.
210                 sensor->updateTachAndTarget();
211                 tachChanged(*sensor);
212             }
213             catch (const util::DBusServiceError& e)
214             {
215                 // The tach property still isn't on D-Bus, ensure
216                 // sensor is nonfunctional.
217                 getLogger().log(fmt::format(
218                     "Monitoring starting but {} sensor value not on D-Bus",
219                     sensor->name()));
220 
221                 sensor->setFunctional(false);
222 
223                 if (_numSensorFailsForNonFunc)
224                 {
225                     if (_functional && (countNonFunctionalSensors() >=
226                                         _numSensorFailsForNonFunc))
227                     {
228                         updateInventory(false);
229                     }
230                 }
231 
232                 _system.fanStatusChange(*this);
233             }
234         }
235     });
236 }
237 
238 void Fan::tachChanged()
239 {
240     if (_monitorReady)
241     {
242         for (auto& s : _sensors)
243         {
244             tachChanged(*s);
245         }
246     }
247 }
248 
249 void Fan::tachChanged(TachSensor& sensor)
250 {
251     if (!_system.isPowerOn() || !_monitorReady)
252     {
253         return;
254     }
255 
256     if (_trustManager->active())
257     {
258         if (!_trustManager->checkTrust(sensor))
259         {
260             return;
261         }
262     }
263 
264     // If using the timebased method to determine functional status,
265     // check now, otherwise let _countTimer handle it.  A timer is
266     // used for the count method so that stuck sensors will continue
267     // to be checked.
268     if (sensor.getMethod() == MethodMode::timebased)
269     {
270         process(sensor);
271     }
272 }
273 
274 void Fan::countTimerExpired()
275 {
276     // For sensors that use the 'count' method, time to check their
277     // status and increment/decrement counts as necessary.
278     for (auto& sensor : _sensors)
279     {
280         if (_trustManager->active() && !_trustManager->checkTrust(*sensor))
281         {
282             continue;
283         }
284         process(*sensor);
285     }
286 }
287 
288 void Fan::process(TachSensor& sensor)
289 {
290     // If this sensor is out of range at this moment, start
291     // its timer, at the end of which the inventory
292     // for the fan may get updated to not functional.
293 
294     // If this sensor is OK, put everything back into a good state.
295 
296     if (outOfRange(sensor))
297     {
298         if (sensor.functional())
299         {
300             switch (sensor.getMethod())
301             {
302                 case MethodMode::timebased:
303                     // Start nonfunctional timer if not already running
304                     sensor.startTimer(TimerMode::nonfunc);
305                     break;
306                 case MethodMode::count:
307                     sensor.setCounter(true);
308                     if (sensor.getCounter() >= sensor.getThreshold())
309                     {
310                         updateState(sensor);
311                     }
312                     break;
313             }
314         }
315     }
316     else
317     {
318         switch (sensor.getMethod())
319         {
320             case MethodMode::timebased:
321                 if (sensor.functional())
322                 {
323                     if (sensor.timerRunning())
324                     {
325                         sensor.stopTimer();
326                     }
327                 }
328                 else
329                 {
330                     // Start functional timer if not already running
331                     sensor.startTimer(TimerMode::func);
332                 }
333                 break;
334             case MethodMode::count:
335                 sensor.setCounter(false);
336                 if (!sensor.functional() && sensor.getCounter() == 0)
337                 {
338                     updateState(sensor);
339                 }
340                 break;
341         }
342     }
343 }
344 
345 uint64_t Fan::findTargetSpeed()
346 {
347     uint64_t target = 0;
348     // The sensor doesn't support a target,
349     // so get it from another sensor.
350     auto s = std::find_if(_sensors.begin(), _sensors.end(),
351                           [](const auto& s) { return s->hasTarget(); });
352 
353     if (s != _sensors.end())
354     {
355         target = (*s)->getTarget();
356     }
357 
358     return target;
359 }
360 
361 size_t Fan::countNonFunctionalSensors()
362 {
363     return std::count_if(_sensors.begin(), _sensors.end(),
364                          [](const auto& s) { return !s->functional(); });
365 }
366 
367 bool Fan::outOfRange(const TachSensor& sensor)
368 {
369     auto actual = static_cast<uint64_t>(sensor.getInput());
370     auto range = sensor.getRange(_deviation);
371 
372     if ((actual < range.first) || (actual > range.second))
373     {
374         return true;
375     }
376 
377     return false;
378 }
379 
380 void Fan::updateState(TachSensor& sensor)
381 {
382     auto range = sensor.getRange(_deviation);
383 
384     if (!_system.isPowerOn())
385     {
386         return;
387     }
388 
389     sensor.setFunctional(!sensor.functional());
390     getLogger().log(
391         fmt::format("Setting tach sensor {} functional state to {}. "
392                     "[target = {}, input = {}, allowed range = ({} - {})]",
393                     sensor.name(), sensor.functional(), sensor.getTarget(),
394                     sensor.getInput(), range.first, range.second));
395 
396     // A zero value for _numSensorFailsForNonFunc means we aren't dealing
397     // with fan FRU functional status, only sensor functional status.
398     if (_numSensorFailsForNonFunc)
399     {
400         auto numNonFuncSensors = countNonFunctionalSensors();
401         // If the fan was nonfunctional and enough sensors are now OK,
402         // the fan can be set to functional as long as `set_func_on_present` was
403         // not set
404         if (!_setFuncOnPresent && !_functional &&
405             !(numNonFuncSensors >= _numSensorFailsForNonFunc))
406         {
407             getLogger().log(fmt::format("Setting fan {} to functional, number "
408                                         "of nonfunctional sensors = {}",
409                                         _name, numNonFuncSensors));
410             updateInventory(true);
411         }
412 
413         // If the fan is currently functional, but too many
414         // contained sensors are now nonfunctional, update
415         // the fan to nonfunctional.
416         if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc))
417         {
418             getLogger().log(fmt::format("Setting fan {} to nonfunctional, "
419                                         "number of nonfunctional sensors = {}",
420                                         _name, numNonFuncSensors));
421             updateInventory(false);
422         }
423     }
424 
425     _system.fanStatusChange(*this);
426 }
427 
428 void Fan::updateInventory(bool functional)
429 {
430     auto objectMap =
431         util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
432                               util::FUNCTIONAL_PROPERTY, functional);
433     auto response = util::SDBusPlus::lookupAndCallMethod(
434         _bus, util::INVENTORY_PATH, util::INVENTORY_INTF, "Notify", objectMap);
435     if (response.is_method_error())
436     {
437         log<level::ERR>("Error in Notify call to update inventory");
438         return;
439     }
440 
441     // This will always track the current state of the inventory.
442     _functional = functional;
443 }
444 
445 void Fan::presenceChanged(sdbusplus::message::message& msg)
446 {
447     std::string interface;
448     std::map<std::string, std::variant<bool>> properties;
449 
450     msg.read(interface, properties);
451 
452     auto presentProp = properties.find("Present");
453     if (presentProp != properties.end())
454     {
455         _present = std::get<bool>(presentProp->second);
456 
457         getLogger().log(
458             fmt::format("Fan {} presence state change to {}", _name, _present));
459 
460         _system.fanStatusChange(*this);
461 
462         if (_present && _setFuncOnPresent)
463         {
464             updateInventory(true);
465             std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
466                 sensor->setFunctional(true);
467                 sensor->resetMethod();
468             });
469         }
470 
471         if (_fanMissingErrorDelay)
472         {
473             if (!_present && _system.isPowerOn())
474             {
475                 _fanMissingErrorTimer->restartOnce(
476                     std::chrono::seconds{*_fanMissingErrorDelay});
477             }
478             else if (_present && _fanMissingErrorTimer->isEnabled())
479             {
480                 _fanMissingErrorTimer->setEnabled(false);
481             }
482         }
483     }
484 }
485 
486 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
487 {
488     if (_present && _system.isPowerOn())
489     {
490         _system.sensorErrorTimerExpired(*this, sensor);
491     }
492 }
493 
494 void Fan::powerStateChanged(bool powerStateOn)
495 {
496 #ifdef MONITOR_USE_JSON
497     if (powerStateOn)
498     {
499         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
500 
501         if (_present)
502         {
503             std::for_each(
504                 _sensors.begin(), _sensors.end(), [this](auto& sensor) {
505                     try
506                     {
507                         // Force a getProperty call.  If sensor is on D-Bus,
508                         // then make sure it's functional.
509                         sensor->updateTachAndTarget();
510 
511                         // If not functional, set it back to functional.
512                         if (!sensor->functional())
513                         {
514                             sensor->setFunctional(true);
515                             _system.fanStatusChange(*this, true);
516                         }
517 
518                         // Set the counters back to zero
519                         if (sensor->getMethod() == MethodMode::count)
520                         {
521                             sensor->resetMethod();
522                         }
523                     }
524                     catch (const util::DBusServiceError& e)
525                     {
526                         // Properties still aren't on D-Bus.  Let startMonitor()
527                         // deal with it.
528                         getLogger().log(fmt::format(
529                             "At power on, tach sensor {} value not on D-Bus",
530                             sensor->name()));
531                     }
532                 });
533 
534             // If configured to change functional state on the fan itself,
535             // Set it back to true now if necessary.
536             if (_numSensorFailsForNonFunc)
537             {
538                 if (!_functional &&
539                     (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
540                 {
541                     updateInventory(true);
542                 }
543             }
544         }
545         else
546         {
547             getLogger().log(
548                 fmt::format("At power on, fan {} is missing", _name));
549 
550             if (_fanMissingErrorTimer)
551             {
552                 _fanMissingErrorTimer->restartOnce(
553                     std::chrono::seconds{*_fanMissingErrorDelay});
554             }
555         }
556     }
557     else
558     {
559         _monitorReady = false;
560 
561         if (_monitorTimer.isEnabled())
562         {
563             _monitorTimer.setEnabled(false);
564         }
565 
566         if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
567         {
568             _fanMissingErrorTimer->setEnabled(false);
569         }
570 
571         std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
572             if (sensor->timerRunning())
573             {
574                 sensor->stopTimer();
575             }
576         });
577 
578         if (_countTimer)
579         {
580             _countTimer->setEnabled(false);
581         }
582     }
583 #endif
584 }
585 
586 } // namespace monitor
587 } // namespace fan
588 } // namespace phosphor
589