xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision b2e9a4fcc2253bcb585e92b4642ed4b0036941df)
1 /**
2  * Copyright © 2022 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <fmt/format.h>
25 
26 #include <phosphor-logging/log.hpp>
27 
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34 
35 using namespace phosphor::logging;
36 using namespace sdbusplus::bus::match;
37 
38 Fan::Fan(Mode mode, sdbusplus::bus::bus& bus, const sdeventplus::Event& event,
39          std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
40          System& system) :
41     _bus(bus),
42     _name(std::get<fanNameField>(def)),
43     _deviation(std::get<fanDeviationField>(def)),
44     _numSensorFailsForNonFunc(std::get<numSensorFailsForNonfuncField>(def)),
45     _trustManager(trust),
46 #ifdef MONITOR_USE_JSON
47     _monitorDelay(std::get<monitorStartDelayField>(def)),
48     _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
49 #endif
50     _system(system),
51     _presenceMatch(bus,
52                    rules::propertiesChanged(util::INVENTORY_PATH + _name,
53                                             util::INV_ITEM_IFACE),
54                    std::bind(std::mem_fn(&Fan::presenceChanged), this,
55                              std::placeholders::_1)),
56     _presenceIfaceAddedMatch(
57         bus,
58         rules::interfacesAdded() +
59             rules::argNpath(0, util::INVENTORY_PATH + _name),
60         std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
61                   std::placeholders::_1)),
62     _fanMissingErrorDelay(std::get<fanMissingErrDelayField>(def)),
63     _setFuncOnPresent(std::get<funcOnPresentField>(def))
64 {
65     // Setup tach sensors for monitoring
66     auto& sensors = std::get<sensorListField>(def);
67     for (auto& s : sensors)
68     {
69         _sensors.emplace_back(std::make_shared<TachSensor>(
70             mode, bus, *this, std::get<sensorNameField>(s),
71             std::get<hasTargetField>(s), std::get<funcDelay>(def),
72             std::get<targetInterfaceField>(s), std::get<factorField>(s),
73             std::get<offsetField>(s), std::get<methodField>(def),
74             std::get<thresholdField>(s), std::get<ignoreAboveMaxField>(s),
75             std::get<timeoutField>(def),
76             std::get<nonfuncRotorErrDelayField>(def),
77             std::get<countIntervalField>(def), event));
78 
79         _trustManager->registerSensor(_sensors.back());
80     }
81 
82     bool functionalState =
83         (_numSensorFailsForNonFunc == 0) ||
84         (countNonFunctionalSensors() < _numSensorFailsForNonFunc);
85 
86     if (updateInventory(functionalState) && !functionalState)
87     {
88         // the inventory update threw an exception, possibly because D-Bus
89         // wasn't ready. Try to update sensors back to functional to avoid a
90         // false-alarm. They will be updated again from subscribing to the
91         // properties-changed event
92 
93         for (auto& sensor : _sensors)
94             sensor->setFunctional(true);
95     }
96 
97 #ifndef MONITOR_USE_JSON
98     // Check current tach state when entering monitor mode
99     if (mode != Mode::init)
100     {
101         _monitorReady = true;
102 
103         // The TachSensors will now have already read the input
104         // and target values, so check them.
105         tachChanged();
106     }
107 #else
108     if (_system.isPowerOn())
109     {
110         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
111     }
112 #endif
113 
114     if (_fanMissingErrorDelay)
115     {
116         _fanMissingErrorTimer = std::make_unique<
117             sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
118             event, std::bind(&System::fanMissingErrorTimerExpired, &system,
119                              std::ref(*this)));
120     }
121 
122     try
123     {
124         _present = util::SDBusPlus::getProperty<bool>(
125             util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");
126 
127         if (!_present)
128         {
129             getLogger().log(
130                 fmt::format("On startup, fan {} is missing", _name));
131             if (_system.isPowerOn() && _fanMissingErrorTimer)
132             {
133                 _fanMissingErrorTimer->restartOnce(
134                     std::chrono::seconds{*_fanMissingErrorDelay});
135             }
136         }
137     }
138     catch (const util::DBusServiceError& e)
139     {
140         // This could happen on the first BMC boot if the presence
141         // detect app hasn't started yet and there isn't an inventory
142         // cache yet.
143     }
144 }
145 
146 void Fan::presenceIfaceAdded(sdbusplus::message::message& msg)
147 {
148     sdbusplus::message::object_path path;
149     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
150 
151     msg.read(path, interfaces);
152 
153     auto properties = interfaces.find(util::INV_ITEM_IFACE);
154     if (properties == interfaces.end())
155     {
156         return;
157     }
158 
159     auto property = properties->second.find("Present");
160     if (property == properties->second.end())
161     {
162         return;
163     }
164 
165     _present = std::get<bool>(property->second);
166 
167     if (!_present)
168     {
169         getLogger().log(fmt::format(
170             "New fan {} interface added and fan is not present", _name));
171         if (_system.isPowerOn() && _fanMissingErrorTimer)
172         {
173             _fanMissingErrorTimer->restartOnce(
174                 std::chrono::seconds{*_fanMissingErrorDelay});
175         }
176     }
177 
178     _system.fanStatusChange(*this);
179 }
180 
181 void Fan::startMonitor()
182 {
183     _monitorReady = true;
184 
185     std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
186         if (_present)
187         {
188             try
189             {
190                 // Force a getProperty call to check if the tach sensor is
191                 // on D-Bus.  If it isn't, now set it to nonfunctional.
192                 // This isn't done earlier so that code watching for
193                 // nonfunctional tach sensors doesn't take actions before
194                 // those sensors show up on D-Bus.
195                 sensor->updateTachAndTarget();
196                 tachChanged(*sensor);
197             }
198             catch (const util::DBusServiceError& e)
199             {
200                 // The tach property still isn't on D-Bus, ensure
201                 // sensor is nonfunctional.
202                 getLogger().log(fmt::format(
203                     "Monitoring starting but {} sensor value not on D-Bus",
204                     sensor->name()));
205 
206                 sensor->setFunctional(false);
207 
208                 if (_numSensorFailsForNonFunc)
209                 {
210                     if (_functional && (countNonFunctionalSensors() >=
211                                         _numSensorFailsForNonFunc))
212                     {
213                         updateInventory(false);
214                     }
215                 }
216 
217                 _system.fanStatusChange(*this);
218             }
219         }
220     });
221 }
222 
223 void Fan::tachChanged()
224 {
225     if (_monitorReady)
226     {
227         for (auto& s : _sensors)
228         {
229             tachChanged(*s);
230         }
231     }
232 }
233 
234 void Fan::tachChanged(TachSensor& sensor)
235 {
236     if (!_system.isPowerOn() || !_monitorReady)
237     {
238         return;
239     }
240 
241     if (_trustManager->active())
242     {
243         if (!_trustManager->checkTrust(sensor))
244         {
245             return;
246         }
247     }
248 
249     // If the error checking method is 'count', if a tach change leads
250     // to an out of range sensor the count timer will take over in calling
251     // process() until the sensor is healthy again.
252     if (!sensor.countTimerRunning())
253     {
254         process(sensor);
255     }
256 }
257 
258 void Fan::countTimerExpired(TachSensor& sensor)
259 {
260     if (_trustManager->active() && !_trustManager->checkTrust(sensor))
261     {
262         return;
263     }
264     process(sensor);
265 }
266 
267 void Fan::process(TachSensor& sensor)
268 {
269     // If this sensor is out of range at this moment, start
270     // its timer, at the end of which the inventory
271     // for the fan may get updated to not functional.
272 
273     // If this sensor is OK, put everything back into a good state.
274 
275     if (outOfRange(sensor))
276     {
277         if (sensor.functional())
278         {
279             switch (sensor.getMethod())
280             {
281                 case MethodMode::timebased:
282                     // Start nonfunctional timer if not already running
283                     sensor.startTimer(TimerMode::nonfunc);
284                     break;
285                 case MethodMode::count:
286 
287                     if (!sensor.countTimerRunning())
288                     {
289                         sensor.startCountTimer();
290                     }
291                     sensor.setCounter(true);
292                     if (sensor.getCounter() >= sensor.getThreshold())
293                     {
294                         updateState(sensor);
295                     }
296                     break;
297             }
298         }
299     }
300     else
301     {
302         switch (sensor.getMethod())
303         {
304             case MethodMode::timebased:
305                 if (sensor.functional())
306                 {
307                     if (sensor.timerRunning())
308                     {
309                         sensor.stopTimer();
310                     }
311                 }
312                 else
313                 {
314                     // Start functional timer if not already running
315                     sensor.startTimer(TimerMode::func);
316                 }
317                 break;
318             case MethodMode::count:
319                 sensor.setCounter(false);
320                 if (sensor.getCounter() == 0)
321                 {
322                     if (!sensor.functional())
323                     {
324                         updateState(sensor);
325                     }
326 
327                     sensor.stopCountTimer();
328                 }
329                 break;
330         }
331     }
332 }
333 
334 uint64_t Fan::findTargetSpeed()
335 {
336     uint64_t target = 0;
337     // The sensor doesn't support a target,
338     // so get it from another sensor.
339     auto s = std::find_if(_sensors.begin(), _sensors.end(),
340                           [](const auto& s) { return s->hasTarget(); });
341 
342     if (s != _sensors.end())
343     {
344         target = (*s)->getTarget();
345     }
346 
347     return target;
348 }
349 
350 size_t Fan::countNonFunctionalSensors() const
351 {
352     return std::count_if(_sensors.begin(), _sensors.end(),
353                          [](const auto& s) { return !s->functional(); });
354 }
355 
356 bool Fan::outOfRange(const TachSensor& sensor)
357 {
358     if (!sensor.hasOwner())
359     {
360         return true;
361     }
362 
363     auto actual = static_cast<uint64_t>(sensor.getInput());
364     auto range = sensor.getRange(_deviation);
365 
366     return ((actual < range.first) ||
367             (range.second && actual > range.second.value()));
368 }
369 
370 void Fan::updateState(TachSensor& sensor)
371 {
372     if (!_system.isPowerOn())
373     {
374         return;
375     }
376 
377     auto range = sensor.getRange(_deviation);
378     std::string rangeMax = "NoMax";
379     if (range.second)
380     {
381         rangeMax = std::to_string(range.second.value());
382     }
383 
384     sensor.setFunctional(!sensor.functional());
385     getLogger().log(
386         fmt::format("Setting tach sensor {} functional state to {}. "
387                     "[target = {}, input = {}, allowed range = ({} - {})]",
388                     sensor.name(), sensor.functional(), sensor.getTarget(),
389                     sensor.getInput(), range.first, rangeMax));
390 
391     // A zero value for _numSensorFailsForNonFunc means we aren't dealing
392     // with fan FRU functional status, only sensor functional status.
393     if (_numSensorFailsForNonFunc)
394     {
395         auto numNonFuncSensors = countNonFunctionalSensors();
396         // If the fan was nonfunctional and enough sensors are now OK,
397         // the fan can be set to functional as long as `set_func_on_present` was
398         // not set
399         if (!_setFuncOnPresent && !_functional &&
400             !(numNonFuncSensors >= _numSensorFailsForNonFunc))
401         {
402             getLogger().log(fmt::format("Setting fan {} to functional, number "
403                                         "of nonfunctional sensors = {}",
404                                         _name, numNonFuncSensors));
405             updateInventory(true);
406         }
407 
408         // If the fan is currently functional, but too many
409         // contained sensors are now nonfunctional, update
410         // the fan to nonfunctional.
411         if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc))
412         {
413             getLogger().log(fmt::format("Setting fan {} to nonfunctional, "
414                                         "number of nonfunctional sensors = {}",
415                                         _name, numNonFuncSensors));
416             updateInventory(false);
417         }
418     }
419 
420     _system.fanStatusChange(*this);
421 }
422 
423 bool Fan::updateInventory(bool functional)
424 {
425     bool dbusError = false;
426 
427     try
428     {
429         auto objectMap =
430             util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
431                                   util::FUNCTIONAL_PROPERTY, functional);
432 
433         auto response = util::SDBusPlus::callMethod(
434             _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
435             util::INVENTORY_INTF, "Notify", objectMap);
436 
437         if (response.is_method_error())
438         {
439             log<level::ERR>("Error in Notify call to update inventory");
440 
441             dbusError = true;
442         }
443     }
444     catch (const util::DBusError& e)
445     {
446         dbusError = true;
447 
448         getLogger().log(
449             fmt::format("D-Bus Exception reading/updating inventory : {}",
450                         e.what()),
451             Logger::error);
452     }
453 
454     // This will always track the current state of the inventory.
455     _functional = functional;
456 
457     return dbusError;
458 }
459 
460 void Fan::presenceChanged(sdbusplus::message::message& msg)
461 {
462     std::string interface;
463     std::map<std::string, std::variant<bool>> properties;
464 
465     msg.read(interface, properties);
466 
467     auto presentProp = properties.find("Present");
468     if (presentProp != properties.end())
469     {
470         _present = std::get<bool>(presentProp->second);
471 
472         getLogger().log(
473             fmt::format("Fan {} presence state change to {}", _name, _present));
474 
475         if (_present && _setFuncOnPresent)
476         {
477             updateInventory(true);
478             std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
479                 sensor->setFunctional(true);
480                 sensor->resetMethod();
481             });
482         }
483 
484         _system.fanStatusChange(*this);
485 
486         if (_fanMissingErrorDelay)
487         {
488             if (!_present && _system.isPowerOn())
489             {
490                 _fanMissingErrorTimer->restartOnce(
491                     std::chrono::seconds{*_fanMissingErrorDelay});
492             }
493             else if (_present && _fanMissingErrorTimer->isEnabled())
494             {
495                 _fanMissingErrorTimer->setEnabled(false);
496             }
497         }
498     }
499 }
500 
501 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
502 {
503     if (_present && _system.isPowerOn())
504     {
505         _system.sensorErrorTimerExpired(*this, sensor);
506     }
507 }
508 
509 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
510 {
511 #ifdef MONITOR_USE_JSON
512     if (powerStateOn)
513     {
514         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
515 
516         _numSensorsOnDBusAtPowerOn = 0;
517 
518         std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
519             try
520             {
521                 // Force a getProperty call.  If sensor is on D-Bus,
522                 // then make sure it's functional.
523                 sensor->updateTachAndTarget();
524 
525                 _numSensorsOnDBusAtPowerOn++;
526 
527                 if (_present)
528                 {
529                     // If not functional, set it back to functional.
530                     if (!sensor->functional())
531                     {
532                         sensor->setFunctional(true);
533                         _system.fanStatusChange(*this, true);
534                     }
535 
536                     // Set the counters back to zero
537                     if (sensor->getMethod() == MethodMode::count)
538                     {
539                         sensor->resetMethod();
540                     }
541                 }
542             }
543             catch (const util::DBusError& e)
544             {
545                 // Properties still aren't on D-Bus.  Let startMonitor()
546                 // deal with it, or maybe System::powerStateChanged() if
547                 // there aren't any sensors at all on D-Bus.
548                 getLogger().log(fmt::format(
549                     "At power on, tach sensor {} value not on D-Bus",
550                     sensor->name()));
551             }
552         });
553 
554         if (_present)
555         {
556             // If configured to change functional state on the fan itself,
557             // Set it back to true now if necessary.
558             if (_numSensorFailsForNonFunc)
559             {
560                 if (!_functional &&
561                     (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
562                 {
563                     updateInventory(true);
564                 }
565             }
566         }
567         else
568         {
569             getLogger().log(
570                 fmt::format("At power on, fan {} is missing", _name));
571 
572             if (_fanMissingErrorTimer)
573             {
574                 _fanMissingErrorTimer->restartOnce(
575                     std::chrono::seconds{*_fanMissingErrorDelay});
576             }
577         }
578     }
579     else
580     {
581         _monitorReady = false;
582 
583         if (_monitorTimer.isEnabled())
584         {
585             _monitorTimer.setEnabled(false);
586         }
587 
588         if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
589         {
590             _fanMissingErrorTimer->setEnabled(false);
591         }
592 
593         std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
594             if (sensor->timerRunning())
595             {
596                 sensor->stopTimer();
597             }
598 
599             sensor->stopCountTimer();
600         });
601     }
602 #endif
603 }
604 
605 } // namespace monitor
606 } // namespace fan
607 } // namespace phosphor
608