xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision a787af09c1e7b6d1a9b76b8a203615a3c919af18)
1 /**
2  * Copyright © 2022 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <fmt/format.h>
25 
26 #include <phosphor-logging/log.hpp>
27 
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34 
35 using namespace phosphor::logging;
36 using namespace sdbusplus::bus::match;
37 
38 Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
39          std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
40          System& system) :
41     _bus(bus),
42     _name(std::get<fanNameField>(def)),
43     _deviation(std::get<fanDeviationField>(def)),
44     _numSensorFailsForNonFunc(std::get<numSensorFailsForNonfuncField>(def)),
45     _trustManager(trust),
46 #ifdef MONITOR_USE_JSON
47     _monitorDelay(std::get<monitorStartDelayField>(def)),
48     _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
49 #endif
50     _system(system),
51     _presenceMatch(bus,
52                    rules::propertiesChanged(util::INVENTORY_PATH + _name,
53                                             util::INV_ITEM_IFACE),
54                    std::bind(std::mem_fn(&Fan::presenceChanged), this,
55                              std::placeholders::_1)),
56     _presenceIfaceAddedMatch(
57         bus,
58         rules::interfacesAdded() +
59             rules::argNpath(0, util::INVENTORY_PATH + _name),
60         std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
61                   std::placeholders::_1)),
62     _fanMissingErrorDelay(std::get<fanMissingErrDelayField>(def)),
63     _setFuncOnPresent(std::get<funcOnPresentField>(def))
64 {
65     // Setup tach sensors for monitoring
66     auto& sensors = std::get<sensorListField>(def);
67     for (auto& s : sensors)
68     {
69         _sensors.emplace_back(std::make_shared<TachSensor>(
70             mode, bus, *this, std::get<sensorNameField>(s),
71             std::get<hasTargetField>(s), std::get<funcDelay>(def),
72             std::get<targetInterfaceField>(s), std::get<factorField>(s),
73             std::get<offsetField>(s), std::get<methodField>(def),
74             std::get<thresholdField>(s), std::get<ignoreAboveMaxField>(s),
75             std::get<timeoutField>(def),
76             std::get<nonfuncRotorErrDelayField>(def),
77             std::get<countIntervalField>(def), event));
78 
79         _trustManager->registerSensor(_sensors.back());
80     }
81 
82     bool functionalState =
83         (_numSensorFailsForNonFunc == 0) ||
84         (countNonFunctionalSensors() < _numSensorFailsForNonFunc);
85 
86     if (updateInventory(functionalState) && !functionalState)
87     {
88         // the inventory update threw an exception, possibly because D-Bus
89         // wasn't ready. Try to update sensors back to functional to avoid a
90         // false-alarm. They will be updated again from subscribing to the
91         // properties-changed event
92 
93         for (auto& sensor : _sensors)
94             sensor->setFunctional(true);
95     }
96 
97 #ifndef MONITOR_USE_JSON
98     // Check current tach state when entering monitor mode
99     if (mode != Mode::init)
100     {
101         _monitorReady = true;
102 
103         // The TachSensors will now have already read the input
104         // and target values, so check them.
105         tachChanged();
106     }
107 #else
108     if (_system.isPowerOn())
109     {
110         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
111     }
112 #endif
113 
114     if (_fanMissingErrorDelay)
115     {
116         _fanMissingErrorTimer = std::make_unique<
117             sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
118             event, std::bind(&System::fanMissingErrorTimerExpired, &system,
119                              std::ref(*this)));
120     }
121 
122     try
123     {
124         _present = util::SDBusPlus::getProperty<bool>(
125             util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");
126 
127         if (!_present)
128         {
129             getLogger().log(
130                 fmt::format("On startup, fan {} is missing", _name));
131             if (_system.isPowerOn() && _fanMissingErrorTimer)
132             {
133                 _fanMissingErrorTimer->restartOnce(
134                     std::chrono::seconds{*_fanMissingErrorDelay});
135             }
136         }
137     }
138     catch (const util::DBusServiceError& e)
139     {
140         // This could happen on the first BMC boot if the presence
141         // detect app hasn't started yet and there isn't an inventory
142         // cache yet.
143     }
144 }
145 
146 void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
147 {
148     sdbusplus::message::object_path path;
149     std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;
150 
151     msg.read(path, interfaces);
152 
153     auto properties = interfaces.find(util::INV_ITEM_IFACE);
154     if (properties == interfaces.end())
155     {
156         return;
157     }
158 
159     auto property = properties->second.find("Present");
160     if (property == properties->second.end())
161     {
162         return;
163     }
164 
165     _present = std::get<bool>(property->second);
166 
167     if (!_present)
168     {
169         getLogger().log(fmt::format(
170             "New fan {} interface added and fan is not present", _name));
171         if (_system.isPowerOn() && _fanMissingErrorTimer)
172         {
173             _fanMissingErrorTimer->restartOnce(
174                 std::chrono::seconds{*_fanMissingErrorDelay});
175         }
176     }
177 
178     _system.fanStatusChange(*this);
179 }
180 
181 void Fan::startMonitor()
182 {
183     _monitorReady = true;
184 
185     std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
186         if (_present)
187         {
188             try
189             {
190                 // Force a getProperty call to check if the tach sensor is
191                 // on D-Bus.  If it isn't, now set it to nonfunctional.
192                 // This isn't done earlier so that code watching for
193                 // nonfunctional tach sensors doesn't take actions before
194                 // those sensors show up on D-Bus.
195                 sensor->updateTachAndTarget();
196                 tachChanged(*sensor);
197             }
198             catch (const util::DBusServiceError& e)
199             {
200                 // The tach property still isn't on D-Bus. Ensure
201                 // sensor is nonfunctional, but skip creating an
202                 // error for it since it isn't a fan problem.
203                 getLogger().log(fmt::format(
204                     "Monitoring starting but {} sensor value not on D-Bus",
205                     sensor->name()));
206 
207                 sensor->setFunctional(false, true);
208 
209                 if (_numSensorFailsForNonFunc)
210                 {
211                     if (_functional && (countNonFunctionalSensors() >=
212                                         _numSensorFailsForNonFunc))
213                     {
214                         updateInventory(false);
215                     }
216                 }
217 
218                 // At this point, don't start any power off actions due
219                 // to missing sensors.  Let something else handle that
220                 // policy.
221                 _system.fanStatusChange(*this, true);
222             }
223         }
224     });
225 }
226 
227 void Fan::tachChanged()
228 {
229     if (_monitorReady)
230     {
231         for (auto& s : _sensors)
232         {
233             tachChanged(*s);
234         }
235     }
236 }
237 
238 void Fan::tachChanged(TachSensor& sensor)
239 {
240     if (!_system.isPowerOn() || !_monitorReady)
241     {
242         return;
243     }
244 
245     if (_trustManager->active())
246     {
247         if (!_trustManager->checkTrust(sensor))
248         {
249             return;
250         }
251     }
252 
253     // If the error checking method is 'count', if a tach change leads
254     // to an out of range sensor the count timer will take over in calling
255     // process() until the sensor is healthy again.
256     if (!sensor.countTimerRunning())
257     {
258         process(sensor);
259     }
260 }
261 
262 void Fan::countTimerExpired(TachSensor& sensor)
263 {
264     if (_trustManager->active() && !_trustManager->checkTrust(sensor))
265     {
266         return;
267     }
268     process(sensor);
269 }
270 
271 void Fan::process(TachSensor& sensor)
272 {
273     // If this sensor is out of range at this moment, start
274     // its timer, at the end of which the inventory
275     // for the fan may get updated to not functional.
276 
277     // If this sensor is OK, put everything back into a good state.
278 
279     if (outOfRange(sensor))
280     {
281         if (sensor.functional())
282         {
283             switch (sensor.getMethod())
284             {
285                 case MethodMode::timebased:
286                     // Start nonfunctional timer if not already running
287                     sensor.startTimer(TimerMode::nonfunc);
288                     break;
289                 case MethodMode::count:
290 
291                     if (!sensor.countTimerRunning())
292                     {
293                         sensor.startCountTimer();
294                     }
295                     sensor.setCounter(true);
296                     if (sensor.getCounter() >= sensor.getThreshold())
297                     {
298                         updateState(sensor);
299                     }
300                     break;
301             }
302         }
303     }
304     else
305     {
306         switch (sensor.getMethod())
307         {
308             case MethodMode::timebased:
309                 if (sensor.functional())
310                 {
311                     if (sensor.timerRunning())
312                     {
313                         sensor.stopTimer();
314                     }
315                 }
316                 else
317                 {
318                     // Start functional timer if not already running
319                     sensor.startTimer(TimerMode::func);
320                 }
321                 break;
322             case MethodMode::count:
323                 sensor.setCounter(false);
324                 if (sensor.getCounter() == 0)
325                 {
326                     if (!sensor.functional())
327                     {
328                         updateState(sensor);
329                     }
330 
331                     sensor.stopCountTimer();
332                 }
333                 break;
334         }
335     }
336 }
337 
338 uint64_t Fan::findTargetSpeed()
339 {
340     uint64_t target = 0;
341     // The sensor doesn't support a target,
342     // so get it from another sensor.
343     auto s = std::find_if(_sensors.begin(), _sensors.end(),
344                           [](const auto& s) { return s->hasTarget(); });
345 
346     if (s != _sensors.end())
347     {
348         target = (*s)->getTarget();
349     }
350 
351     return target;
352 }
353 
354 size_t Fan::countNonFunctionalSensors() const
355 {
356     return std::count_if(_sensors.begin(), _sensors.end(),
357                          [](const auto& s) { return !s->functional(); });
358 }
359 
360 bool Fan::outOfRange(const TachSensor& sensor)
361 {
362     if (!sensor.hasOwner())
363     {
364         return true;
365     }
366 
367     auto actual = static_cast<uint64_t>(sensor.getInput());
368     auto range = sensor.getRange(_deviation);
369 
370     return ((actual < range.first) ||
371             (range.second && actual > range.second.value()));
372 }
373 
374 void Fan::updateState(TachSensor& sensor)
375 {
376     if (!_system.isPowerOn())
377     {
378         return;
379     }
380 
381     auto range = sensor.getRange(_deviation);
382     std::string rangeMax = "NoMax";
383     if (range.second)
384     {
385         rangeMax = std::to_string(range.second.value());
386     }
387 
388     // Skip starting the error timer if the sensor
389     // isn't on D-Bus as this isn't a fan hardware problem.
390     sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());
391 
392     getLogger().log(fmt::format(
393         "Setting tach sensor {} functional state to {}. "
394         "[target = {}, input = {}, allowed range = ({} - {}) "
395         "owned = {}]",
396         sensor.name(), sensor.functional(), sensor.getTarget(),
397         sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));
398 
399     // A zero value for _numSensorFailsForNonFunc means we aren't dealing
400     // with fan FRU functional status, only sensor functional status.
401     if (_numSensorFailsForNonFunc)
402     {
403         auto numNonFuncSensors = countNonFunctionalSensors();
404         // If the fan was nonfunctional and enough sensors are now OK,
405         // the fan can be set to functional as long as `set_func_on_present` was
406         // not set
407         if (!_setFuncOnPresent && !_functional &&
408             !(numNonFuncSensors >= _numSensorFailsForNonFunc))
409         {
410             getLogger().log(fmt::format("Setting fan {} to functional, number "
411                                         "of nonfunctional sensors = {}",
412                                         _name, numNonFuncSensors));
413             updateInventory(true);
414         }
415 
416         // If the fan is currently functional, but too many
417         // contained sensors are now nonfunctional, update
418         // the fan to nonfunctional.
419         if (_functional && (numNonFuncSensors >= _numSensorFailsForNonFunc))
420         {
421             getLogger().log(fmt::format("Setting fan {} to nonfunctional, "
422                                         "number of nonfunctional sensors = {}",
423                                         _name, numNonFuncSensors));
424             updateInventory(false);
425         }
426     }
427 
428     // Skip the power off rule checks if the sensor isn't
429     // on D-Bus so a running system isn't shutdown.
430     _system.fanStatusChange(*this, !sensor.hasOwner());
431 }
432 
433 bool Fan::updateInventory(bool functional)
434 {
435     bool dbusError = false;
436 
437     try
438     {
439         auto objectMap =
440             util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
441                                   util::FUNCTIONAL_PROPERTY, functional);
442 
443         auto response = util::SDBusPlus::callMethod(
444             _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
445             util::INVENTORY_INTF, "Notify", objectMap);
446 
447         if (response.is_method_error())
448         {
449             log<level::ERR>("Error in Notify call to update inventory");
450 
451             dbusError = true;
452         }
453     }
454     catch (const util::DBusError& e)
455     {
456         dbusError = true;
457 
458         getLogger().log(
459             fmt::format("D-Bus Exception reading/updating inventory : {}",
460                         e.what()),
461             Logger::error);
462     }
463 
464     // This will always track the current state of the inventory.
465     _functional = functional;
466 
467     return dbusError;
468 }
469 
470 void Fan::presenceChanged(sdbusplus::message_t& msg)
471 {
472     std::string interface;
473     std::map<std::string, std::variant<bool>> properties;
474 
475     msg.read(interface, properties);
476 
477     auto presentProp = properties.find("Present");
478     if (presentProp != properties.end())
479     {
480         _present = std::get<bool>(presentProp->second);
481 
482         getLogger().log(
483             fmt::format("Fan {} presence state change to {}", _name, _present));
484 
485         if (_present && _setFuncOnPresent)
486         {
487             updateInventory(true);
488             std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
489                 sensor->setFunctional(true);
490                 sensor->resetMethod();
491             });
492         }
493 
494         _system.fanStatusChange(*this);
495 
496         if (_fanMissingErrorDelay)
497         {
498             if (!_present && _system.isPowerOn())
499             {
500                 _fanMissingErrorTimer->restartOnce(
501                     std::chrono::seconds{*_fanMissingErrorDelay});
502             }
503             else if (_present && _fanMissingErrorTimer->isEnabled())
504             {
505                 _fanMissingErrorTimer->setEnabled(false);
506             }
507         }
508     }
509 }
510 
511 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
512 {
513     if (_present && _system.isPowerOn())
514     {
515         _system.sensorErrorTimerExpired(*this, sensor);
516     }
517 }
518 
519 void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
520 {
521 #ifdef MONITOR_USE_JSON
522     if (powerStateOn)
523     {
524         _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
525 
526         _numSensorsOnDBusAtPowerOn = 0;
527 
528         std::for_each(_sensors.begin(), _sensors.end(), [this](auto& sensor) {
529             try
530             {
531                 // Force a getProperty call.  If sensor is on D-Bus,
532                 // then make sure it's functional.
533                 sensor->updateTachAndTarget();
534 
535                 _numSensorsOnDBusAtPowerOn++;
536 
537                 if (_present)
538                 {
539                     // If not functional, set it back to functional.
540                     if (!sensor->functional())
541                     {
542                         sensor->setFunctional(true);
543                         _system.fanStatusChange(*this, true);
544                     }
545 
546                     // Set the counters back to zero
547                     if (sensor->getMethod() == MethodMode::count)
548                     {
549                         sensor->resetMethod();
550                     }
551                 }
552             }
553             catch (const util::DBusError& e)
554             {
555                 // Properties still aren't on D-Bus.  Let startMonitor()
556                 // deal with it, or maybe System::powerStateChanged() if
557                 // there aren't any sensors at all on D-Bus.
558                 getLogger().log(fmt::format(
559                     "At power on, tach sensor {} value not on D-Bus",
560                     sensor->name()));
561             }
562         });
563 
564         if (_present)
565         {
566             // If configured to change functional state on the fan itself,
567             // Set it back to true now if necessary.
568             if (_numSensorFailsForNonFunc)
569             {
570                 if (!_functional &&
571                     (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
572                 {
573                     updateInventory(true);
574                 }
575             }
576         }
577         else
578         {
579             getLogger().log(
580                 fmt::format("At power on, fan {} is missing", _name));
581 
582             if (_fanMissingErrorTimer)
583             {
584                 _fanMissingErrorTimer->restartOnce(
585                     std::chrono::seconds{*_fanMissingErrorDelay});
586             }
587         }
588     }
589     else
590     {
591         _monitorReady = false;
592 
593         if (_monitorTimer.isEnabled())
594         {
595             _monitorTimer.setEnabled(false);
596         }
597 
598         if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
599         {
600             _fanMissingErrorTimer->setEnabled(false);
601         }
602 
603         std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
604             if (sensor->timerRunning())
605             {
606                 sensor->stopTimer();
607             }
608 
609             sensor->stopCountTimer();
610         });
611     }
612 #endif
613 }
614 
615 } // namespace monitor
616 } // namespace fan
617 } // namespace phosphor
618