xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision fbf4703f3de7fbdbd8388e946bd71c3b760b174c)
1 /**
2  * Copyright © 2022 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <phosphor-logging/log.hpp>
25 
26 #include <format>
27 
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34 
35 using namespace phosphor::logging;
36 using namespace sdbusplus::bus::match;
37 
/**
 * Constructs a fan from its definition.
 *
 * Creates one TachSensor per entry in def.sensorList and registers each with
 * the trust manager, writes the initial functional state to inventory, sets
 * up the timers that apply to this build/configuration and finally reads the
 * fan's current Present property from D-Bus.
 */
Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
         std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
         System& system) :
    _bus(bus),
    _name(def.name), _deviation(def.deviation),
    _upperDeviation(def.upperDeviation),
    _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
    _trustManager(trust),
#ifdef MONITOR_USE_JSON
    _monitorDelay(def.monitorStartDelay),
    _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
#endif
    _system(system),
    _presenceMatch(bus,
                   rules::propertiesChanged(util::INVENTORY_PATH + _name,
                                            util::INV_ITEM_IFACE),
                   std::bind(std::mem_fn(&Fan::presenceChanged), this,
                             std::placeholders::_1)),
    _presenceIfaceAddedMatch(
        bus,
        rules::interfacesAdded() +
            rules::argNpath(0, util::INVENTORY_PATH + _name),
        std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
                  std::placeholders::_1)),
    _fanMissingErrorDelay(def.fanMissingErrDelay),
    _setFuncOnPresent(def.funcOnPresent)
{
    // One tach sensor per configured sensor; each one is also handed to the
    // trust manager right after it is created.
    for (const auto& sensorDef : def.sensorList)
    {
        _sensors.emplace_back(std::make_shared<TachSensor>(
            mode, bus, *this, sensorDef.name, sensorDef.hasTarget,
            def.funcDelay, sensorDef.targetInterface, sensorDef.targetPath,
            sensorDef.factor, sensorDef.offset, def.method,
            sensorDef.threshold, sensorDef.ignoreAboveMax, def.timeout,
            def.nonfuncRotorErrDelay, def.countInterval, event));

        _trustManager->registerSensor(_sensors.back());
    }

    // A threshold of zero means the fan is always treated as functional
    // here; otherwise it depends on how many sensors are nonfunctional.
    const bool startsFunctional =
        (_numSensorFailsForNonFunc == 0) ||
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc);

    const bool inventoryWriteFailed = updateInventory(startsFunctional);

    if (inventoryWriteFailed && !startsFunctional)
    {
        // The inventory update reported a D-Bus error, possibly because
        // D-Bus wasn't ready. Put the sensors back to functional to avoid a
        // false alarm. They will be updated again from subscribing to the
        // properties-changed event.
        for (auto& tach : _sensors)
        {
            tach->setFunctional(true);
        }
    }

#ifndef MONITOR_USE_JSON
    // Check current tach state when entering monitor mode
    if (mode != Mode::init)
    {
        _monitorReady = true;

        // The TachSensors will now have already read the input
        // and target values, so check them.
        tachChanged();
    }
#else
    // With JSON configs, monitoring starts after a delay once power is on.
    if (_system.isPowerOn())
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
    }
#endif

    // The missing-fan error timer only exists when a delay was configured.
    if (_fanMissingErrorDelay)
    {
        using MissingFanTimer =
            sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>;

        _fanMissingErrorTimer = std::make_unique<MissingFanTimer>(
            event, std::bind(&System::fanMissingErrorTimerExpired, &system,
                             std::ref(*this)));
    }

    try
    {
        _present = util::SDBusPlus::getProperty<bool>(
            util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");

        if (!_present)
        {
            getLogger().log(
                std::format("On startup, fan {} is missing", _name));

            if (_system.isPowerOn() && _fanMissingErrorTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    catch (const util::DBusServiceError&)
    {
        // This could happen on the first BMC boot if the presence
        // detect app hasn't started yet and there isn't an inventory
        // cache yet.
    }
}
140 
/**
 * InterfacesAdded handler for this fan's inventory path.
 *
 * If the added interfaces include the inventory item interface with a
 * Present property, caches that value, starts the missing-fan error timer
 * when the fan is absent while power is on, and notifies the System object.
 */
void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
{
    using PropertyMap = std::map<std::string, std::variant<bool>>;

    sdbusplus::message::object_path objPath;
    std::map<std::string, PropertyMap> addedIfaces;

    msg.read(objPath, addedIfaces);

    // Only the inventory item interface is of interest.
    const auto itemIface = addedIfaces.find(util::INV_ITEM_IFACE);
    if (itemIface == addedIfaces.end())
    {
        return;
    }

    const auto& itemProps = itemIface->second;
    const auto presentEntry = itemProps.find("Present");
    if (presentEntry == itemProps.end())
    {
        return;
    }

    _present = std::get<bool>(presentEntry->second);

    if (!_present)
    {
        getLogger().log(std::format(
            "New fan {} interface added and fan is not present", _name));

        // The timer only exists when a missing-fan delay is configured.
        if (_system.isPowerOn() && _fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }

    _system.fanStatusChange(*this);
}
175 
/**
 * Marks the fan as ready to be monitored and, while the fan is present,
 * refreshes and evaluates every tach sensor. Sensors whose values cannot be
 * read from D-Bus are set nonfunctional without triggering power off rules.
 */
void Fan::startMonitor()
{
    _monitorReady = true;

    for (auto& tach : _sensors)
    {
        // Nothing to check on a fan that isn't there.
        if (!_present)
        {
            continue;
        }

        try
        {
            // Force a getProperty call to check if the tach sensor is
            // on D-Bus.  If it isn't, now set it to nonfunctional.
            // This isn't done earlier so that code watching for
            // nonfunctional tach sensors doesn't take actions before
            // those sensors show up on D-Bus.
            tach->updateTachAndTarget();
            tachChanged(*tach);
        }
        catch (const util::DBusServiceError&)
        {
            // The tach property still isn't on D-Bus. Ensure
            // sensor is nonfunctional, but skip creating an
            // error for it since it isn't a fan problem.
            getLogger().log(std::format(
                "Monitoring starting but {} sensor value not on D-Bus",
                tach->name()));

            tach->setFunctional(false, true);

            // Only touch the fan's own functional state when configured
            // to, and only when it needs to change to nonfunctional.
            if (_numSensorFailsForNonFunc && _functional &&
                (countNonFunctionalSensors() >= _numSensorFailsForNonFunc))
            {
                updateInventory(false);
            }

            // At this point, don't start any power off actions due
            // to missing sensors.  Let something else handle that
            // policy.
            _system.fanStatusChange(*this, true);
        }
    }
}
221 
222 void Fan::tachChanged()
223 {
224     if (_monitorReady)
225     {
226         for (auto& s : _sensors)
227         {
228             tachChanged(*s);
229         }
230     }
231 }
232 
233 void Fan::tachChanged(TachSensor& sensor)
234 {
235     if (!_system.isPowerOn() || !_monitorReady)
236     {
237         return;
238     }
239 
240     if (_trustManager->active())
241     {
242         if (!_trustManager->checkTrust(sensor))
243         {
244             return;
245         }
246     }
247 
248     // If the error checking method is 'count', if a tach change leads
249     // to an out of range sensor the count timer will take over in calling
250     // process() until the sensor is healthy again.
251     if (!sensor.countTimerRunning())
252     {
253         process(sensor);
254     }
255 }
256 
257 void Fan::countTimerExpired(TachSensor& sensor)
258 {
259     if (_trustManager->active() && !_trustManager->checkTrust(sensor))
260     {
261         return;
262     }
263     process(sensor);
264 }
265 
266 void Fan::process(TachSensor& sensor)
267 {
268     // If this sensor is out of range at this moment, start
269     // its timer, at the end of which the inventory
270     // for the fan may get updated to not functional.
271 
272     // If this sensor is OK, put everything back into a good state.
273 
274     if (outOfRange(sensor))
275     {
276         if (sensor.functional())
277         {
278             switch (sensor.getMethod())
279             {
280                 case MethodMode::timebased:
281                     // Start nonfunctional timer if not already running
282                     sensor.startTimer(TimerMode::nonfunc);
283                     break;
284                 case MethodMode::count:
285 
286                     if (!sensor.countTimerRunning())
287                     {
288                         sensor.startCountTimer();
289                     }
290                     sensor.setCounter(true);
291                     if (sensor.getCounter() >= sensor.getThreshold())
292                     {
293                         updateState(sensor);
294                     }
295                     break;
296             }
297         }
298     }
299     else
300     {
301         switch (sensor.getMethod())
302         {
303             case MethodMode::timebased:
304                 if (sensor.functional())
305                 {
306                     if (sensor.timerRunning())
307                     {
308                         sensor.stopTimer();
309                     }
310                 }
311                 else
312                 {
313                     // Start functional timer if not already running
314                     sensor.startTimer(TimerMode::func);
315                 }
316                 break;
317             case MethodMode::count:
318                 sensor.setCounter(false);
319                 if (sensor.getCounter() == 0)
320                 {
321                     if (!sensor.functional())
322                     {
323                         updateState(sensor);
324                     }
325 
326                     sensor.stopCountTimer();
327                 }
328                 break;
329         }
330     }
331 }
332 
333 uint64_t Fan::findTargetSpeed()
334 {
335     uint64_t target = 0;
336     // The sensor doesn't support a target,
337     // so get it from another sensor.
338     auto s = std::find_if(_sensors.begin(), _sensors.end(),
339                           [](const auto& s) { return s->hasTarget(); });
340 
341     if (s != _sensors.end())
342     {
343         target = (*s)->getTarget();
344     }
345 
346     return target;
347 }
348 
349 size_t Fan::countNonFunctionalSensors() const
350 {
351     return std::count_if(_sensors.begin(), _sensors.end(),
352                          [](const auto& s) { return !s->functional(); });
353 }
354 
355 bool Fan::outOfRange(const TachSensor& sensor)
356 {
357     if (!sensor.hasOwner())
358     {
359         return true;
360     }
361 
362     auto actual = static_cast<uint64_t>(sensor.getInput());
363     auto range = sensor.getRange(_deviation, _upperDeviation);
364 
365     return ((actual < range.first) ||
366             (range.second && actual > range.second.value()));
367 }
368 
/**
 * Toggles a sensor's functional state and propagates the consequences:
 * logs the change, updates the fan's own functional state in inventory when
 * configured to, and notifies the System object. Does nothing while power
 * is off.
 */
void Fan::updateState(TachSensor& sensor)
{
    if (!_system.isPowerOn())
    {
        return;
    }

    const auto range = sensor.getRange(_deviation, _upperDeviation);
    const std::string rangeMax =
        range.second ? std::to_string(range.second.value()) : "NoMax";

    // Skip starting the error timer if the sensor
    // isn't on D-Bus as this isn't a fan hardware problem.
    sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());

    getLogger().log(std::format(
        "Setting tach sensor {} functional state to {}. "
        "[target = {}, actual = {}, allowed range = ({} - {}) "
        "owned = {}]",
        sensor.name(), sensor.functional(), sensor.getTarget(),
        sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));

    // A zero value for _numSensorFailsForNonFunc means we aren't dealing
    // with fan FRU functional status, only sensor functional status.
    if (_numSensorFailsForNonFunc)
    {
        const auto numNonFuncSensors = countNonFunctionalSensors();
        const bool tooManyFailed =
            (numNonFuncSensors >= _numSensorFailsForNonFunc);

        if (!_setFuncOnPresent && !_functional && !tooManyFailed)
        {
            // The fan was nonfunctional and enough sensors are now OK, so
            // it can be set to functional as long as `set_func_on_present`
            // was not set.
            getLogger().log(std::format("Setting fan {} to functional, number "
                                        "of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(true);
        }
        else if (_functional && tooManyFailed)
        {
            // The fan is currently functional, but too many contained
            // sensors are now nonfunctional, so update the fan to
            // nonfunctional.
            getLogger().log(std::format("Setting fan {} to nonfunctional, "
                                        "number of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(false);
        }
    }

    // Skip the power off rule checks if the sensor isn't
    // on D-Bus so a running system isn't shutdown.
    _system.fanStatusChange(*this, !sensor.hasOwner());
}
427 
/**
 * Writes the fan's functional state to inventory through a Notify call.
 *
 * The cached _functional value is updated regardless of the outcome.
 *
 * @param functional - the functional state to record
 * @return true if the Notify call returned a method error or a D-Bus
 *         exception was caught, false otherwise
 */
bool Fan::updateInventory(bool functional)
{
    bool failed = false;

    try
    {
        auto notifyArgs =
            util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
                                  util::FUNCTIONAL_PROPERTY, functional);

        auto reply = util::SDBusPlus::callMethod(
            _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
            util::INVENTORY_INTF, "Notify", notifyArgs);

        if (reply.is_method_error())
        {
            log<level::ERR>("Error in Notify call to update inventory");

            failed = true;
        }
    }
    catch (const util::DBusError& e)
    {
        failed = true;

        getLogger().log(
            std::format("D-Bus Exception reading/updating inventory : {}",
                        e.what()),
            Logger::error);
    }

    // Record the requested state even when the write failed.
    _functional = functional;

    return failed;
}
464 
/**
 * PropertiesChanged handler for the fan's inventory item interface.
 *
 * When the Present property is part of the change, caches the new value,
 * optionally restores the fan and its sensors to functional on insertion,
 * notifies the System object and starts or stops the missing-fan error
 * timer.
 */
void Fan::presenceChanged(sdbusplus::message_t& msg)
{
    std::string interface;
    std::map<std::string, std::variant<bool>> properties;

    msg.read(interface, properties);

    const auto presentProp = properties.find("Present");
    if (presentProp == properties.end())
    {
        // Some other property changed; nothing to do.
        return;
    }

    _present = std::get<bool>(presentProp->second);

    getLogger().log(
        std::format("Fan {} presence state change to {}", _name, _present));

    if (_present && _setFuncOnPresent)
    {
        // Configured to treat a newly present fan as good: reset the fan
        // and every sensor back to functional.
        updateInventory(true);
        std::for_each(_sensors.begin(), _sensors.end(), [](auto& sensor) {
            sensor->setFunctional(true);
            sensor->resetMethod();
        });
    }

    _system.fanStatusChange(*this);

    // Guard on the timer object itself, since it is what gets dereferenced
    // below. It is only created when a missing-fan delay is configured.
    if (!_fanMissingErrorTimer)
    {
        return;
    }

    if (!_present && _system.isPowerOn())
    {
        _fanMissingErrorTimer->restartOnce(
            std::chrono::seconds{*_fanMissingErrorDelay});
    }
    else if (_present && _fanMissingErrorTimer->isEnabled())
    {
        _fanMissingErrorTimer->setEnabled(false);
    }
}
505 
506 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
507 {
508     if (_present && _system.isPowerOn())
509     {
510         _system.sensorErrorTimerExpired(*this, sensor);
511     }
512 }
513 
/**
 * Reacts to a power state change. Only has an effect when built with
 * MONITOR_USE_JSON.
 *
 * Power off: monitoring is marked not ready and all timers owned by the fan
 * and its sensors are stopped.
 *
 * Power on: the monitor start timer is armed, each sensor is re-read from
 * D-Bus and restored to functional where possible, and the fan itself is
 * either restored to functional or, if missing, has its missing-fan error
 * timer started.
 */
void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
{
#ifdef MONITOR_USE_JSON
    if (!powerStateOn)
    {
        _monitorReady = false;

        if (_monitorTimer.isEnabled())
        {
            _monitorTimer.setEnabled(false);
        }

        if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }

        for (auto& tach : _sensors)
        {
            if (tach->timerRunning())
            {
                tach->stopTimer();
            }

            tach->stopCountTimer();
        }

        return;
    }

    _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));

    _numSensorsOnDBusAtPowerOn = 0;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call.  If sensor is on D-Bus,
            // then make sure it's functional.
            tach->updateTachAndTarget();

            ++_numSensorsOnDBusAtPowerOn;

            if (_present)
            {
                // If not functional, set it back to functional.
                if (!tach->functional())
                {
                    tach->setFunctional(true);
                    _system.fanStatusChange(*this, true);
                }

                // Set the counters back to zero
                if (tach->getMethod() == MethodMode::count)
                {
                    tach->resetMethod();
                }
            }
        }
        catch (const util::DBusError&)
        {
            // Properties still aren't on D-Bus.  Let startMonitor()
            // deal with it, or maybe System::powerStateChanged() if
            // there aren't any sensors at all on D-Bus.
            getLogger().log(std::format(
                "At power on, tach sensor {} value not on D-Bus",
                tach->name()));
        }
    }

    if (!_present)
    {
        getLogger().log(
            std::format("At power on, fan {} is missing", _name));

        if (_fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }

        return;
    }

    // If configured to change functional state on the fan itself,
    // set it back to true now if necessary.
    if (_numSensorFailsForNonFunc && !_functional &&
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
    {
        updateInventory(true);
    }
#endif
}
609 
610 } // namespace monitor
611 } // namespace fan
612 } // namespace phosphor
613