xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision dfddd648cb81b27492afead4e2346f5fcd1397cb)
1 /**
2  * Copyright © 2022 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <phosphor-logging/log.hpp>
25 
26 #include <format>
27 
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34 
35 using namespace phosphor::logging;
36 using namespace sdbusplus::bus::match;
37 
/**
 * @brief Constructs a Fan from its definition.
 *
 * Creates a TachSensor for every entry in the definition's sensor list and
 * registers each one with the trust manager, publishes the initial
 * functional state to the inventory, arms the monitoring logic for the
 * current build flavor, and reads the fan's current presence.
 *
 * @param[in] mode   - Mode the application was started in
 * @param[in] bus    - D-Bus connection used for matches and inventory calls
 * @param[in] event  - Event loop the timers are attached to
 * @param[in] trust  - Trust manager the tach sensors are registered with
 * @param[in] def    - Definition (configuration) of this fan
 * @param[in] system - Owning System object
 */
Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
         std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
         System& system) :
    _bus(bus), _name(def.name), _deviation(def.deviation),
    _upperDeviation(def.upperDeviation),
    _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
    _trustManager(trust),
#ifdef MONITOR_USE_JSON
    _monitorDelay(def.monitorStartDelay),
    _monitorTimer(event, std::bind(std::mem_fn(&Fan::startMonitor), this)),
#endif
    _system(system),
    _presenceMatch(bus,
                   rules::propertiesChanged(util::INVENTORY_PATH + _name,
                                            util::INV_ITEM_IFACE),
                   std::bind(std::mem_fn(&Fan::presenceChanged), this,
                             std::placeholders::_1)),
    _presenceIfaceAddedMatch(
        bus,
        rules::interfacesAdded() +
            rules::argNpath(0, util::INVENTORY_PATH + _name),
        std::bind(std::mem_fn(&Fan::presenceIfaceAdded), this,
                  std::placeholders::_1)),
    _fanMissingErrorDelay(def.fanMissingErrDelay),
    _setFuncOnPresent(def.funcOnPresent)
{
    // Build one TachSensor per configured sensor and hand each one to the
    // trust manager as soon as it exists.
    for (const auto& sensorDef : def.sensorList)
    {
        _sensors.emplace_back(std::make_shared<TachSensor>(
            mode, bus, *this, sensorDef.name, sensorDef.hasTarget,
            def.funcDelay, sensorDef.targetInterface, sensorDef.targetPath,
            sensorDef.factor, sensorDef.offset, def.method,
            sensorDef.threshold, sensorDef.ignoreAboveMax, def.timeout,
            def.nonfuncRotorErrDelay, def.countInterval, event));

        _trustManager->registerSensor(_sensors.back());
    }

    // A threshold of zero means the fan itself is always treated as
    // functional; otherwise compare the failed sensor count to the limit.
    bool fanFunctional = true;
    if (_numSensorFailsForNonFunc != 0)
    {
        fanFunctional =
            countNonFunctionalSensors() < _numSensorFailsForNonFunc;
    }

    const bool inventoryUpdateFailed = updateInventory(fanFunctional);
    if (inventoryUpdateFailed && !fanFunctional)
    {
        // updateInventory() reported a D-Bus error, possibly because D-Bus
        // wasn't ready.  Put the sensors back to functional to avoid a
        // false alarm; they get updated again through the
        // properties-changed subscription.
        for (auto& tach : _sensors)
        {
            tach->setFunctional(true);
        }
    }

#ifndef MONITOR_USE_JSON
    // When entering monitor mode, check the current tach state right away.
    if (mode != Mode::init)
    {
        _monitorReady = true;

        // The TachSensors have already read their input and target
        // values at this point, so evaluate them.
        tachChanged();
    }
#else
    if (_system.isPowerOn())
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
    }
#endif

    // Only create the fan-missing error timer when a delay was configured.
    if (_fanMissingErrorDelay)
    {
        _fanMissingErrorTimer = std::make_unique<
            sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>>(
            event, std::bind(&System::fanMissingErrorTimerExpired, &system,
                             std::ref(*this)));
    }

    try
    {
        _present = util::SDBusPlus::getProperty<bool>(
            util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");

        if (!_present)
        {
            getLogger().log(
                std::format("On startup, fan {} is missing", _name));

            if (_system.isPowerOn() && _fanMissingErrorTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    catch (const util::DBusServiceError& e)
    {
        // Can happen on the first BMC boot when the presence detect
        // application hasn't started yet and no inventory cache exists.
    }
}
139 
/**
 * @brief Handler for the InterfacesAdded signal on this fan's inventory
 *        path.
 *
 * Reads the Present property of the inventory item interface out of the
 * signal, if it is there, and stores it.  When the fan is reported missing
 * and power is on, the fan-missing error timer (if configured) is started.
 * When the fan is reported present, a running fan-missing error timer is
 * cancelled, matching what presenceChanged() does, so that no missing-fan
 * error is raised for a fan that is actually there.
 *
 * Signals that don't carry the item interface or its Present property are
 * ignored.
 *
 * @param[in] msg - The InterfacesAdded signal message
 */
void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
{
    sdbusplus::message::object_path path;
    std::map<std::string, std::map<std::string, std::variant<bool>>> interfaces;

    msg.read(path, interfaces);

    const auto itemIface = interfaces.find(util::INV_ITEM_IFACE);
    if (itemIface == interfaces.end())
    {
        return;
    }

    const auto presentProp = itemIface->second.find("Present");
    if (presentProp == itemIface->second.end())
    {
        return;
    }

    _present = std::get<bool>(presentProp->second);

    if (!_present)
    {
        getLogger().log(std::format(
            "New fan {} interface added and fan is not present", _name));
        if (_system.isPowerOn() && _fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }
    else if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
    {
        // The timer may have been started while the fan was still
        // considered missing (e.g. at power on before the inventory
        // object existed).  The fan is here now, so don't let it expire.
        _fanMissingErrorTimer->setEnabled(false);
    }

    _system.fanStatusChange(*this);
}
174 
/**
 * @brief Marks the fan as ready to be monitored and evaluates every tach
 *        sensor once.
 *
 * Each sensor is asked to refresh its tach and target values.  A sensor
 * whose refresh throws a DBusServiceError is set to nonfunctional, and the
 * fan's inventory state is updated if that pushes the number of
 * nonfunctional sensors to the configured limit.
 */
void Fan::startMonitor()
{
    _monitorReady = true;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call to find out whether the tach sensor
            // is on D-Bus.  A missing sensor is only declared
            // nonfunctional now, rather than earlier, so that code
            // watching for nonfunctional tach sensors doesn't act before
            // the sensors had a chance to show up on D-Bus.
            tach->updateTachAndTarget();
            tachChanged(*tach);
        }
        catch (const util::DBusServiceError& e)
        {
            // The tach property still isn't on D-Bus.  Make sure the
            // sensor is nonfunctional, but skip creating an error for it
            // because this isn't a fan problem.
            getLogger().log(std::format(
                "Monitoring starting but {} sensor value not on D-Bus",
                tach->name()));

            tach->setFunctional(false, true);

            const bool fanNowNonfunctional =
                _numSensorFailsForNonFunc && _functional &&
                (countNonFunctionalSensors() >= _numSensorFailsForNonFunc);
            if (fanNowNonfunctional)
            {
                updateInventory(false);
            }

            // Don't start any power off actions because of missing
            // sensors here; that policy is left to something else.
            _system.fanStatusChange(*this, true);
        }
    }
}
217 
tachChanged()218 void Fan::tachChanged()
219 {
220     if (_monitorReady)
221     {
222         for (auto& s : _sensors)
223         {
224             tachChanged(*s);
225         }
226     }
227 }
228 
tachChanged(TachSensor & sensor)229 void Fan::tachChanged(TachSensor& sensor)
230 {
231     if (!_system.isPowerOn() || !_monitorReady)
232     {
233         return;
234     }
235 
236     if (_trustManager->active())
237     {
238         if (!_trustManager->checkTrust(sensor))
239         {
240             return;
241         }
242     }
243 
244     // If the error checking method is 'count', if a tach change leads
245     // to an out of range sensor the count timer will take over in calling
246     // process() until the sensor is healthy again.
247     if (!sensor.countTimerRunning())
248     {
249         process(sensor);
250     }
251 }
252 
countTimerExpired(TachSensor & sensor)253 void Fan::countTimerExpired(TachSensor& sensor)
254 {
255     if (_trustManager->active() && !_trustManager->checkTrust(sensor))
256     {
257         return;
258     }
259     process(sensor);
260 }
261 
process(TachSensor & sensor)262 void Fan::process(TachSensor& sensor)
263 {
264     // If this sensor is out of range at this moment, start
265     // its timer, at the end of which the inventory
266     // for the fan may get updated to not functional.
267 
268     // If this sensor is OK, put everything back into a good state.
269 
270     if (outOfRange(sensor))
271     {
272         if (sensor.functional())
273         {
274             switch (sensor.getMethod())
275             {
276                 case MethodMode::timebased:
277                     // Start nonfunctional timer if not already running
278                     sensor.startTimer(TimerMode::nonfunc);
279                     break;
280                 case MethodMode::count:
281 
282                     if (!sensor.countTimerRunning())
283                     {
284                         sensor.startCountTimer();
285                     }
286                     sensor.setCounter(true);
287                     if (sensor.getCounter() >= sensor.getThreshold())
288                     {
289                         updateState(sensor);
290                     }
291                     break;
292             }
293         }
294     }
295     else
296     {
297         switch (sensor.getMethod())
298         {
299             case MethodMode::timebased:
300                 if (sensor.functional())
301                 {
302                     if (sensor.timerRunning())
303                     {
304                         sensor.stopTimer();
305                     }
306                 }
307                 else
308                 {
309                     // Start functional timer if not already running
310                     sensor.startTimer(TimerMode::func);
311                 }
312                 break;
313             case MethodMode::count:
314                 sensor.setCounter(false);
315                 if (sensor.getCounter() == 0)
316                 {
317                     if (!sensor.functional())
318                     {
319                         updateState(sensor);
320                     }
321 
322                     sensor.stopCountTimer();
323                 }
324                 break;
325         }
326     }
327 }
328 
findTargetSpeed()329 uint64_t Fan::findTargetSpeed()
330 {
331     uint64_t target = 0;
332     // The sensor doesn't support a target,
333     // so get it from another sensor.
334     auto s = std::find_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
335         return s->hasTarget();
336     });
337 
338     if (s != _sensors.end())
339     {
340         target = (*s)->getTarget();
341     }
342 
343     return target;
344 }
345 
countNonFunctionalSensors() const346 size_t Fan::countNonFunctionalSensors() const
347 {
348     return std::count_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
349         return !s->functional();
350     });
351 }
352 
outOfRange(const TachSensor & sensor)353 bool Fan::outOfRange(const TachSensor& sensor)
354 {
355     if (!sensor.hasOwner())
356     {
357         return true;
358     }
359 
360     auto actual = static_cast<uint64_t>(sensor.getInput());
361     auto range = sensor.getRange(_deviation, _upperDeviation);
362 
363     return ((actual < range.first) ||
364             (range.second && actual > range.second.value()));
365 }
366 
/**
 * @brief Toggles a sensor's functional state and propagates the result to
 *        the fan's inventory state and to the System object.
 *
 * Does nothing while power is off.
 *
 * @param[in] sensor - The tach sensor whose functional state is flipped
 */
void Fan::updateState(TachSensor& sensor)
{
    if (!_system.isPowerOn())
    {
        return;
    }

    const auto range = sensor.getRange(_deviation, _upperDeviation);
    const std::string rangeMax = range.second
                                     ? std::to_string(range.second.value())
                                     : std::string{"NoMax"};

    // Skip starting the error timer if the sensor isn't on D-Bus,
    // because that isn't a fan hardware problem.
    sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());

    getLogger().log(std::format(
        "Setting tach sensor {} functional state to {}. "
        "[target = {}, actual = {}, allowed range = ({} - {}) "
        "owned = {}]",
        sensor.name(), sensor.functional(), sensor.getTarget(),
        sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));

    // A zero value for _numSensorFailsForNonFunc means only the sensor
    // functional status is handled, not the fan FRU functional status.
    if (_numSensorFailsForNonFunc)
    {
        const auto numNonFuncSensors = countNonFunctionalSensors();
        const bool belowFailLimit =
            numNonFuncSensors < _numSensorFailsForNonFunc;

        // A nonfunctional fan with enough healthy sensors again may go
        // back to functional, unless `set_func_on_present` is set.
        if (belowFailLimit && !_functional && !_setFuncOnPresent)
        {
            getLogger().log(std::format("Setting fan {} to functional, number "
                                        "of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(true);
        }

        // A functional fan with too many nonfunctional sensors becomes
        // nonfunctional.
        if (_functional && !belowFailLimit)
        {
            getLogger().log(std::format("Setting fan {} to nonfunctional, "
                                        "number of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(false);
        }
    }

    // Skip the power off rule checks if the sensor isn't on D-Bus so
    // that a running system isn't shut down.
    _system.fanStatusChange(*this, !sensor.hasOwner());
}
425 
/**
 * @brief Writes the fan's functional state to the inventory through the
 *        inventory service's Notify method.
 *
 * The cached _functional value is updated to the requested state whether
 * or not the D-Bus call succeeded.
 *
 * @param[in] functional - The functional state to publish
 *
 * @return true if a D-Bus error occurred, false on success
 */
bool Fan::updateInventory(bool functional)
{
    bool failed = false;

    try
    {
        auto inventoryUpdate =
            util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
                                  util::FUNCTIONAL_PROPERTY, functional);

        auto reply = util::SDBusPlus::callMethod(
            _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
            util::INVENTORY_INTF, "Notify", inventoryUpdate);

        failed = reply.is_method_error();
        if (failed)
        {
            log<level::ERR>("Error in Notify call to update inventory");
        }
    }
    catch (const util::DBusError& e)
    {
        failed = true;

        getLogger().log(
            std::format("D-Bus Exception reading/updating inventory : {}",
                        e.what()),
            Logger::error);
    }

    // Remember the requested state, even when the update failed.
    _functional = functional;

    return failed;
}
462 
/**
 * @brief Handler for PropertiesChanged signals on the fan's inventory item
 *        interface.
 *
 * When the signal carries the Present property, the new value is stored
 * and logged, the fan and its sensors are optionally set back to
 * functional, the System object is notified, and the fan-missing error
 * timer is started or stopped as appropriate.
 *
 * @param[in] msg - The PropertiesChanged signal message
 */
void Fan::presenceChanged(sdbusplus::message_t& msg)
{
    std::string interface;
    std::map<std::string, std::variant<bool>> properties;

    msg.read(interface, properties);

    const auto presentProp = properties.find("Present");
    if (presentProp == properties.end())
    {
        // Some other property changed; nothing to do.
        return;
    }

    _present = std::get<bool>(presentProp->second);

    getLogger().log(
        std::format("Fan {} presence state change to {}", _name, _present));

    // A fan that just became present may be configured to start out
    // functional again, together with all of its sensors.
    if (_present && _setFuncOnPresent)
    {
        updateInventory(true);
        for (auto& tach : _sensors)
        {
            tach->setFunctional(true);
            tach->resetMethod();
        }
    }

    _system.fanStatusChange(*this);

    if (!_fanMissingErrorDelay)
    {
        return;
    }

    if (!_present && _system.isPowerOn())
    {
        _fanMissingErrorTimer->restartOnce(
            std::chrono::seconds{*_fanMissingErrorDelay});
    }
    else if (_present && _fanMissingErrorTimer->isEnabled())
    {
        _fanMissingErrorTimer->setEnabled(false);
    }
}
503 
sensorErrorTimerExpired(const TachSensor & sensor)504 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
505 {
506     if (_present && _system.isPowerOn())
507     {
508         _system.sensorErrorTimerExpired(*this, sensor);
509     }
510 }
511 
/**
 * @brief Reacts to a change of the system power state.
 *
 * Only has an effect when built with MONITOR_USE_JSON.
 *
 * Power off: monitoring is marked not ready and the monitor timer, the
 * fan-missing error timer and all sensor timers are stopped.
 *
 * Power on: the monitor start delay timer is (re)started, every sensor is
 * asked to refresh its values, and the sensors found on D-Bus are counted.
 * For a present fan, sensors and possibly the fan itself are put back to
 * functional; for a missing fan, the fan-missing error timer is started
 * if configured.
 *
 * @param[in] powerStateOn - true if power is now on, false if off
 */
void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
{
#ifdef MONITOR_USE_JSON
    if (!powerStateOn)
    {
        _monitorReady = false;

        if (_monitorTimer.isEnabled())
        {
            _monitorTimer.setEnabled(false);
        }

        if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }

        for (auto& tach : _sensors)
        {
            if (tach->timerRunning())
            {
                tach->stopTimer();
            }

            tach->stopCountTimer();
        }

        return;
    }

    _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));

    _numSensorsOnDBusAtPowerOn = 0;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call.  Getting past it means the
            // sensor is on D-Bus.
            tach->updateTachAndTarget();

            _numSensorsOnDBusAtPowerOn++;

            if (!_present)
            {
                continue;
            }

            // A present fan starts the power on with functional sensors.
            if (!tach->functional())
            {
                tach->setFunctional(true);
                _system.fanStatusChange(*this, true);
            }

            // Set the counters back to zero
            if (tach->getMethod() == MethodMode::count)
            {
                tach->resetMethod();
            }
        }
        catch (const util::DBusError& e)
        {
            // Properties still aren't on D-Bus.  Let startMonitor()
            // deal with it, or maybe System::powerStateChanged() if
            // there aren't any sensors at all on D-Bus.
            getLogger().log(
                std::format("At power on, tach sensor {} value not on D-Bus",
                            tach->name()));
        }
    }

    if (!_present)
    {
        getLogger().log(std::format("At power on, fan {} is missing", _name));

        if (_fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }

        return;
    }

    // If configured to change the functional state on the fan itself,
    // set it back to functional now if necessary.
    if (_numSensorFailsForNonFunc && !_functional &&
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
    {
        updateInventory(true);
    }
#endif
}
607 
608 } // namespace monitor
609 } // namespace fan
610 } // namespace phosphor
611