xref: /openbmc/phosphor-fan-presence/monitor/fan.cpp (revision 9d533806250cea56406bdd39e025f0d820c4ed90)
1 /**
2  * Copyright © 2022 IBM Corporation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "fan.hpp"
17 
18 #include "logging.hpp"
19 #include "sdbusplus.hpp"
20 #include "system.hpp"
21 #include "types.hpp"
22 #include "utility.hpp"
23 
24 #include <phosphor-logging/lg2.hpp>
25 
26 #include <format>
27 
28 namespace phosphor
29 {
30 namespace fan
31 {
32 namespace monitor
33 {
34 
35 using namespace sdbusplus::bus::match;
36 
/**
 * @brief Builds a Fan from its definition.
 *
 * Creates a TachSensor for every entry in the definition's sensor list and
 * registers each with the trust manager, writes the initial functional
 * state to the inventory, arms the monitoring and fan-missing timers as
 * configured, and finally reads the fan's current Present property.
 *
 * @param[in] mode - Run mode; forwarded to each TachSensor.  In the
 *                   non-JSON build, any mode other than Mode::init starts
 *                   checking the tach values right away.
 * @param[in] bus - D-Bus connection used for the signal matches and the
 *                  inventory updates
 * @param[in] event - Event loop the timers are attached to
 * @param[in] trust - Trust manager each created sensor is registered with
 * @param[in] def - Fan definition (name, deviations, delays, sensor list)
 * @param[in] system - System object queried for the power state and
 *                     notified about fan events
 */
Fan::Fan(Mode mode, sdbusplus::bus_t& bus, const sdeventplus::Event& event,
         std::unique_ptr<trust::Manager>& trust, const FanDefinition& def,
         System& system) :
    _bus(bus), _name(def.name), _deviation(def.deviation),
    _upperDeviation(def.upperDeviation),
    _numSensorFailsForNonFunc(def.numSensorFailsForNonfunc),
    _trustManager(trust),
#ifdef MONITOR_USE_JSON
    _monitorDelay(def.monitorStartDelay),
    _monitorTimer(event, std::bind(&Fan::startMonitor, this)),
#endif
    _system(system),
    _presenceMatch(bus,
                   rules::propertiesChanged(util::INVENTORY_PATH + _name,
                                            util::INV_ITEM_IFACE),
                   std::bind(&Fan::presenceChanged, this,
                             std::placeholders::_1)),
    _presenceIfaceAddedMatch(
        bus,
        rules::interfacesAdded() +
            rules::argNpath(0, util::INVENTORY_PATH + _name),
        std::bind(&Fan::presenceIfaceAdded, this, std::placeholders::_1)),
    _fanMissingErrorDelay(def.fanMissingErrDelay),
    _setFuncOnPresent(def.funcOnPresent)
{
    using MissingErrorTimer =
        sdeventplus::utility::Timer<sdeventplus::ClockId::Monotonic>;

    // Create one tach sensor per configured entry and hand each one to the
    // trust manager as soon as it exists.
    for (const auto& sensorDef : def.sensorList)
    {
        _sensors.emplace_back(std::make_shared<TachSensor>(
            mode, bus, *this, sensorDef.name, sensorDef.hasTarget,
            def.funcDelay, sensorDef.targetInterface, sensorDef.targetPath,
            sensorDef.factor, sensorDef.offset, def.method,
            sensorDef.threshold, sensorDef.ignoreAboveMax, def.timeout,
            def.nonfuncRotorErrDelay, def.countInterval, event));

        _trustManager->registerSensor(_sensors.back());
    }

    // The fan starts out functional when fan-level status isn't tracked
    // (threshold of zero) or when fewer sensors than the threshold are
    // nonfunctional.
    const bool functionalState =
        (_numSensorFailsForNonFunc == 0) ||
        (countNonFunctionalSensors() < _numSensorFailsForNonFunc);

    const bool inventoryUpdateFailed = updateInventory(functionalState);

    if (inventoryUpdateFailed && !functionalState)
    {
        // The inventory write failed, possibly because D-Bus wasn't ready
        // yet.  Put the sensors back to functional to avoid a false alarm;
        // they get updated again via the properties-changed subscription.
        for (auto& tach : _sensors)
        {
            tach->setFunctional(true);
        }
    }

#ifdef MONITOR_USE_JSON
    // Only begin the monitor start delay if power is already on.
    if (_system.isPowerOn())
    {
        _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));
    }
#else
    // Check current tach state when entering monitor mode
    if (mode != Mode::init)
    {
        _monitorReady = true;

        // The TachSensors will now have already read the input
        // and target values, so check them.
        tachChanged();
    }
#endif

    // The fan-missing timer only exists when a delay was configured.
    if (_fanMissingErrorDelay)
    {
        _fanMissingErrorTimer = std::make_unique<MissingErrorTimer>(
            event, std::bind(&System::fanMissingErrorTimerExpired, &system,
                             std::ref(*this)));
    }

    try
    {
        _present = util::SDBusPlus::getProperty<bool>(
            util::INVENTORY_PATH + _name, util::INV_ITEM_IFACE, "Present");

        if (!_present)
        {
            getLogger().log(
                std::format("On startup, fan {} is missing", _name));

            if (_system.isPowerOn() && _fanMissingErrorTimer)
            {
                _fanMissingErrorTimer->restartOnce(
                    std::chrono::seconds{*_fanMissingErrorDelay});
            }
        }
    }
    catch (const util::DBusServiceError&)
    {
        // This could happen on the first BMC boot if the presence
        // detect app hasn't started yet and there isn't an inventory
        // cache yet.
    }
}
138 
/**
 * @brief InterfacesAdded signal handler for this fan's inventory path.
 *
 * Picks the Present property of the inventory item interface out of the
 * signal, stores it, starts the fan-missing timer if the fan is absent
 * while power is on, and notifies the System object of the change.
 * Signals that don't carry that interface/property are ignored.
 *
 * @param[in] msg - The InterfacesAdded signal message
 */
void Fan::presenceIfaceAdded(sdbusplus::message_t& msg)
{
    using PropertyMap = std::map<std::string, std::variant<bool>>;

    sdbusplus::message::object_path objPath;
    std::map<std::string, PropertyMap> addedIfaces;

    msg.read(objPath, addedIfaces);

    const auto itemIface = addedIfaces.find(util::INV_ITEM_IFACE);
    if (itemIface == addedIfaces.end())
    {
        // Not the inventory item interface; nothing to do.
        return;
    }

    const auto& itemProps = itemIface->second;
    const auto presentEntry = itemProps.find("Present");
    if (presentEntry == itemProps.end())
    {
        return;
    }

    _present = std::get<bool>(presentEntry->second);

    if (!_present)
    {
        getLogger().log(std::format(
            "New fan {} interface added and fan is not present", _name));

        // Only arm the missing-fan timer with power on, and only when
        // one was configured.
        if (_system.isPowerOn() && _fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }

    _system.fanStatusChange(*this);
}
173 
/**
 * @brief Marks the fan as ready for monitoring and evaluates every sensor.
 *
 * Each sensor's tach and target values are refreshed and then checked.
 * A sensor whose value can't be read from D-Bus is set to nonfunctional,
 * the fan's inventory status is updated if that crosses the configured
 * threshold, and the System object is told about the change.
 */
void Fan::startMonitor()
{
    _monitorReady = true;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call to check if the tach sensor is
            // on D-Bus.  If it isn't, now set it to nonfunctional.
            // This isn't done earlier so that code watching for
            // nonfunctional tach sensors doesn't take actions before
            // those sensors show up on D-Bus.
            tach->updateTachAndTarget();
            tachChanged(*tach);
        }
        catch (const util::DBusServiceError&)
        {
            // The tach property still isn't on D-Bus. Ensure
            // sensor is nonfunctional, but skip creating an
            // error for it since it isn't a fan problem.
            getLogger().log(std::format(
                "Monitoring starting but {} sensor value not on D-Bus",
                tach->name()));

            tach->setFunctional(false, true);

            // A zero threshold means fan-level status isn't tracked.
            if (_numSensorFailsForNonFunc && _functional &&
                (countNonFunctionalSensors() >= _numSensorFailsForNonFunc))
            {
                updateInventory(false);
            }

            // At this point, don't start any power off actions due
            // to missing sensors.  Let something else handle that
            // policy.
            _system.fanStatusChange(*this, true);
        }
    }
}
216 
tachChanged()217 void Fan::tachChanged()
218 {
219     if (_monitorReady)
220     {
221         for (auto& s : _sensors)
222         {
223             tachChanged(*s);
224         }
225     }
226 }
227 
tachChanged(TachSensor & sensor)228 void Fan::tachChanged(TachSensor& sensor)
229 {
230     if (!_system.isPowerOn() || !_monitorReady)
231     {
232         return;
233     }
234 
235     if (_trustManager->active())
236     {
237         if (!_trustManager->checkTrust(sensor))
238         {
239             return;
240         }
241     }
242 
243     // If the error checking method is 'count', if a tach change leads
244     // to an out of range sensor the count timer will take over in calling
245     // process() until the sensor is healthy again.
246     if (!sensor.countTimerRunning())
247     {
248         process(sensor);
249     }
250 }
251 
countTimerExpired(TachSensor & sensor)252 void Fan::countTimerExpired(TachSensor& sensor)
253 {
254     if (_trustManager->active() && !_trustManager->checkTrust(sensor))
255     {
256         return;
257     }
258     process(sensor);
259 }
260 
process(TachSensor & sensor)261 void Fan::process(TachSensor& sensor)
262 {
263     // If this sensor is out of range at this moment, start
264     // its timer, at the end of which the inventory
265     // for the fan may get updated to not functional.
266 
267     // If this sensor is OK, put everything back into a good state.
268 
269     if (outOfRange(sensor))
270     {
271         if (sensor.functional())
272         {
273             switch (sensor.getMethod())
274             {
275                 case MethodMode::timebased:
276                     // Start nonfunctional timer if not already running
277                     sensor.startTimer(TimerMode::nonfunc);
278                     break;
279                 case MethodMode::count:
280 
281                     if (!sensor.countTimerRunning())
282                     {
283                         sensor.startCountTimer();
284                     }
285                     sensor.setCounter(true);
286                     if (sensor.getCounter() >= sensor.getThreshold())
287                     {
288                         updateState(sensor);
289                     }
290                     break;
291             }
292         }
293     }
294     else
295     {
296         switch (sensor.getMethod())
297         {
298             case MethodMode::timebased:
299                 if (sensor.functional())
300                 {
301                     if (sensor.timerRunning())
302                     {
303                         sensor.stopTimer();
304                     }
305                 }
306                 else
307                 {
308                     // Start functional timer if not already running
309                     sensor.startTimer(TimerMode::func);
310                 }
311                 break;
312             case MethodMode::count:
313                 sensor.setCounter(false);
314                 if (sensor.getCounter() == 0)
315                 {
316                     if (!sensor.functional())
317                     {
318                         updateState(sensor);
319                     }
320 
321                     sensor.stopCountTimer();
322                 }
323                 break;
324         }
325     }
326 }
327 
findTargetSpeed()328 uint64_t Fan::findTargetSpeed()
329 {
330     uint64_t target = 0;
331     // The sensor doesn't support a target,
332     // so get it from another sensor.
333     auto s = std::find_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
334         return s->hasTarget();
335     });
336 
337     if (s != _sensors.end())
338     {
339         target = (*s)->getTarget();
340     }
341 
342     return target;
343 }
344 
countNonFunctionalSensors() const345 size_t Fan::countNonFunctionalSensors() const
346 {
347     return std::count_if(_sensors.begin(), _sensors.end(), [](const auto& s) {
348         return !s->functional();
349     });
350 }
351 
outOfRange(const TachSensor & sensor)352 bool Fan::outOfRange(const TachSensor& sensor)
353 {
354     if (!sensor.hasOwner())
355     {
356         return true;
357     }
358 
359     auto actual = static_cast<uint64_t>(sensor.getInput());
360     auto range = sensor.getRange(_deviation, _upperDeviation);
361 
362     return ((actual < range.first) ||
363             (range.second && actual > range.second.value()));
364 }
365 
/**
 * @brief Toggles a sensor's functional state and propagates the result.
 *
 * Does nothing when power is off.  Otherwise the sensor's functional
 * state is inverted and logged, the fan's inventory functional status is
 * updated if the number of nonfunctional sensors crosses the configured
 * threshold, and the System object is notified.
 *
 * @param[in] sensor - The sensor whose state is being flipped
 */
void Fan::updateState(TachSensor& sensor)
{
    if (!_system.isPowerOn())
    {
        return;
    }

    const auto range = sensor.getRange(_deviation, _upperDeviation);
    const std::string rangeMax =
        range.second ? std::to_string(range.second.value()) : "NoMax";

    // Skip starting the error timer if the sensor
    // isn't on D-Bus as this isn't a fan hardware problem.
    sensor.setFunctional(!sensor.functional(), !sensor.hasOwner());

    getLogger().log(std::format(
        "Setting tach sensor {} functional state to {}. "
        "[target = {}, actual = {}, allowed range = ({} - {}) "
        "owned = {}]",
        sensor.name(), sensor.functional(), sensor.getTarget(),
        sensor.getInput(), range.first, rangeMax, sensor.hasOwner()));

    // A zero value for _numSensorFailsForNonFunc means we aren't dealing
    // with fan FRU functional status, only sensor functional status.
    if (_numSensorFailsForNonFunc)
    {
        const auto numNonFuncSensors = countNonFunctionalSensors();
        const bool tooManyFailed =
            (numNonFuncSensors >= _numSensorFailsForNonFunc);

        // If the fan was nonfunctional and enough sensors are now OK,
        // the fan can be set to functional as long as `set_func_on_present`
        // was not set
        if (!_setFuncOnPresent && !_functional && !tooManyFailed)
        {
            getLogger().log(std::format("Setting fan {} to functional, number "
                                        "of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(true);
        }

        // If the fan is currently functional, but too many
        // contained sensors are now nonfunctional, update
        // the fan to nonfunctional.
        if (_functional && tooManyFailed)
        {
            getLogger().log(std::format("Setting fan {} to nonfunctional, "
                                        "number of nonfunctional sensors = {}",
                                        _name, numNonFuncSensors));
            updateInventory(false);
        }
    }

    // Skip the power off rule checks if the sensor isn't
    // on D-Bus so a running system isn't shutdown.
    _system.fanStatusChange(*this, !sensor.hasOwner());
}
424 
/**
 * @brief Writes the fan's functional status to the inventory.
 *
 * Sends a Notify call to the inventory service carrying the new value of
 * the functional property.  The cached _functional member is updated
 * regardless of whether the call succeeded.
 *
 * @param[in] functional - The functional state to publish
 *
 * @return true if the D-Bus call reported or raised an error,
 *         false if the update went through
 */
bool Fan::updateInventory(bool functional)
{
    bool updateFailed = false;

    try
    {
        auto notifyData =
            util::getObjMap<bool>(_name, util::OPERATIONAL_STATUS_INTF,
                                  util::FUNCTIONAL_PROPERTY, functional);

        auto reply = util::SDBusPlus::callMethod(
            _bus, util::INVENTORY_SVC, util::INVENTORY_PATH,
            util::INVENTORY_INTF, "Notify", notifyData);

        if (reply.is_method_error())
        {
            lg2::error("Error in Notify call to update inventory");

            updateFailed = true;
        }
    }
    catch (const util::DBusError& e)
    {
        updateFailed = true;

        getLogger().log(
            std::format("D-Bus Exception reading/updating inventory : {}",
                        e.what()),
            Logger::error);
    }

    // This will always track the current state of the inventory.
    _functional = functional;

    return updateFailed;
}
461 
/**
 * @brief PropertiesChanged signal handler for the fan's inventory item.
 *
 * When the signal carries the Present property, the new value is stored
 * and logged.  If the fan just became present and the fan is configured
 * to be set functional on presence, the fan and all of its sensors are
 * put back to functional and the sensors' error-method state is reset.
 * The System object is then notified, and the fan-missing timer is
 * started (fan absent with power on) or stopped (fan present).
 *
 * @param[in] msg - The PropertiesChanged signal message
 */
void Fan::presenceChanged(sdbusplus::message_t& msg)
{
    std::string interface;
    std::map<std::string, std::variant<bool>> properties;

    msg.read(interface, properties);

    const auto presentProp = properties.find("Present");
    if (presentProp == properties.end())
    {
        // Some other property changed; nothing to do.
        return;
    }

    _present = std::get<bool>(presentProp->second);

    getLogger().log(
        std::format("Fan {} presence state change to {}", _name, _present));

    if (_present && _setFuncOnPresent)
    {
        updateInventory(true);

        for (auto& sensor : _sensors)
        {
            sensor->setFunctional(true);
            sensor->resetMethod();
        }
    }

    _system.fanStatusChange(*this);

    // Guard on the timer object itself, as the other users of the timer
    // in this file do, since it is the pointer being dereferenced below.
    // The timer only exists when a fan-missing error delay was configured,
    // so *_fanMissingErrorDelay is valid whenever the timer is.
    if (_fanMissingErrorTimer)
    {
        if (!_present && _system.isPowerOn())
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
        else if (_present && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }
    }
}
502 
sensorErrorTimerExpired(const TachSensor & sensor)503 void Fan::sensorErrorTimerExpired(const TachSensor& sensor)
504 {
505     if (_present && _system.isPowerOn())
506     {
507         _system.sensorErrorTimerExpired(*this, sensor);
508     }
509 }
510 
/**
 * @brief Reacts to a change of the system power state.
 *
 * Only has an effect in the JSON-configured build.
 *
 * On power off, monitoring is marked not ready and the monitor timer,
 * the fan-missing timer and every sensor's timers are stopped.
 *
 * On power on, the monitor start delay timer is restarted and every
 * sensor is re-read; sensors that can be read are counted and, if the
 * fan is present, put back to functional with their count method reset.
 * A present fan is then set back to functional in the inventory if the
 * sensor threshold allows it, while a missing fan is logged and the
 * fan-missing timer is started when one is configured.
 *
 * @param[in] powerStateOn - true if power is now on, false if now off
 */
void Fan::powerStateChanged([[maybe_unused]] bool powerStateOn)
{
#ifdef MONITOR_USE_JSON
    if (!powerStateOn)
    {
        _monitorReady = false;

        if (_monitorTimer.isEnabled())
        {
            _monitorTimer.setEnabled(false);
        }

        if (_fanMissingErrorTimer && _fanMissingErrorTimer->isEnabled())
        {
            _fanMissingErrorTimer->setEnabled(false);
        }

        for (auto& tach : _sensors)
        {
            if (tach->timerRunning())
            {
                tach->stopTimer();
            }

            tach->stopCountTimer();
        }

        return;
    }

    // Power is on from here down.
    _monitorTimer.restartOnce(std::chrono::seconds(_monitorDelay));

    _numSensorsOnDBusAtPowerOn = 0;

    for (auto& tach : _sensors)
    {
        try
        {
            // Force a getProperty call.  If sensor is on D-Bus,
            // then make sure it's functional.
            tach->updateTachAndTarget();

            _numSensorsOnDBusAtPowerOn++;

            if (!_present)
            {
                continue;
            }

            // If not functional, set it back to functional.
            if (!tach->functional())
            {
                tach->setFunctional(true);
                _system.fanStatusChange(*this, true);
            }

            // Set the counters back to zero
            if (tach->getMethod() == MethodMode::count)
            {
                tach->resetMethod();
            }
        }
        catch (const util::DBusError&)
        {
            // Properties still aren't on D-Bus.  Let startMonitor()
            // deal with it, or maybe System::powerStateChanged() if
            // there aren't any sensors at all on D-Bus.
            getLogger().log(std::format(
                "At power on, tach sensor {} value not on D-Bus",
                tach->name()));
        }
    }

    if (_present)
    {
        // If configured to change functional state on the fan itself,
        // Set it back to true now if necessary.
        if (_numSensorFailsForNonFunc && !_functional &&
            (countNonFunctionalSensors() < _numSensorFailsForNonFunc))
        {
            updateInventory(true);
        }
    }
    else
    {
        getLogger().log(std::format("At power on, fan {} is missing", _name));

        if (_fanMissingErrorTimer)
        {
            _fanMissingErrorTimer->restartOnce(
                std::chrono::seconds{*_fanMissingErrorDelay});
        }
    }
#endif
}
606 
607 } // namespace monitor
608 } // namespace fan
609 } // namespace phosphor
610